From d01598887d54b680cb101c4c507e182b0267337f Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Fri, 22 Aug 2008 16:45:33 -0500 Subject: [PATCH] read1 drop -> drop1 add possessive/reluctant parsing --- extra/regexp2/parser/parser.factor | 42 ++++++++++++++++++++++++------ extra/regexp2/utils/utils.factor | 1 + 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/extra/regexp2/parser/parser.factor b/extra/regexp2/parser/parser.factor index 6eda3310d0..7413c3f35c 100644 --- a/extra/regexp2/parser/parser.factor +++ b/extra/regexp2/parser/parser.factor @@ -12,8 +12,18 @@ FROM: math.ranges => [a,b] ; MIXIN: node TUPLE: concatenation seq ; INSTANCE: concatenation node TUPLE: alternation seq ; INSTANCE: alternation node -TUPLE: kleene-star term ; INSTANCE: kleene-star node +! !!!!!!!! TUPLE: question term ; INSTANCE: question node +TUPLE: kleene-star term ; INSTANCE: kleene-star node + +! !!!!!!!! +TUPLE: possessive-question term ; INSTANCE: possessive-question node +TUPLE: possessive-kleene-star term ; INSTANCE: possessive-kleene-star node + +! !!!!!!!! +TUPLE: reluctant-question term ; INSTANCE: reluctant-question node +TUPLE: reluctant-kleene-star term ; INSTANCE: reluctant-kleene-star node + TUPLE: negation term ; INSTANCE: negation node TUPLE: constant char ; INSTANCE: constant node TUPLE: range from to ; INSTANCE: range node @@ -51,6 +61,11 @@ left-parenthesis pipe caret dash ; : get-unicode-case ( -- ? ) unicode-case get-option ; : get-reversed-regexp ( -- ? ) reversed-regexp get-option ; +: ( obj -- kleene ) possessive-kleene-star boa ; +: ( obj -- kleene ) reluctant-kleene-star boa ; +: ( obj -- kleene ) possessive-question boa ; +: ( obj -- kleene ) reluctant-question boa ; + : ( obj -- negation ) negation boa ; : ( seq -- concatenation ) >vector get-reversed-regexp [ reverse ] when @@ -146,9 +161,9 @@ ERROR: bad-special-group string ; { [ dup CHAR: > = ] [ drop nested-parse-regexp pop-stack make-independent-group ] } { [ dup CHAR: < = peek1 CHAR: = = and ] - [ drop read1 drop nested-parse-regexp pop-stack make-positive-lookbehind ] } + [ drop drop1 nested-parse-regexp pop-stack make-positive-lookbehind ] } { [ dup CHAR: < = peek1 CHAR: ! = and ] - [ drop read1 drop nested-parse-regexp pop-stack make-negative-lookbehind ] } + [ drop drop1 nested-parse-regexp pop-stack make-negative-lookbehind ] } [ ":)" read-until [ swap prefix ] dip @@ -162,16 +177,27 @@ ERROR: bad-special-group string ; : handle-left-parenthesis ( -- ) peek1 CHAR: ? = - [ read1 drop (parse-special-group) ] + [ drop1 (parse-special-group) ] [ nested-parse-regexp ] if ; : handle-dot ( -- ) any-char push-stack ; : handle-pipe ( -- ) pipe push-stack ; -: handle-star ( -- ) stack pop push-stack ; +: (handle-star) ( obj -- kleene-star ) + peek1 { + { CHAR: + [ drop1 ] } + { CHAR: ? [ drop1 ] } + [ drop ] + } case ; +: handle-star ( -- ) stack pop (handle-star) push-stack ; : handle-question ( -- ) - stack pop epsilon 2array push-stack ; + stack pop peek1 { + { CHAR: + [ drop1 ] } + { CHAR: ? [ drop1 ] } + [ drop epsilon 2array ] + } case push-stack ; : handle-plus ( -- ) - stack pop dup 2array push-stack ; + stack pop dup (handle-star) + 2array push-stack ; ERROR: unmatched-brace ; : parse-repetition ( -- start finish ? ) @@ -247,7 +273,7 @@ ERROR: expected-posix-class ; ERROR: bad-escaped-literals seq ; : parse-escaped-literals ( -- obj ) "\\E" read-until [ bad-escaped-literals ] unless - read1 drop + drop1 [ epsilon ] [ [ ] V{ } map-as first|concatenation diff --git a/extra/regexp2/utils/utils.factor b/extra/regexp2/utils/utils.factor index 0167e73005..a7606e0af3 100644 --- a/extra/regexp2/utils/utils.factor +++ b/extra/regexp2/utils/utils.factor @@ -19,6 +19,7 @@ IN: regexp2.utils : push1 ( obj -- ) input-stream get stream>> push ; : peek1 ( -- obj ) input-stream get stream>> [ f ] [ peek ] if-empty ; : pop3 ( seq -- obj1 obj2 obj3 ) [ pop ] [ pop ] [ pop ] tri spin ; +: drop1 ( -- ) read1 drop ; : stack ( -- obj ) current-regexp get stack>> ; : change-whole-stack ( quot -- )