fix the parser for groups
parent
c61c9eb625
commit
da35e13153
|
@ -1,10 +1,10 @@
|
||||||
! Copyright (C) 2008 Doug Coleman.
|
! Copyright (C) 2008 Doug Coleman.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: accessors arrays assocs combinators io io.streams.string
|
USING: accessors arrays assocs combinators io io.streams.string
|
||||||
kernel math math.parser multi-methods namespaces qualified sets
|
kernel math math.parser namespaces qualified sets
|
||||||
quotations sequences splitting symbols vectors math.order
|
quotations sequences splitting symbols vectors math.order
|
||||||
unicode.categories strings regexp.backend regexp.utils
|
unicode.categories strings regexp.backend regexp.utils
|
||||||
unicode.case ;
|
unicode.case words ;
|
||||||
IN: regexp.parser
|
IN: regexp.parser
|
||||||
|
|
||||||
FROM: math.ranges => [a,b] ;
|
FROM: math.ranges => [a,b] ;
|
||||||
|
@ -25,11 +25,21 @@ TUPLE: reluctant-kleene-star term ; INSTANCE: reluctant-kleene-star node
|
||||||
TUPLE: negation term ; INSTANCE: negation node
|
TUPLE: negation term ; INSTANCE: negation node
|
||||||
TUPLE: constant char ; INSTANCE: constant node
|
TUPLE: constant char ; INSTANCE: constant node
|
||||||
TUPLE: range from to ; INSTANCE: range node
|
TUPLE: range from to ; INSTANCE: range node
|
||||||
|
|
||||||
|
MIXIN: parentheses-group
|
||||||
TUPLE: lookahead term ; INSTANCE: lookahead node
|
TUPLE: lookahead term ; INSTANCE: lookahead node
|
||||||
|
INSTANCE: lookahead parentheses-group
|
||||||
TUPLE: lookbehind term ; INSTANCE: lookbehind node
|
TUPLE: lookbehind term ; INSTANCE: lookbehind node
|
||||||
|
INSTANCE: lookbehind parentheses-group
|
||||||
TUPLE: capture-group term ; INSTANCE: capture-group node
|
TUPLE: capture-group term ; INSTANCE: capture-group node
|
||||||
|
INSTANCE: capture-group parentheses-group
|
||||||
TUPLE: non-capture-group term ; INSTANCE: non-capture-group node
|
TUPLE: non-capture-group term ; INSTANCE: non-capture-group node
|
||||||
|
INSTANCE: non-capture-group parentheses-group
|
||||||
TUPLE: independent-group term ; INSTANCE: independent-group node ! atomic group
|
TUPLE: independent-group term ; INSTANCE: independent-group node ! atomic group
|
||||||
|
INSTANCE: independent-group parentheses-group
|
||||||
|
TUPLE: comment-group term ; INSTANCE: comment-group node
|
||||||
|
INSTANCE: comment-group parentheses-group
|
||||||
|
|
||||||
TUPLE: character-class-range from to ; INSTANCE: character-class-range node
|
TUPLE: character-class-range from to ; INSTANCE: character-class-range node
|
||||||
SINGLETON: epsilon INSTANCE: epsilon node
|
SINGLETON: epsilon INSTANCE: epsilon node
|
||||||
SINGLETON: any-char INSTANCE: any-char node
|
SINGLETON: any-char INSTANCE: any-char node
|
||||||
|
@ -98,25 +108,6 @@ left-parenthesis pipe caret dash ;
|
||||||
|
|
||||||
ERROR: unmatched-parentheses ;
|
ERROR: unmatched-parentheses ;
|
||||||
|
|
||||||
: make-positive-lookahead ( string -- )
|
|
||||||
lookahead boa push-stack ;
|
|
||||||
|
|
||||||
: make-negative-lookahead ( string -- )
|
|
||||||
<negation> lookahead boa push-stack ;
|
|
||||||
|
|
||||||
: make-independent-group ( string -- )
|
|
||||||
#! no backtracking
|
|
||||||
independent-group boa push-stack ;
|
|
||||||
|
|
||||||
: make-positive-lookbehind ( string -- )
|
|
||||||
lookbehind boa push-stack ;
|
|
||||||
|
|
||||||
: make-negative-lookbehind ( string -- )
|
|
||||||
<negation> lookbehind boa push-stack ;
|
|
||||||
|
|
||||||
: make-non-capturing-group ( string -- )
|
|
||||||
non-capture-group boa push-stack ;
|
|
||||||
|
|
||||||
ERROR: bad-option ch ;
|
ERROR: bad-option ch ;
|
||||||
|
|
||||||
: option ( ch -- singleton )
|
: option ( ch -- singleton )
|
||||||
|
@ -143,33 +134,38 @@ ERROR: bad-option ch ;
|
||||||
|
|
||||||
DEFER: (parse-regexp)
|
DEFER: (parse-regexp)
|
||||||
: parse-special-group ( -- )
|
: parse-special-group ( -- )
|
||||||
beginning-of-group push-stack
|
;
|
||||||
(parse-regexp) pop-stack make-non-capturing-group ;
|
! beginning-of-group push-stack
|
||||||
|
! (parse-regexp) pop-stack make-non-capturing-group ;
|
||||||
|
|
||||||
ERROR: bad-special-group string ;
|
ERROR: bad-special-group string ;
|
||||||
|
|
||||||
DEFER: nested-parse-regexp
|
: nested-parse-regexp ( token ? -- )
|
||||||
|
[ push-stack (parse-regexp) pop-stack ] dip
|
||||||
|
[ <negation> ] when pop-stack boa push-stack ;
|
||||||
|
|
||||||
|
! non-capturing groups
|
||||||
: (parse-special-group) ( -- )
|
: (parse-special-group) ( -- )
|
||||||
read1 {
|
read1 {
|
||||||
{ [ dup CHAR: # = ]
|
{ [ dup CHAR: # = ] ! comment
|
||||||
[ drop nested-parse-regexp pop-stack drop ] }
|
[ drop comment-group f nested-parse-regexp pop-stack drop ] }
|
||||||
{ [ dup CHAR: : = ]
|
{ [ dup CHAR: : = ]
|
||||||
[ drop nested-parse-regexp pop-stack make-non-capturing-group ] }
|
[ drop non-capture-group f nested-parse-regexp ] }
|
||||||
{ [ dup CHAR: = = ]
|
{ [ dup CHAR: = = ]
|
||||||
[ drop nested-parse-regexp pop-stack make-positive-lookahead ] }
|
[ drop lookahead f nested-parse-regexp ] }
|
||||||
{ [ dup CHAR: ! = ]
|
{ [ dup CHAR: ! = ]
|
||||||
[ drop nested-parse-regexp pop-stack make-negative-lookahead ] }
|
[ drop lookahead t nested-parse-regexp ] }
|
||||||
{ [ dup CHAR: > = ]
|
{ [ dup CHAR: > = ]
|
||||||
[ drop nested-parse-regexp pop-stack make-independent-group ] }
|
[ drop non-capture-group f nested-parse-regexp ] }
|
||||||
{ [ dup CHAR: < = peek1 CHAR: = = and ]
|
{ [ dup CHAR: < = peek1 CHAR: = = and ]
|
||||||
[ drop drop1 nested-parse-regexp pop-stack make-positive-lookbehind ] }
|
[ drop drop1 lookbehind f nested-parse-regexp ] }
|
||||||
{ [ dup CHAR: < = peek1 CHAR: ! = and ]
|
{ [ dup CHAR: < = peek1 CHAR: ! = and ]
|
||||||
[ drop drop1 nested-parse-regexp pop-stack make-negative-lookbehind ] }
|
[ drop drop1 lookbehind t nested-parse-regexp ] }
|
||||||
[
|
[
|
||||||
":)" read-until
|
":)" read-until
|
||||||
[ swap prefix ] dip
|
[ swap prefix ] dip
|
||||||
{
|
{
|
||||||
{ CHAR: : [ parse-options parse-special-group ] }
|
{ CHAR: : [ parse-options (parse-special-group) ] }
|
||||||
{ CHAR: ) [ parse-options ] }
|
{ CHAR: ) [ parse-options ] }
|
||||||
[ drop bad-special-group ]
|
[ drop bad-special-group ]
|
||||||
} case
|
} case
|
||||||
|
@ -179,7 +175,7 @@ DEFER: nested-parse-regexp
|
||||||
: handle-left-parenthesis ( -- )
|
: handle-left-parenthesis ( -- )
|
||||||
peek1 CHAR: ? =
|
peek1 CHAR: ? =
|
||||||
[ drop1 (parse-special-group) ]
|
[ drop1 (parse-special-group) ]
|
||||||
[ nested-parse-regexp ] if ;
|
[ capture-group f nested-parse-regexp ] if ;
|
||||||
|
|
||||||
: handle-dot ( -- ) any-char push-stack ;
|
: handle-dot ( -- ) any-char push-stack ;
|
||||||
: handle-pipe ( -- ) pipe push-stack ;
|
: handle-pipe ( -- ) pipe push-stack ;
|
||||||
|
@ -408,14 +404,12 @@ DEFER: handle-left-bracket
|
||||||
[ first|concatenation ] map first|alternation ;
|
[ first|concatenation ] map first|alternation ;
|
||||||
|
|
||||||
: handle-right-parenthesis ( -- )
|
: handle-right-parenthesis ( -- )
|
||||||
stack beginning-of-group over last-index cut rest
|
stack dup [ parentheses-group "members" word-prop member? ] find-last -rot cut rest
|
||||||
[ current-regexp get swap >>stack drop ]
|
[ [ push ] keep current-regexp get (>>stack) ]
|
||||||
[ finish-regexp-parse <capture-group> push-stack ] bi* ;
|
[ finish-regexp-parse push-stack ] bi* ;
|
||||||
|
|
||||||
: nested-parse-regexp ( -- )
|
|
||||||
beginning-of-group push-stack (parse-regexp) ;
|
|
||||||
|
|
||||||
: ((parse-regexp)) ( token -- ? )
|
: parse-regexp-token ( token -- ? )
|
||||||
{
|
{
|
||||||
{ CHAR: . [ handle-dot t ] }
|
{ CHAR: . [ handle-dot t ] }
|
||||||
{ CHAR: ( [ handle-left-parenthesis t ] }
|
{ CHAR: ( [ handle-left-parenthesis t ] }
|
||||||
|
@ -433,7 +427,7 @@ DEFER: handle-left-bracket
|
||||||
} case ;
|
} case ;
|
||||||
|
|
||||||
: (parse-regexp) ( -- )
|
: (parse-regexp) ( -- )
|
||||||
read1 [ ((parse-regexp)) [ (parse-regexp) ] when ] when* ;
|
read1 [ parse-regexp-token [ (parse-regexp) ] when ] when* ;
|
||||||
|
|
||||||
: parse-regexp ( regexp -- )
|
: parse-regexp ( regexp -- )
|
||||||
dup current-regexp [
|
dup current-regexp [
|
||||||
|
|
Loading…
Reference in New Issue