first stab at capture groups. they work for unambiguous groups (no overlap), working on fixing it for any case
parent
35564e4377
commit
c252060bb5
|
@ -121,7 +121,12 @@ M: character-class-range nfa-node ( node -- )
|
||||||
class-transition add-simple-entry ;
|
class-transition add-simple-entry ;
|
||||||
|
|
||||||
M: capture-group nfa-node ( node -- )
|
M: capture-group nfa-node ( node -- )
|
||||||
term>> nfa-node ;
|
eps literal-transition add-simple-entry
|
||||||
|
capture-group-on add-traversal-flag
|
||||||
|
term>> nfa-node
|
||||||
|
eps literal-transition add-simple-entry
|
||||||
|
capture-group-off add-traversal-flag
|
||||||
|
2 [ concatenate-nodes ] times ;
|
||||||
|
|
||||||
! xyzzy
|
! xyzzy
|
||||||
M: non-capture-group nfa-node ( node -- )
|
M: non-capture-group nfa-node ( node -- )
|
||||||
|
|
|
@ -318,7 +318,17 @@ IN: regexp-tests
|
||||||
|
|
||||||
[ { 0 1 } ] [ "ab" "a(?=b)(?=b)" <regexp> first-match ] unit-test
|
[ { 0 1 } ] [ "ab" "a(?=b)(?=b)" <regexp> first-match ] unit-test
|
||||||
[ { 1 2 } ] [ "ba" "a(?<=b)(?<=b)" <regexp> first-match ] unit-test
|
[ { 1 2 } ] [ "ba" "a(?<=b)(?<=b)" <regexp> first-match ] unit-test
|
||||||
|
|
||||||
|
|
||||||
[ { 1 2 } ] [ "cab" "a(?=b)(?<=c)" <regexp> first-match ] unit-test
|
[ { 1 2 } ] [ "cab" "a(?=b)(?<=c)" <regexp> first-match ] unit-test
|
||||||
|
|
||||||
|
! capture group 1: "aaaa" 2: ""
|
||||||
|
! "aaaa" "(a*)(a*)" <regexp> match*
|
||||||
|
! "aaaa" "(a*)(a+)" <regexp> match*
|
||||||
|
|
||||||
|
[ { 0 2 } ] [ "ab" "(a|ab)(bc)?" <regexp> first-match ] unit-test
|
||||||
|
[ { 0 3 } ] [ "abc" "(a|ab)(bc)?" <regexp> first-match ] unit-test
|
||||||
|
|
||||||
|
[ { 0 2 } ] [ "ab" "(ab|a)(bc)?" <regexp> first-match ] unit-test
|
||||||
|
[ { 0 3 } ] [ "abc" "(ab|a)(bc)?" <regexp> first-match ] unit-test
|
||||||
|
|
||||||
|
[ { 23 24 } ] [ "aaaaaaaaaaaaaaaaaaaaaaab" "((a*)*b)*b" <regexp> first-match ] unit-test
|
||||||
|
|
||||||
|
|
|
@ -28,6 +28,9 @@ IN: regexp
|
||||||
: match ( string regexp -- pair )
|
: match ( string regexp -- pair )
|
||||||
<dfa-traverser> do-match return-match ;
|
<dfa-traverser> do-match return-match ;
|
||||||
|
|
||||||
|
: match* ( string regexp -- pair )
|
||||||
|
<dfa-traverser> do-match [ return-match ] [ captured-groups>> ] bi ;
|
||||||
|
|
||||||
: matches? ( string regexp -- ? )
|
: matches? ( string regexp -- ? )
|
||||||
dupd match
|
dupd match
|
||||||
[ [ length ] [ length>> 1- ] bi* = ] [ drop f ] if* ;
|
[ [ length ] [ length>> 1- ] bi* = ] [ drop f ] if* ;
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
! Copyright (C) 2008 Doug Coleman.
|
! Copyright (C) 2008 Doug Coleman.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: accessors assocs combinators kernel math math.ranges
|
USING: accessors assocs combinators kernel math math.ranges
|
||||||
quotations sequences regexp.parser regexp.classes fry
|
quotations sequences regexp.parser regexp.classes fry arrays
|
||||||
combinators.short-circuit regexp.utils prettyprint regexp.nfa ;
|
combinators.short-circuit regexp.utils prettyprint regexp.nfa ;
|
||||||
IN: regexp.traversal
|
IN: regexp.traversal
|
||||||
|
|
||||||
|
@ -9,10 +9,11 @@ TUPLE: dfa-traverser
|
||||||
dfa-table
|
dfa-table
|
||||||
traversal-flags
|
traversal-flags
|
||||||
traverse-forward
|
traverse-forward
|
||||||
capture-groups
|
|
||||||
{ capture-group-index integer }
|
|
||||||
lookahead-counters
|
lookahead-counters
|
||||||
lookbehind-counters
|
lookbehind-counters
|
||||||
|
capture-counters
|
||||||
|
captured-groups
|
||||||
|
capture-group-index
|
||||||
last-state current-state
|
last-state current-state
|
||||||
text
|
text
|
||||||
start-index current-index
|
start-index current-index
|
||||||
|
@ -28,10 +29,12 @@ TUPLE: dfa-traverser
|
||||||
t >>traverse-forward
|
t >>traverse-forward
|
||||||
0 >>start-index
|
0 >>start-index
|
||||||
0 >>current-index
|
0 >>current-index
|
||||||
|
0 >>capture-group-index
|
||||||
V{ } clone >>matches
|
V{ } clone >>matches
|
||||||
V{ } clone >>capture-groups
|
V{ } clone >>capture-counters
|
||||||
V{ } clone >>lookbehind-counters
|
V{ } clone >>lookbehind-counters
|
||||||
V{ } clone >>lookahead-counters ;
|
V{ } clone >>lookahead-counters
|
||||||
|
H{ } clone >>captured-groups ;
|
||||||
|
|
||||||
: final-state? ( dfa-traverser -- ? )
|
: final-state? ( dfa-traverser -- ? )
|
||||||
[ current-state>> ] [ dfa-table>> final-states>> ] bi
|
[ current-state>> ] [ dfa-table>> final-states>> ] bi
|
||||||
|
@ -75,9 +78,28 @@ M: lookbehind-off flag-action ( dfa-traverser flag -- )
|
||||||
dup lookbehind-counters>>
|
dup lookbehind-counters>>
|
||||||
[ drop ] [ pop '[ _ + 2 + ] change-current-index drop ] if-empty ;
|
[ drop ] [ pop '[ _ + 2 + ] change-current-index drop ] if-empty ;
|
||||||
|
|
||||||
|
M: capture-group-on flag-action ( dfa-traverser flag -- )
|
||||||
|
drop
|
||||||
|
[ current-index>> 0 2array ]
|
||||||
|
[ capture-counters>> ] bi push ;
|
||||||
|
|
||||||
|
M: capture-group-off flag-action ( dfa-traverser flag -- )
|
||||||
|
drop
|
||||||
|
dup capture-counters>> empty? [
|
||||||
|
drop
|
||||||
|
] [
|
||||||
|
{
|
||||||
|
[ capture-counters>> pop first2 dupd + ]
|
||||||
|
[ text>> <slice> ]
|
||||||
|
[ [ 1+ ] change-capture-group-index capture-group-index>> ]
|
||||||
|
[ captured-groups>> set-at ]
|
||||||
|
} cleave
|
||||||
|
] if ;
|
||||||
|
|
||||||
: process-flags ( dfa-traverser -- )
|
: process-flags ( dfa-traverser -- )
|
||||||
[ [ 1+ ] map ] change-lookahead-counters
|
[ [ 1+ ] map ] change-lookahead-counters
|
||||||
[ [ 1+ ] map ] change-lookbehind-counters
|
[ [ 1+ ] map ] change-lookbehind-counters
|
||||||
|
[ [ first2 1+ 2array ] map ] change-capture-counters
|
||||||
! dup current-state>> .
|
! dup current-state>> .
|
||||||
dup [ current-state>> ] [ traversal-flags>> ] bi
|
dup [ current-state>> ] [ traversal-flags>> ] bi
|
||||||
at [ dup . flag-action ] with each ;
|
at [ dup . flag-action ] with each ;
|
||||||
|
|
Loading…
Reference in New Issue