Regexp match iterators are better
parent
ec5bad2f7c
commit
329875b170
|
@ -431,6 +431,8 @@ IN: regexp-tests
|
||||||
[ f ] [ "a bar b" R/ foo/ re-contains? ] unit-test
|
[ f ] [ "a bar b" R/ foo/ re-contains? ] unit-test
|
||||||
[ t ] [ "foo" R/ foo/ re-contains? ] unit-test
|
[ t ] [ "foo" R/ foo/ re-contains? ] unit-test
|
||||||
|
|
||||||
|
[ { "foo" "fxx" "fab" } ] [ "fab fxx foo" R/ f../r all-matches [ >string ] map ] unit-test
|
||||||
|
|
||||||
! [ t ] [ "foo" "\\bfoo\\b" <regexp> matches? ] unit-test
|
! [ t ] [ "foo" "\\bfoo\\b" <regexp> matches? ] unit-test
|
||||||
! [ t ] [ "afoob" "\\Bfoo\\B" <regexp> matches? ] unit-test
|
! [ t ] [ "afoob" "\\Bfoo\\B" <regexp> matches? ] unit-test
|
||||||
! [ t ] [ "afoob" "\\bfoo\\b" <regexp> matches? ] unit-test
|
! [ t ] [ "afoob" "\\bfoo\\b" <regexp> matches? ] unit-test
|
||||||
|
|
|
@ -33,8 +33,6 @@ M: lookbehind question>quot ! Returns ( index string -- ? )
|
||||||
'[ [ 1- ] dip f _ execute ]
|
'[ [ 1- ] dip f _ execute ]
|
||||||
] maybe-negated ;
|
] maybe-negated ;
|
||||||
|
|
||||||
<PRIVATE
|
|
||||||
|
|
||||||
: check-string ( string -- string )
|
: check-string ( string -- string )
|
||||||
! Make this configurable
|
! Make this configurable
|
||||||
dup string? [ "String required" throw ] unless ;
|
dup string? [ "String required" throw ] unless ;
|
||||||
|
@ -58,26 +56,49 @@ PRIVATE>
|
||||||
|
|
||||||
<PRIVATE
|
<PRIVATE
|
||||||
|
|
||||||
|
: make-slice ( i j seq -- slice )
|
||||||
|
[ 2dup > [ swap [ 1+ ] bi@ ] when ] dip <slice> ; inline
|
||||||
|
|
||||||
: match-slice ( i string quot -- slice/f )
|
: match-slice ( i string quot -- slice/f )
|
||||||
[ 2dup ] dip call
|
[ 2dup ] dip call
|
||||||
[ swap <slice> ] [ 2drop f ] if* ; inline
|
[ swap make-slice ] [ 2drop f ] if* ; inline
|
||||||
|
|
||||||
: match-from ( i string quot -- slice/f )
|
: search-range ( i string reverse? -- seq )
|
||||||
[ [ length [a,b) ] keep ] dip
|
[ drop 0 [a,b] ] [ length [a,b) ] if ; inline
|
||||||
'[ _ _ match-slice ] map-find drop ; inline
|
|
||||||
|
|
||||||
: next-match ( i string quot -- i match/f )
|
:: next-match ( i string quot reverse? -- i slice/f )
|
||||||
match-from [ dup [ to>> ] when ] keep ; inline
|
i string reverse? search-range
|
||||||
|
[ string quot match-slice ] map-find drop
|
||||||
|
[ dup [ reverse? [ from>> ] [ to>> ] if ] when ] keep ; inline
|
||||||
|
|
||||||
: do-next-match ( i string regexp -- i match/f )
|
: do-next-match ( i string regexp -- i match/f )
|
||||||
dup next-match>> execute( i string regexp -- i match/f ) ;
|
dup next-match>> execute( i string regexp -- i match/f ) ; inline
|
||||||
|
|
||||||
PRIVATE>
|
PRIVATE>
|
||||||
|
|
||||||
: all-matches ( string regexp -- seq )
|
TUPLE: match-iterator
|
||||||
|
{ string read-only }
|
||||||
|
{ regexp read-only }
|
||||||
|
{ i read-only }
|
||||||
|
{ value read-only } ;
|
||||||
|
|
||||||
|
: iterate ( iterator -- iterator'/f )
|
||||||
|
dup
|
||||||
|
[ i>> ] [ string>> ] [ regexp>> ] tri do-next-match
|
||||||
|
[ [ [ string>> ] [ regexp>> ] bi ] 2dip match-iterator boa ]
|
||||||
|
[ 2drop f ] if* ;
|
||||||
|
|
||||||
|
: value ( iterator/f -- value/f )
|
||||||
|
dup [ value>> ] when ;
|
||||||
|
|
||||||
|
: <match-iterator> ( string regexp -- match-iterator )
|
||||||
[ check-string ] dip
|
[ check-string ] dip
|
||||||
[ 0 [ dup ] ] 2dip '[ _ _ do-next-match ] produce
|
2dup end/start nip f
|
||||||
nip but-last ;
|
match-iterator boa
|
||||||
|
iterate ; inline
|
||||||
|
|
||||||
|
: all-matches ( string regexp -- seq )
|
||||||
|
<match-iterator> [ iterate ] follow [ value ] map ;
|
||||||
|
|
||||||
: count-matches ( string regexp -- n )
|
: count-matches ( string regexp -- n )
|
||||||
all-matches length ;
|
all-matches length ;
|
||||||
|
@ -92,8 +113,7 @@ PRIVATE>
|
||||||
PRIVATE>
|
PRIVATE>
|
||||||
|
|
||||||
: first-match ( string regexp -- slice/f )
|
: first-match ( string regexp -- slice/f )
|
||||||
[ 0 ] [ check-string ] [ ] tri*
|
<match-iterator> value ;
|
||||||
do-next-match nip ;
|
|
||||||
|
|
||||||
: re-contains? ( string regexp -- ? )
|
: re-contains? ( string regexp -- ? )
|
||||||
first-match >boolean ;
|
first-match >boolean ;
|
||||||
|
@ -137,9 +157,9 @@ GENERIC: compile-next-match ( regexp -- regexp )
|
||||||
M: regexp compile-next-match ( regexp -- regexp )
|
M: regexp compile-next-match ( regexp -- regexp )
|
||||||
dup '[
|
dup '[
|
||||||
dup \ next-initial-word = [
|
dup \ next-initial-word = [
|
||||||
drop _ compile-regexp dfa>>
|
drop _ [ compile-regexp dfa>> ] [ reverse-regexp? ] bi
|
||||||
'[ _ '[ _ _ execute ] next-match ]
|
'[ _ '[ _ _ execute ] _ next-match ]
|
||||||
(( i string -- i match/f )) simple-define-temp
|
(( i string regexp -- i match/f )) simple-define-temp
|
||||||
] when
|
] when
|
||||||
] change-next-match ;
|
] change-next-match ;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue