Regexp match iterators are better

db4
Daniel Ehrenberg 2009-03-11 12:29:33 -05:00
parent ec5bad2f7c
commit 329875b170
2 changed files with 39 additions and 17 deletions

View File

@ -431,6 +431,8 @@ IN: regexp-tests
[ f ] [ "a bar b" R/ foo/ re-contains? ] unit-test [ f ] [ "a bar b" R/ foo/ re-contains? ] unit-test
[ t ] [ "foo" R/ foo/ re-contains? ] unit-test [ t ] [ "foo" R/ foo/ re-contains? ] unit-test
[ { "foo" "fxx" "fab" } ] [ "fab fxx foo" R/ f../r all-matches [ >string ] map ] unit-test
! [ t ] [ "foo" "\\bfoo\\b" <regexp> matches? ] unit-test ! [ t ] [ "foo" "\\bfoo\\b" <regexp> matches? ] unit-test
! [ t ] [ "afoob" "\\Bfoo\\B" <regexp> matches? ] unit-test ! [ t ] [ "afoob" "\\Bfoo\\B" <regexp> matches? ] unit-test
! [ t ] [ "afoob" "\\bfoo\\b" <regexp> matches? ] unit-test ! [ t ] [ "afoob" "\\bfoo\\b" <regexp> matches? ] unit-test

View File

@ -33,8 +33,6 @@ M: lookbehind question>quot ! Returns ( index string -- ? )
'[ [ 1- ] dip f _ execute ] '[ [ 1- ] dip f _ execute ]
] maybe-negated ; ] maybe-negated ;
<PRIVATE
: check-string ( string -- string ) : check-string ( string -- string )
! Make this configurable ! Make this configurable
dup string? [ "String required" throw ] unless ; dup string? [ "String required" throw ] unless ;
@ -58,26 +56,49 @@ PRIVATE>
<PRIVATE <PRIVATE
: make-slice ( i j seq -- slice )
[ 2dup > [ swap [ 1+ ] bi@ ] when ] dip <slice> ; inline
: match-slice ( i string quot -- slice/f ) : match-slice ( i string quot -- slice/f )
[ 2dup ] dip call [ 2dup ] dip call
[ swap <slice> ] [ 2drop f ] if* ; inline [ swap make-slice ] [ 2drop f ] if* ; inline
: match-from ( i string quot -- slice/f ) : search-range ( i string reverse? -- seq )
[ [ length [a,b) ] keep ] dip [ drop 0 [a,b] ] [ length [a,b) ] if ; inline
'[ _ _ match-slice ] map-find drop ; inline
: next-match ( i string quot -- i match/f ) :: next-match ( i string quot reverse? -- i slice/f )
match-from [ dup [ to>> ] when ] keep ; inline i string reverse? search-range
[ string quot match-slice ] map-find drop
[ dup [ reverse? [ from>> ] [ to>> ] if ] when ] keep ; inline
: do-next-match ( i string regexp -- i match/f ) : do-next-match ( i string regexp -- i match/f )
dup next-match>> execute( i string regexp -- i match/f ) ; dup next-match>> execute( i string regexp -- i match/f ) ; inline
PRIVATE> PRIVATE>
: all-matches ( string regexp -- seq ) TUPLE: match-iterator
{ string read-only }
{ regexp read-only }
{ i read-only }
{ value read-only } ;
: iterate ( iterator -- iterator'/f )
dup
[ i>> ] [ string>> ] [ regexp>> ] tri do-next-match
[ [ [ string>> ] [ regexp>> ] bi ] 2dip match-iterator boa ]
[ 2drop f ] if* ;
: value ( iterator/f -- value/f )
dup [ value>> ] when ;
: <match-iterator> ( string regexp -- match-iterator )
[ check-string ] dip [ check-string ] dip
[ 0 [ dup ] ] 2dip '[ _ _ do-next-match ] produce 2dup end/start nip f
nip but-last ; match-iterator boa
iterate ; inline
: all-matches ( string regexp -- seq )
<match-iterator> [ iterate ] follow [ value ] map ;
: count-matches ( string regexp -- n ) : count-matches ( string regexp -- n )
all-matches length ; all-matches length ;
@ -92,8 +113,7 @@ PRIVATE>
PRIVATE> PRIVATE>
: first-match ( string regexp -- slice/f ) : first-match ( string regexp -- slice/f )
[ 0 ] [ check-string ] [ ] tri* <match-iterator> value ;
do-next-match nip ;
: re-contains? ( string regexp -- ? ) : re-contains? ( string regexp -- ? )
first-match >boolean ; first-match >boolean ;
@ -137,9 +157,9 @@ GENERIC: compile-next-match ( regexp -- regexp )
M: regexp compile-next-match ( regexp -- regexp ) M: regexp compile-next-match ( regexp -- regexp )
dup '[ dup '[
dup \ next-initial-word = [ dup \ next-initial-word = [
drop _ compile-regexp dfa>> drop _ [ compile-regexp dfa>> ] [ reverse-regexp? ] bi
'[ _ '[ _ _ execute ] next-match ] '[ _ '[ _ _ execute ] _ next-match ]
(( i string -- i match/f )) simple-define-temp (( i string regexp -- i match/f )) simple-define-temp
] when ] when
] change-next-match ; ] change-next-match ;