From 329875b1707c750b9ef727a40bb80ece3c0dfddd Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Wed, 11 Mar 2009 12:29:33 -0500 Subject: [PATCH] Regexp match iterators are better --- basis/regexp/regexp-tests.factor | 2 ++ basis/regexp/regexp.factor | 54 ++++++++++++++++++++++---------- 2 files changed, 39 insertions(+), 17 deletions(-) diff --git a/basis/regexp/regexp-tests.factor b/basis/regexp/regexp-tests.factor index f05416ab94..e01241552d 100644 --- a/basis/regexp/regexp-tests.factor +++ b/basis/regexp/regexp-tests.factor @@ -431,6 +431,8 @@ IN: regexp-tests [ f ] [ "a bar b" R/ foo/ re-contains? ] unit-test [ t ] [ "foo" R/ foo/ re-contains? ] unit-test +[ { "foo" "fxx" "fab" } ] [ "fab fxx foo" R/ f../r all-matches [ >string ] map ] unit-test + ! [ t ] [ "foo" "\\bfoo\\b" matches? ] unit-test ! [ t ] [ "afoob" "\\Bfoo\\B" matches? ] unit-test ! [ t ] [ "afoob" "\\bfoo\\b" matches? ] unit-test diff --git a/basis/regexp/regexp.factor b/basis/regexp/regexp.factor index 90218e05bd..d116bff73d 100644 --- a/basis/regexp/regexp.factor +++ b/basis/regexp/regexp.factor @@ -33,8 +33,6 @@ M: lookbehind question>quot ! Returns ( index string -- ? ) '[ [ 1- ] dip f _ execute ] ] maybe-negated ; - [ swap [ 1+ ] bi@ ] when ] dip ; inline + : match-slice ( i string quot -- slice/f ) [ 2dup ] dip call - [ swap ] [ 2drop f ] if* ; inline + [ swap make-slice ] [ 2drop f ] if* ; inline -: match-from ( i string quot -- slice/f ) - [ [ length [a,b) ] keep ] dip - '[ _ _ match-slice ] map-find drop ; inline +: search-range ( i string reverse? -- seq ) + [ drop 0 [a,b] ] [ length [a,b) ] if ; inline -: next-match ( i string quot -- i match/f ) - match-from [ dup [ to>> ] when ] keep ; inline +:: next-match ( i string quot reverse? -- i slice/f ) + i string reverse? search-range + [ string quot match-slice ] map-find drop + [ dup [ reverse? [ from>> ] [ to>> ] if ] when ] keep ; inline : do-next-match ( i string regexp -- i match/f ) - dup next-match>> execute( i string regexp -- i match/f ) ; + dup next-match>> execute( i string regexp -- i match/f ) ; inline PRIVATE> -: all-matches ( string regexp -- seq ) +TUPLE: match-iterator + { string read-only } + { regexp read-only } + { i read-only } + { value read-only } ; + +: iterate ( iterator -- iterator'/f ) + dup + [ i>> ] [ string>> ] [ regexp>> ] tri do-next-match + [ [ [ string>> ] [ regexp>> ] bi ] 2dip match-iterator boa ] + [ 2drop f ] if* ; + +: value ( iterator/f -- value/f ) + dup [ value>> ] when ; + +: ( string regexp -- match-iterator ) [ check-string ] dip - [ 0 [ dup ] ] 2dip '[ _ _ do-next-match ] produce - nip but-last ; + 2dup end/start nip f + match-iterator boa + iterate ; inline + +: all-matches ( string regexp -- seq ) + [ iterate ] follow [ value ] map ; : count-matches ( string regexp -- n ) all-matches length ; @@ -92,8 +113,7 @@ PRIVATE> PRIVATE> : first-match ( string regexp -- slice/f ) - [ 0 ] [ check-string ] [ ] tri* - do-next-match nip ; + value ; : re-contains? ( string regexp -- ? ) first-match >boolean ; @@ -137,9 +157,9 @@ GENERIC: compile-next-match ( regexp -- regexp ) M: regexp compile-next-match ( regexp -- regexp ) dup '[ dup \ next-initial-word = [ - drop _ compile-regexp dfa>> - '[ _ '[ _ _ execute ] next-match ] - (( i string -- i match/f )) simple-define-temp + drop _ [ compile-regexp dfa>> ] [ reverse-regexp? ] bi + '[ _ '[ _ _ execute ] _ next-match ] + (( i string regexp -- i match/f )) simple-define-temp ] when ] change-next-match ;