Remove match iterators for a performance boost
parent
c193f1b68a
commit
643da5f073
|
@ -42,8 +42,8 @@ ARTICLE: { "regexp" "operations" } "Matching operations with regular expressions
|
||||||
{ $subsection matches? }
|
{ $subsection matches? }
|
||||||
{ $subsection re-contains? }
|
{ $subsection re-contains? }
|
||||||
{ $subsection first-match }
|
{ $subsection first-match }
|
||||||
{ $subsection all-matches }
|
{ $subsection all-matching-slices }
|
||||||
{ $subsection re-split1 }
|
{ $subsection all-matching-subseqs }
|
||||||
{ $subsection re-split }
|
{ $subsection re-split }
|
||||||
{ $subsection re-replace }
|
{ $subsection re-replace }
|
||||||
{ $subsection count-matches } ;
|
{ $subsection count-matches } ;
|
||||||
|
@ -67,25 +67,21 @@ HELP: matches?
|
||||||
{ $values { "string" string } { "regexp" regexp } { "?" "a boolean" } }
|
{ $values { "string" string } { "regexp" regexp } { "?" "a boolean" } }
|
||||||
{ $description "Tests if the string as a whole matches the given regular expression." } ;
|
{ $description "Tests if the string as a whole matches the given regular expression." } ;
|
||||||
|
|
||||||
HELP: re-split1
|
HELP: all-matching-slices
|
||||||
{ $values { "string" string } { "regexp" regexp } { "before" string } { "after/f" string } }
|
|
||||||
{ $description "Searches the string for a substring which matches the pattern. If found, the input string is split on the leftmost and longest occurence of the match, and the two halves are given as output. If no match is found, then the input string and " { $link f } " are output." } ;
|
|
||||||
|
|
||||||
HELP: all-matches
|
|
||||||
{ $values { "string" string } { "regexp" regexp } { "seq" "a sequence of slices of the input" } }
|
{ $values { "string" string } { "regexp" regexp } { "seq" "a sequence of slices of the input" } }
|
||||||
{ $description "Finds a sequence of disjoint substrings which each match the pattern. It chooses this by finding the leftmost longest match, and then the leftmost longest match which starts after the end of the previous match, and so on." } ;
|
{ $description "Finds a sequence of disjoint substrings which each match the pattern. It chooses this by finding the leftmost longest match, and then the leftmost longest match which starts after the end of the previous match, and so on." } ;
|
||||||
|
|
||||||
HELP: count-matches
|
HELP: count-matches
|
||||||
{ $values { "string" string } { "regexp" regexp } { "n" integer } }
|
{ $values { "string" string } { "regexp" regexp } { "n" integer } }
|
||||||
{ $description "Counts how many disjoint matches the regexp has in the string, as made unambiguous by " { $link all-matches } "." } ;
|
{ $description "Counts how many disjoint matches the regexp has in the string, as made unambiguous by " { $link all-matching-slices } "." } ;
|
||||||
|
|
||||||
HELP: re-split
|
HELP: re-split
|
||||||
{ $values { "string" string } { "regexp" regexp } { "seq" "a sequence of slices of the input" } }
|
{ $values { "string" string } { "regexp" regexp } { "seq" "a sequence of slices of the input" } }
|
||||||
{ $description "Splits the input string into chunks separated by the regular expression. Each chunk contains no match of the regexp. The chunks are chosen by the strategy of " { $link all-matches } "." } ;
|
{ $description "Splits the input string into chunks separated by the regular expression. Each chunk contains no match of the regexp. The chunks are chosen by the strategy of " { $link all-matching-slices } "." } ;
|
||||||
|
|
||||||
HELP: re-replace
|
HELP: re-replace
|
||||||
{ $values { "string" string } { "regexp" regexp } { "replacement" string } { "result" string } }
|
{ $values { "string" string } { "regexp" regexp } { "replacement" string } { "result" string } }
|
||||||
{ $description "Replaces substrings which match the input regexp with the given replacement text. The boundaries of the substring are chosen by the strategy used by " { $link all-matches } "." } ;
|
{ $description "Replaces substrings which match the input regexp with the given replacement text. The boundaries of the substring are chosen by the strategy used by " { $link all-matching-slices } "." } ;
|
||||||
|
|
||||||
HELP: first-match
|
HELP: first-match
|
||||||
{ $values { "string" string } { "regexp" regexp } { "slice/f" "the match, if one exists" } }
|
{ $values { "string" string } { "regexp" regexp } { "slice/f" "the match, if one exists" } }
|
||||||
|
|
|
@ -287,7 +287,7 @@ IN: regexp-tests
|
||||||
[ { "a" "" } ] [ "a=" R/ =/ re-split [ >string ] map ] unit-test
|
[ { "a" "" } ] [ "a=" R/ =/ re-split [ >string ] map ] unit-test
|
||||||
|
|
||||||
[ { "ABC" "DEF" "GHI" } ]
|
[ { "ABC" "DEF" "GHI" } ]
|
||||||
[ "1ABC2DEF3GHI4" R/ [A-Z]+/ all-matches [ >string ] map ] unit-test
|
[ "1ABC2DEF3GHI4" R/ [A-Z]+/ all-matching-subseqs ] unit-test
|
||||||
|
|
||||||
[ 3 ]
|
[ 3 ]
|
||||||
[ "1ABC2DEF3GHI4" R/ [A-Z]+/ count-matches ] unit-test
|
[ "1ABC2DEF3GHI4" R/ [A-Z]+/ count-matches ] unit-test
|
||||||
|
@ -431,7 +431,7 @@ IN: regexp-tests
|
||||||
[ f ] [ "a bar b" R/ foo/ re-contains? ] unit-test
|
[ f ] [ "a bar b" R/ foo/ re-contains? ] unit-test
|
||||||
[ t ] [ "foo" R/ foo/ re-contains? ] unit-test
|
[ t ] [ "foo" R/ foo/ re-contains? ] unit-test
|
||||||
|
|
||||||
[ { "foo" "fxx" "fab" } ] [ "fab fxx foo" R/ f../r all-matches [ >string ] map ] unit-test
|
[ { "foo" "fxx" "fab" } ] [ "fab fxx foo" R/ f../r all-matching-subseqs ] unit-test
|
||||||
|
|
||||||
! [ t ] [ "foo" "\\bfoo\\b" <regexp> matches? ] unit-test
|
! [ t ] [ "foo" "\\bfoo\\b" <regexp> matches? ] unit-test
|
||||||
! [ t ] [ "afoob" "\\Bfoo\\B" <regexp> matches? ] unit-test
|
! [ t ] [ "afoob" "\\Bfoo\\B" <regexp> matches? ] unit-test
|
||||||
|
|
|
@ -49,93 +49,90 @@ M: reverse-regexp end/start drop length 1- -1 swap ;
|
||||||
PRIVATE>
|
PRIVATE>
|
||||||
|
|
||||||
: matches? ( string regexp -- ? )
|
: matches? ( string regexp -- ? )
|
||||||
[ end/start ] 2keep
|
|
||||||
[ check-string ] dip
|
[ check-string ] dip
|
||||||
|
[ end/start ] 2keep
|
||||||
match-index-from
|
match-index-from
|
||||||
[ swap = ] [ drop f ] if* ;
|
[ = ] [ drop f ] if* ;
|
||||||
|
|
||||||
<PRIVATE
|
<PRIVATE
|
||||||
|
|
||||||
TUPLE: match { i read-only } { j read-only } { seq read-only } ;
|
TUPLE: match { i read-only } { start read-only } { end read-only } { string read-only } ;
|
||||||
|
|
||||||
: match-slice ( i string quot -- match/f )
|
:: <match> ( i string quot: ( i string -- i seq j ) reverse? -- match/f )
|
||||||
[ 2dup ] dip call
|
i string quot call dup [| j |
|
||||||
[ swap match boa ] [ 2drop f ] if* ; inline
|
j i j
|
||||||
|
reverse? [ swap [ 1+ ] bi@ ] when
|
||||||
|
string match boa
|
||||||
|
] when ; inline
|
||||||
|
|
||||||
: search-range ( i string reverse? -- seq )
|
: search-range ( i string reverse? -- seq )
|
||||||
[ drop 0 [a,b] ] [ length [a,b) ] if ; inline
|
[ drop 0 [a,b] ] [ length [a,b) ] if ; inline
|
||||||
|
|
||||||
: match>result ( match reverse? -- i start end string )
|
: match>result ( match -- i start end string )
|
||||||
over [
|
dup
|
||||||
[ [ i>> ] [ j>> tuck ] [ seq>> ] tri ] dip
|
[ { [ i>> ] [ start>> ] [ end>> ] [ string>> ] } cleave ]
|
||||||
[ [ swap [ 1+ ] bi@ ] dip ] when
|
[ drop f f f f ]
|
||||||
] [ 2drop f f f f ] if ; inline
|
if ; inline
|
||||||
|
|
||||||
:: next-match ( i string quot reverse? -- i start end string )
|
:: next-match ( i string quot reverse? -- i start end ? )
|
||||||
i string reverse? search-range
|
i string reverse? search-range
|
||||||
[ string quot match-slice ] map-find drop
|
[ string quot reverse? <match> ] map-find drop
|
||||||
reverse? match>result ; inline
|
match>result ; inline
|
||||||
|
|
||||||
: do-next-match ( i string regexp -- i start end string )
|
: do-next-match ( i string regexp -- i start end ? )
|
||||||
dup next-match>>
|
dup next-match>>
|
||||||
execute-unsafe( i string regexp -- i start end string ) ;
|
execute-unsafe( i string regexp -- i start end ? ) ; inline
|
||||||
|
|
||||||
: next-slice ( i string regexp -- i/f slice/f )
|
:: (each-match) ( i string regexp quot: ( start end string -- ) -- )
|
||||||
do-next-match
|
i string regexp do-next-match [| i' start end |
|
||||||
[ slice boa ] [ drop ] if* ; inline
|
start end string quot call
|
||||||
|
i' string regexp quot (each-match)
|
||||||
|
] [ 3drop ] if ; inline recursive
|
||||||
|
|
||||||
PRIVATE>
|
PRIVATE>
|
||||||
|
|
||||||
TUPLE: match-iterator
|
: prepare-match-iterator ( string regexp -- i string regexp )
|
||||||
{ string read-only }
|
[ check-string ] dip [ end/start nip ] 2keep ; inline
|
||||||
{ regexp read-only }
|
|
||||||
{ i read-only }
|
|
||||||
{ value read-only } ;
|
|
||||||
|
|
||||||
: iterate ( iterator -- iterator'/f )
|
: each-match ( string regexp quot: ( start end string -- ) -- )
|
||||||
dup
|
[ prepare-match-iterator ] dip (each-match) ; inline
|
||||||
[ i>> ] [ string>> ] [ regexp>> ] tri next-slice
|
|
||||||
[ [ [ string>> ] [ regexp>> ] bi ] 2dip match-iterator boa ]
|
|
||||||
[ 2drop f ] if* ;
|
|
||||||
|
|
||||||
: value ( iterator/f -- value/f )
|
: map-matches ( string regexp quot: ( start end string -- obj ) -- seq )
|
||||||
dup [ value>> ] when ;
|
accumulator [ each-match ] dip >array ; inline
|
||||||
|
|
||||||
: <match-iterator> ( string regexp -- match-iterator )
|
: all-matching-slices ( string regexp -- seq )
|
||||||
[ check-string ] dip
|
[ slice boa ] map-matches ;
|
||||||
2dup end/start nip f
|
|
||||||
match-iterator boa
|
|
||||||
iterate ; inline
|
|
||||||
|
|
||||||
: all-matches ( string regexp -- seq )
|
: all-matching-subseqs ( string regexp -- seq )
|
||||||
<match-iterator> [ iterate ] follow [ value ] map ;
|
[ subseq ] map-matches ;
|
||||||
|
|
||||||
: count-matches ( string regexp -- n )
|
: count-matches ( string regexp -- n )
|
||||||
all-matches length ;
|
[ 0 ] 2dip [ 3drop 1+ ] each-match ;
|
||||||
|
|
||||||
<PRIVATE
|
<PRIVATE
|
||||||
|
|
||||||
:: split-slices ( string slices -- new-slices )
|
:: (re-split) ( string regexp quot -- new-slices )
|
||||||
slices [ to>> ] map 0 prefix
|
0 string regexp [| end start end' string |
|
||||||
slices [ from>> ] map string length suffix
|
end' ! leave it on the stack for the next iteration
|
||||||
[ string <slice> ] 2map ;
|
end start string quot call
|
||||||
|
] map-matches
|
||||||
|
! Final chunk
|
||||||
|
swap string length string quot call suffix ; inline
|
||||||
|
|
||||||
PRIVATE>
|
PRIVATE>
|
||||||
|
|
||||||
: first-match ( string regexp -- slice/f )
|
: first-match ( string regexp -- slice/f )
|
||||||
<match-iterator> value ;
|
[ prepare-match-iterator do-next-match ] [ drop ] 2bi
|
||||||
|
'[ _ slice boa nip ] [ 3drop f ] if ;
|
||||||
|
|
||||||
: re-contains? ( string regexp -- ? )
|
: re-contains? ( string regexp -- ? )
|
||||||
first-match >boolean ;
|
prepare-match-iterator do-next-match [ 3drop ] dip >boolean ;
|
||||||
|
|
||||||
: re-split1 ( string regexp -- before after/f )
|
|
||||||
dupd first-match [ 1array split-slices first2 ] [ f ] if* ;
|
|
||||||
|
|
||||||
: re-split ( string regexp -- seq )
|
: re-split ( string regexp -- seq )
|
||||||
dupd all-matches split-slices ;
|
[ slice boa ] (re-split) ;
|
||||||
|
|
||||||
: re-replace ( string regexp replacement -- result )
|
: re-replace ( string regexp replacement -- result )
|
||||||
[ re-split ] dip join ;
|
[ [ subseq ] (re-split) ] dip join ;
|
||||||
|
|
||||||
<PRIVATE
|
<PRIVATE
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue