pcre: findnext algorithm improved
parent
e40a6cb927
commit
80c32e2bc6
|
@ -68,6 +68,9 @@ os unix? [ [ 10 ] [ PCRE_CONFIG_NEWLINE config ] unit-test ] when
|
||||||
[ { { { f "" } } { { f "" } } { { f "" } } { { f "" } } } ]
|
[ { { { f "" } } { { f "" } } { { f "" } } { { f "" } } } ]
|
||||||
[ "foo" "B*" findall ] unit-test
|
[ "foo" "B*" findall ] unit-test
|
||||||
|
|
||||||
|
! Empty matches in strings with multi-byte characters are tricky.
|
||||||
|
[ ] [ "ööööö" "x*" findall ] unit-test
|
||||||
|
|
||||||
! Tests for matches?
|
! Tests for matches?
|
||||||
[ t ] [ "örjan" "örjan" matches? ] unit-test
|
[ t ] [ "örjan" "örjan" matches? ] unit-test
|
||||||
|
|
||||||
|
|
|
@ -4,14 +4,12 @@ USING:
|
||||||
arrays
|
arrays
|
||||||
assocs
|
assocs
|
||||||
fry
|
fry
|
||||||
grouping
|
|
||||||
io.encodings.utf8 io.encodings.string
|
io.encodings.utf8 io.encodings.string
|
||||||
kernel
|
kernel
|
||||||
math
|
math
|
||||||
mirrors
|
mirrors
|
||||||
pcre.ffi pcre.info
|
pcre.ffi pcre.info
|
||||||
sequences sequences.generalizations
|
sequences sequences.generalizations
|
||||||
sets.private
|
|
||||||
strings ;
|
strings ;
|
||||||
QUALIFIED: splitting
|
QUALIFIED: splitting
|
||||||
IN: pcre
|
IN: pcre
|
||||||
|
@ -49,35 +47,33 @@ TUPLE: matcher pcre extra subject ofs exec-opts match ;
|
||||||
: <matcher> ( subject compiled-pcre -- matcher )
|
: <matcher> ( subject compiled-pcre -- matcher )
|
||||||
[ utf8 encode ] dip [ pcre>> ] [ extra>> ] bi rot 0 0 f matcher boa ;
|
[ utf8 encode ] dip [ pcre>> ] [ extra>> ] bi rot 0 0 f matcher boa ;
|
||||||
|
|
||||||
: exec-result>match ( count match-data -- match/f )
|
|
||||||
over dup -1 <
|
|
||||||
[ pcre-error ] [ dup -1 = [ 3drop f ] [ drop 2array ] if ] if ;
|
|
||||||
|
|
||||||
! This handling of zero-length matches is taken from pcredemo.c
|
! This handling of zero-length matches is taken from pcredemo.c
|
||||||
: empty-match-opts ( -- opts )
|
: empty-match-opts ( -- opts )
|
||||||
PCRE_NOTEMPTY_ATSTART PCRE_ANCHORED bitor ;
|
PCRE_NOTEMPTY_ATSTART PCRE_ANCHORED bitor ;
|
||||||
|
|
||||||
: findnext ( matcher -- matcher'/f )
|
: findnext ( matcher -- matcher'/f )
|
||||||
clone dup <mirror> values 6 firstn drop exec exec-result>match
|
clone dup <mirror> values 6 firstn drop exec
|
||||||
|
over dup -1 < [ pcre-error ] when
|
||||||
|
-1 =
|
||||||
[
|
[
|
||||||
[ >>match ]
|
2drop dup exec-opts>> 0 =
|
||||||
[
|
|
||||||
second
|
|
||||||
[ first2 = [ empty-match-opts ] [ 0 ] if >>exec-opts ]
|
|
||||||
[ second >>ofs ] bi
|
|
||||||
] bi
|
|
||||||
]
|
|
||||||
[
|
|
||||||
dup exec-opts>> 0 =
|
|
||||||
[ drop f ]
|
[ drop f ]
|
||||||
[
|
[
|
||||||
|
! dup [ ofs>> 1 + dup ] [ subject>> ] bi bounds-check?
|
||||||
|
! [ >>ofs 0 >>exec-opts findnext ] [ 2drop f ] if
|
||||||
|
|
||||||
dup [ ofs>> 1 + ] [ subject>> length ] bi over <
|
dup [ ofs>> 1 + ] [ subject>> length ] bi over <
|
||||||
[ 2drop f ]
|
[ 2drop f ] [ >>ofs 0 >>exec-opts findnext ] if
|
||||||
[
|
|
||||||
[ >>ofs ] [ drop 0 >>exec-opts ] bi
|
|
||||||
] if
|
|
||||||
] if
|
] if
|
||||||
] if* ;
|
]
|
||||||
|
[
|
||||||
|
[ 2array >>match ]
|
||||||
|
[
|
||||||
|
nip
|
||||||
|
[ first2 = [ empty-match-opts ] [ 0 ] if >>exec-opts ]
|
||||||
|
[ second >>ofs ] bi
|
||||||
|
] 2bi
|
||||||
|
] if ;
|
||||||
|
|
||||||
! Result parsing
|
! Result parsing
|
||||||
: substring-list ( subject match-array count -- alien )
|
: substring-list ( subject match-array count -- alien )
|
||||||
|
@ -97,7 +93,7 @@ TUPLE: matcher pcre extra subject ofs exec-opts match ;
|
||||||
GENERIC: findall ( subject obj -- matches )
|
GENERIC: findall ( subject obj -- matches )
|
||||||
|
|
||||||
M: compiled-pcre findall
|
M: compiled-pcre findall
|
||||||
[ <matcher> [ findnext ] follow [ match>> ] map pruned harvest ]
|
[ <matcher> [ findnext ] follow [ match>> ] map harvest ]
|
||||||
[ nametable>> rot [ parse-match ] 2with map ] 2bi >array ;
|
[ nametable>> rot [ parse-match ] 2with map ] 2bi >array ;
|
||||||
|
|
||||||
M: string findall
|
M: string findall
|
||||||
|
|
Loading…
Reference in New Issue