diff --git a/basis/pcre/pcre-tests.factor b/basis/pcre/pcre-tests.factor index 05866f4588..a98dcf6700 100644 --- a/basis/pcre/pcre-tests.factor +++ b/basis/pcre/pcre-tests.factor @@ -68,6 +68,9 @@ os unix? [ [ 10 ] [ PCRE_CONFIG_NEWLINE config ] unit-test ] when [ { { { f "" } } { { f "" } } { { f "" } } { { f "" } } } ] [ "foo" "B*" findall ] unit-test +! Empty matches in strings with multi-byte characters are tricky. +[ ] [ "ööööö" "x*" findall ] unit-test + ! Tests for matches? [ t ] [ "örjan" "örjan" matches? ] unit-test diff --git a/basis/pcre/pcre.factor b/basis/pcre/pcre.factor index 74e6b673d2..1c6b53cc21 100644 --- a/basis/pcre/pcre.factor +++ b/basis/pcre/pcre.factor @@ -4,14 +4,12 @@ USING: arrays assocs fry - grouping io.encodings.utf8 io.encodings.string kernel math mirrors pcre.ffi pcre.info sequences sequences.generalizations - sets.private strings ; QUALIFIED: splitting IN: pcre @@ -49,35 +47,33 @@ TUPLE: matcher pcre extra subject ofs exec-opts match ; : ( subject compiled-pcre -- matcher ) [ utf8 encode ] dip [ pcre>> ] [ extra>> ] bi rot 0 0 f matcher boa ; -: exec-result>match ( count match-data -- match/f ) - over dup -1 < - [ pcre-error ] [ dup -1 = [ 3drop f ] [ drop 2array ] if ] if ; - ! This handling of zero-length matches is taken from pcredemo.c : empty-match-opts ( -- opts ) PCRE_NOTEMPTY_ATSTART PCRE_ANCHORED bitor ; : findnext ( matcher -- matcher'/f ) - clone dup values 6 firstn drop exec exec-result>match + clone dup values 6 firstn drop exec + over dup -1 < [ pcre-error ] when + -1 = [ - [ >>match ] - [ - second - [ first2 = [ empty-match-opts ] [ 0 ] if >>exec-opts ] - [ second >>ofs ] bi - ] bi - ] - [ - dup exec-opts>> 0 = + 2drop dup exec-opts>> 0 = [ drop f ] [ + ! dup [ ofs>> 1 + dup ] [ subject>> ] bi bounds-check? + ! [ >>ofs 0 >>exec-opts findnext ] [ 2drop f ] if + dup [ ofs>> 1 + ] [ subject>> length ] bi over < - [ 2drop f ] - [ - [ >>ofs ] [ drop 0 >>exec-opts ] bi - ] if + [ 2drop f ] [ >>ofs 0 >>exec-opts findnext ] if ] if - ] if* ; + ] + [ + [ 2array >>match ] + [ + nip + [ first2 = [ empty-match-opts ] [ 0 ] if >>exec-opts ] + [ second >>ofs ] bi + ] 2bi + ] if ; ! Result parsing : substring-list ( subject match-array count -- alien ) @@ -97,7 +93,7 @@ TUPLE: matcher pcre extra subject ofs exec-opts match ; GENERIC: findall ( subject obj -- matches ) M: compiled-pcre findall - [ [ findnext ] follow [ match>> ] map pruned harvest ] + [ [ findnext ] follow [ match>> ] map harvest ] [ nametable>> rot [ parse-match ] 2with map ] 2bi >array ; M: string findall