Reorganizing regexp matcher protocol
parent
af2d380a7f
commit
99a2b95a5b
|
@ -36,8 +36,7 @@ IN: regexp.compiler
|
|||
|
||||
: transitions>quot ( transitions final-state? -- quot )
|
||||
[ split-literals suffix ] dip
|
||||
'[ _ _ step ] ;
|
||||
! '[ { array-capacity string } declare _ _ step ] ;
|
||||
'[ { array-capacity string } declare _ _ step ] ;
|
||||
|
||||
: word>quot ( word dfa -- quot )
|
||||
[ transitions>> at ]
|
||||
|
@ -68,8 +67,11 @@ IN: regexp.compiler
|
|||
: dfa>word ( dfa -- word )
|
||||
states>words [ states>code ] keep start-state>> ;
|
||||
|
||||
: run-regexp ( string word -- ? )
|
||||
[ f 0 ] 2dip execute ; inline
|
||||
: check-string ( string -- string )
|
||||
dup string? [ "String required" throw ] unless ;
|
||||
|
||||
: run-regexp ( start-index string word -- ? )
|
||||
{ [ f ] [ >fixnum ] [ check-string ] [ execute ] } spread ; inline
|
||||
|
||||
: dfa>quotation ( dfa -- quot )
|
||||
dfa>word '[ _ run-regexp ] ;
|
||||
|
@ -77,5 +79,5 @@ IN: regexp.compiler
|
|||
TUPLE: quot-matcher quot ;
|
||||
C: <quot-matcher> quot-matcher
|
||||
|
||||
M: quot-matcher match-index
|
||||
quot>> call( string -- i/f ) ;
|
||||
M: quot-matcher match-index-from
|
||||
quot>> call( index string -- i/f ) ;
|
||||
|
|
|
@ -1,61 +1,60 @@
|
|||
! Copyright (C) 2008, 2009 Daniel Ehrenberg, Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: kernel sequences math splitting make fry ;
|
||||
USING: kernel sequences math splitting make fry locals math.ranges
|
||||
accessors arrays ;
|
||||
IN: regexp.matchers
|
||||
|
||||
! For now, a matcher is just something with a method to do the
|
||||
! equivalent of match.
|
||||
|
||||
! matcher protocol:
|
||||
GENERIC: match-index ( string matcher -- index/f )
|
||||
GENERIC: match-index-from ( i string matcher -- index/f )
|
||||
|
||||
: match ( string matcher -- slice/f )
|
||||
dupd match-index [ head-slice ] [ drop f ] if* ;
|
||||
: match-index-head ( string matcher -- index/f )
|
||||
[ 0 ] 2dip match-index-from ;
|
||||
|
||||
: match-slice ( i string matcher -- slice/f )
|
||||
[ 2dup ] dip match-index-from
|
||||
[ swap <slice> ] [ 2drop f ] if* ;
|
||||
|
||||
: matches? ( string matcher -- ? )
|
||||
dupd match-index
|
||||
dupd match-index-head
|
||||
[ swap length = ] [ drop f ] if* ;
|
||||
|
||||
: match-head ( string matcher -- end/f ) match [ length ] [ f ] if* ;
|
||||
: map-find ( seq quot -- result elt )
|
||||
[ f ] 2dip
|
||||
'[ nip @ dup ] find
|
||||
[ [ drop f ] unless ] dip ; inline
|
||||
|
||||
: match-at ( string m matcher -- n/f finished? )
|
||||
[
|
||||
2dup swap length > [ 2drop f f ] [ tail-slice t ] if
|
||||
] dip swap [ match-head f ] [ 2drop f t ] if ;
|
||||
:: match-from ( i string matcher -- slice/f )
|
||||
i string length [a,b)
|
||||
[ string matcher match-slice ] map-find drop ;
|
||||
|
||||
: match-range ( string m matcher -- a/f b/f )
|
||||
3dup match-at over [
|
||||
drop nip rot drop dupd +
|
||||
] [
|
||||
[ 3drop drop f f ] [ drop [ 1+ ] dip match-range ] if
|
||||
] if ;
|
||||
: match-head ( str matcher -- slice/f )
|
||||
[ 0 ] 2dip match-from ;
|
||||
|
||||
: first-match ( string matcher -- slice/f )
|
||||
dupd 0 swap match-range rot over [ <slice> ] [ 3drop f ] if ;
|
||||
: next-match ( i string matcher -- i match/f )
|
||||
match-from [ dup [ to>> ] when ] keep ;
|
||||
|
||||
: re-cut ( string matcher -- end/f start )
|
||||
dupd first-match
|
||||
[ split1-slice swap ] [ "" like f swap ] if* ;
|
||||
|
||||
<PRIVATE
|
||||
|
||||
: (re-split) ( string matcher -- )
|
||||
over [ [ re-cut , ] keep (re-split) ] [ 2drop ] if ;
|
||||
|
||||
PRIVATE>
|
||||
|
||||
: re-split ( string matcher -- seq )
|
||||
[ (re-split) ] { } make ;
|
||||
|
||||
: re-replace ( string matcher replacement -- result )
|
||||
[ re-split ] dip join ;
|
||||
|
||||
: next-match ( string matcher -- end/f match/f )
|
||||
dupd first-match dup
|
||||
[ [ split1-slice nip ] keep ] [ 2drop f f ] if ;
|
||||
|
||||
: all-matches ( string matcher -- seq )
|
||||
[ dup ] swap '[ _ next-match ] [ ] produce nip harvest ;
|
||||
:: all-matches ( string matcher -- seq )
|
||||
0 [ dup ] [ string matcher next-match ] [ ] produce nip but-last ;
|
||||
|
||||
: count-matches ( string matcher -- n )
|
||||
all-matches length ;
|
||||
|
||||
<PRIVATE
|
||||
|
||||
:: split-slices ( string slices -- new-slices )
|
||||
slices [ to>> ] map 0 prefix
|
||||
slices [ from>> ] map string length suffix
|
||||
[ string <slice> ] 2map ;
|
||||
|
||||
PRIVATE>
|
||||
|
||||
: re-split1 ( string matcher -- before after/f )
|
||||
dupd match-head [ 1array split-slices first2 ] [ f ] if* ;
|
||||
|
||||
: re-split ( string matcher -- seq )
|
||||
dupd all-matches split-slices ;
|
||||
|
||||
: re-replace ( string matcher replacement -- result )
|
||||
[ re-split ] dip join ;
|
||||
|
|
|
@ -208,8 +208,8 @@ IN: regexp-tests
|
|||
[ f ] [ "aaaxb" "a+ab" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aaacb" "a+cb" <regexp> matches? ] unit-test
|
||||
|
||||
[ 3 ] [ "aaacb" "a*" <regexp> match-head ] unit-test
|
||||
[ 2 ] [ "aaacb" "aa?" <regexp> match-head ] unit-test
|
||||
[ 3 ] [ "aaacb" "a*" <regexp> match-index-head ] unit-test
|
||||
[ 2 ] [ "aaacb" "aa?" <regexp> match-index-head ] unit-test
|
||||
|
||||
[ t ] [ "aaa" R/ AAA/i matches? ] unit-test
|
||||
[ f ] [ "aax" R/ AAA/i matches? ] unit-test
|
||||
|
@ -238,11 +238,11 @@ IN: regexp-tests
|
|||
[ f ] [ "A" "\\p{Lower}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "A" R/ \p{Lower}/i matches? ] unit-test
|
||||
|
||||
[ t ] [ "abc" <reversed> R/ abc/r matches? ] unit-test
|
||||
[ t ] [ "abc" <reversed> R/ a[bB][cC]/r matches? ] unit-test
|
||||
[ t ] [ "abc" reverse R/ abc/r matches? ] unit-test
|
||||
[ t ] [ "abc" reverse R/ a[bB][cC]/r matches? ] unit-test
|
||||
|
||||
[ t ] [ "xabc" R/ abc/ <reverse-matcher> match >boolean ] unit-test
|
||||
[ t ] [ "xabc" R/ a[bB][cC]/ <reverse-matcher> match >boolean ] unit-test
|
||||
[ t ] [ "xabc" R/ abc/ <reverse-matcher> match-index-head >boolean ] unit-test
|
||||
[ t ] [ "xabc" R/ a[bB][cC]/ <reverse-matcher> match-index-head >boolean ] unit-test
|
||||
|
||||
[ t ] [ "s@f" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "a" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
|
||||
|
@ -267,13 +267,13 @@ IN: regexp-tests
|
|||
|
||||
[ ] [ "USING: regexp kernel ; R' \\*[^\s*][^*]*\\*' drop" eval ] unit-test
|
||||
|
||||
[ "ab" ] [ "ab" "(a|ab)(bc)?" <regexp> first-match >string ] unit-test
|
||||
[ "abc" ] [ "abc" "(a|ab)(bc)?" <regexp> first-match >string ] unit-test
|
||||
[ "ab" ] [ "ab" "(a|ab)(bc)?" <regexp> match-head >string ] unit-test
|
||||
[ "abc" ] [ "abc" "(a|ab)(bc)?" <regexp> match-head >string ] unit-test
|
||||
|
||||
[ "ab" ] [ "ab" "(ab|a)(bc)?" <regexp> first-match >string ] unit-test
|
||||
[ "abc" ] [ "abc" "(ab|a)(bc)?" <regexp> first-match >string ] unit-test
|
||||
[ "ab" ] [ "ab" "(ab|a)(bc)?" <regexp> match-head >string ] unit-test
|
||||
[ "abc" ] [ "abc" "(ab|a)(bc)?" <regexp> match-head >string ] unit-test
|
||||
|
||||
[ "b" ] [ "aaaaaaaaaaaaaaaaaaaaaaab" "((a*)*b)*b" <regexp> first-match >string ] unit-test
|
||||
[ "b" ] [ "aaaaaaaaaaaaaaaaaaaaaaab" "((a*)*b)*b" <regexp> match-head >string ] unit-test
|
||||
|
||||
! [ t ] [ "a:b" ".+:?" <regexp> matches? ] unit-test
|
||||
|
||||
|
@ -304,16 +304,16 @@ IN: regexp-tests
|
|||
[ "-- title --" ] [ "== title ==" R/ =/ "-" re-replace ] unit-test
|
||||
|
||||
/*
|
||||
[ f ] [ "ab" "a(?!b)" <regexp> first-match ] unit-test
|
||||
[ "a" ] [ "ac" "a(?!b)" <regexp> first-match >string ] unit-test
|
||||
[ f ] [ "ab" "a(?!b)" <regexp> match-head ] unit-test
|
||||
[ "a" ] [ "ac" "a(?!b)" <regexp> match-head >string ] unit-test
|
||||
[ t ] [ "fxxbar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
|
||||
[ f ] [ "foobar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
|
||||
[ "a" ] [ "ab" "a(?=b)(?=b)" <regexp> first-match >string ] unit-test
|
||||
[ "a" ] [ "ba" "a(?<=b)(?<=b)" <regexp> first-match >string ] unit-test
|
||||
[ "a" ] [ "cab" "a(?=b)(?<=c)" <regexp> first-match >string ] unit-test
|
||||
[ "a" ] [ "ab" "a(?=b)(?=b)" <regexp> match-head >string ] unit-test
|
||||
[ "a" ] [ "ba" "a(?<=b)(?<=b)" <regexp> match-head >string ] unit-test
|
||||
[ "a" ] [ "cab" "a(?=b)(?<=c)" <regexp> match-head >string ] unit-test
|
||||
|
||||
[ 3 ] [ "foobar" "foo(?=bar)" <regexp> match-head ] unit-test
|
||||
[ f ] [ "foobxr" "foo(?=bar)" <regexp> match-head ] unit-test
|
||||
[ 3 ] [ "foobar" "foo(?=bar)" <regexp> match-index-head ] unit-test
|
||||
[ f ] [ "foobxr" "foo(?=bar)" <regexp> match-index-head ] unit-test
|
||||
*/
|
||||
|
||||
! Bug in parsing word
|
||||
|
@ -393,15 +393,15 @@ IN: regexp-tests
|
|||
! [ t ] [ "a\r" R/ a$/m matches? ] unit-test
|
||||
! [ t ] [ "a\r\n" R/ a$/m matches? ] unit-test
|
||||
|
||||
! [ f ] [ "foobxr" "foo\\z" <regexp> match-head ] unit-test
|
||||
! [ 3 ] [ "foo" "foo\\z" <regexp> match-head ] unit-test
|
||||
! [ f ] [ "foobxr" "foo\\z" <regexp> match-index-head ] unit-test
|
||||
! [ 3 ] [ "foo" "foo\\z" <regexp> match-index-head ] unit-test
|
||||
|
||||
! [ t ] [ "foo" "\\bfoo\\b" <regexp> matches? ] unit-test
|
||||
! [ t ] [ "afoob" "\\Bfoo\\B" <regexp> matches? ] unit-test
|
||||
! [ t ] [ "afoob" "\\bfoo\\b" <regexp> matches? ] unit-test
|
||||
! [ f ] [ "foo" "\\Bfoo\\B" <regexp> matches? ] unit-test
|
||||
|
||||
! [ 3 ] [ "foo bar" "foo\\b" <regexp> match-head ] unit-test
|
||||
! [ 3 ] [ "foo bar" "foo\\b" <regexp> match-index-head ] unit-test
|
||||
! [ f ] [ "fooxbar" "foo\\b" <regexp> matches? ] unit-test
|
||||
! [ t ] [ "foo" "foo\\b" <regexp> matches? ] unit-test
|
||||
! [ t ] [ "foo bar" "foo\\b bar" <regexp> matches? ] unit-test
|
||||
|
@ -409,18 +409,18 @@ IN: regexp-tests
|
|||
! [ f ] [ "foo" "foo\\bbar" <regexp> matches? ] unit-test
|
||||
|
||||
! [ f ] [ "foo bar" "foo\\B" <regexp> matches? ] unit-test
|
||||
! [ 3 ] [ "fooxbar" "foo\\B" <regexp> match-head ] unit-test
|
||||
! [ 3 ] [ "fooxbar" "foo\\B" <regexp> match-index-head ] unit-test
|
||||
! [ t ] [ "foo" "foo\\B" <regexp> matches? ] unit-test
|
||||
! [ f ] [ "foo bar" "foo\\B bar" <regexp> matches? ] unit-test
|
||||
! [ t ] [ "fooxbar" "foo\\Bxbar" <regexp> matches? ] unit-test
|
||||
! [ f ] [ "foo" "foo\\Bbar" <regexp> matches? ] unit-test
|
||||
|
||||
! [ 1 ] [ "aaacb" "a+?" <regexp> match-head ] unit-test
|
||||
! [ 1 ] [ "aaacb" "aa??" <regexp> match-head ] unit-test
|
||||
! [ 1 ] [ "aaacb" "a+?" <regexp> match-index-head ] unit-test
|
||||
! [ 1 ] [ "aaacb" "aa??" <regexp> match-index-head ] unit-test
|
||||
! [ f ] [ "aaaab" "a++ab" <regexp> matches? ] unit-test
|
||||
! [ t ] [ "aaacb" "a++cb" <regexp> matches? ] unit-test
|
||||
! [ 3 ] [ "aacb" "aa?c" <regexp> match-head ] unit-test
|
||||
! [ 3 ] [ "aacb" "aa??c" <regexp> match-head ] unit-test
|
||||
! [ 3 ] [ "aacb" "aa?c" <regexp> match-index-head ] unit-test
|
||||
! [ 3 ] [ "aacb" "aa??c" <regexp> match-index-head ] unit-test
|
||||
|
||||
! "ab" "a(?=b*)" <regexp> match
|
||||
! "abbbbbc" "a(?=b*c)" <regexp> match
|
||||
|
|
|
@ -45,13 +45,13 @@ C: <reverse-matcher> reverse-matcher
|
|||
: compile-reverse ( regexp -- regexp )
|
||||
dup '[ [ _ get-ast <reversed-option> ast>dfa ] unless* ] change-reverse-dfa ;
|
||||
|
||||
M: regexp match-index ( string regexp -- index/f )
|
||||
M: regexp match-index-from ( string regexp -- index/f )
|
||||
dup dfa-quot>>
|
||||
[ <quot-matcher> ]
|
||||
[ compile-regexp dfa>> <dfa-matcher> ] ?if
|
||||
match-index ;
|
||||
match-index-from ;
|
||||
|
||||
M: reverse-matcher match-index ( string regexp -- index/f )
|
||||
M: reverse-matcher match-index-from ( string regexp -- index/f )
|
||||
[ <reversed> ] [ regexp>> compile-reverse reverse-dfa>> ] bi*
|
||||
<dfa-traverser> do-match match-index>> ;
|
||||
|
||||
|
|
|
@ -12,11 +12,11 @@ TUPLE: dfa-traverser
|
|||
current-index
|
||||
match-index ;
|
||||
|
||||
: <dfa-traverser> ( text dfa -- match )
|
||||
: <dfa-traverser> ( start-index text dfa -- match )
|
||||
dfa-traverser new
|
||||
swap [ start-state>> >>current-state ] [ >>dfa-table ] bi
|
||||
swap >>text
|
||||
0 >>current-index ;
|
||||
swap >>current-index ;
|
||||
|
||||
: final-state? ( dfa-traverser -- ? )
|
||||
[ current-state>> ]
|
||||
|
@ -65,5 +65,5 @@ TUPLE: dfa-traverser
|
|||
|
||||
TUPLE: dfa-matcher dfa ;
|
||||
C: <dfa-matcher> dfa-matcher
|
||||
M: dfa-matcher match-index
|
||||
M: dfa-matcher match-index-from
|
||||
dfa>> <dfa-traverser> do-match match-index>> ;
|
||||
|
|
Loading…
Reference in New Issue