Switching basis/globs to regexps (and EBNF for syntax); this exposes a bug in regexp
parent
03ae348e78
commit
0b5ebce339
|
@ -14,5 +14,6 @@ USING: tools.test globs ;
|
|||
[ f ] [ "foo.java" "*.{xml,txt}" glob-matches? ] unit-test
|
||||
[ t ] [ "foo.txt" "*.{xml,txt}" glob-matches? ] unit-test
|
||||
[ t ] [ "foo.xml" "*.{xml,txt}" glob-matches? ] unit-test
|
||||
[ f ] [ "foo." "*.{,xml,txt}" glob-matches? ] unit-test
|
||||
[ f ] [ "foo." "*.{xml,txt}" glob-matches? ] unit-test
|
||||
[ t ] [ "foo." "*.{,xml,txt}" glob-matches? ] unit-test
|
||||
[ t ] [ "foo.{" "*.{" glob-matches? ] unit-test
|
||||
|
|
|
@ -1,42 +1,42 @@
|
|||
! Copyright (C) 2007 Slava Pestov.
|
||||
! Copyright (C) 2007, 2009 Slava Pestov, Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: parser-combinators parser-combinators.regexp lists sequences kernel
|
||||
promises strings unicode.case ;
|
||||
USING: sequences kernel regexp.combinators regexp.matchers strings unicode.case
|
||||
peg.ebnf regexp arrays ;
|
||||
IN: globs
|
||||
|
||||
<PRIVATE
|
||||
EBNF: <glob>
|
||||
|
||||
: 'char' ( -- parser )
|
||||
[ ",*?" member? not ] satisfy ;
|
||||
Character = "\\" .:c => [[ c 1string <literal> ]]
|
||||
| !(","|"}") . => [[ 1string <literal> ]]
|
||||
|
||||
: 'string' ( -- parser )
|
||||
'char' <+> [ >lower token ] <@ ;
|
||||
RangeCharacter = !("]") .
|
||||
|
||||
: 'escaped-char' ( -- parser )
|
||||
"\\" token any-char-parser &> [ 1token ] <@ ;
|
||||
Range = RangeCharacter:a "-" RangeCharacter:b => [[ a b <char-range> ]]
|
||||
| RangeCharacter => [[ 1string <literal> ]]
|
||||
|
||||
: 'escaped-string' ( -- parser )
|
||||
'string' 'escaped-char' <|> ;
|
||||
StartRange = .:a "-" RangeCharacter:b => [[ a b <char-range> ]]
|
||||
| . => [[ 1string <literal> ]]
|
||||
|
||||
DEFER: 'term'
|
||||
Ranges = StartRange:s Range*:r => [[ r s prefix ]]
|
||||
|
||||
: 'glob' ( -- parser )
|
||||
'term' <*> [ <and-parser> ] <@ ;
|
||||
CharClass = "^"?:n Ranges:e => [[ e <or> n [ <not> ] when ]]
|
||||
|
||||
: 'union' ( -- parser )
|
||||
'glob' "," token nonempty-list-of "{" "}" surrounded-by
|
||||
[ <or-parser> ] <@ ;
|
||||
AlternationBody = Concatenation:c "," AlternationBody:a => [[ a c prefix ]]
|
||||
| Concatenation => [[ 1array ]]
|
||||
|
||||
LAZY: 'term' ( -- parser )
|
||||
'union'
|
||||
'character-class' <|>
|
||||
"?" token [ drop any-char-parser ] <@ <|>
|
||||
"*" token [ drop any-char-parser <*> ] <@ <|>
|
||||
'escaped-string' <|> ;
|
||||
Element = "*" => [[ R/ .*/ ]]
|
||||
| "?" => [[ R/ ./ ]]
|
||||
| "[" CharClass:c "]" => [[ c ]]
|
||||
| "{" AlternationBody:b "}" => [[ b <or> ]]
|
||||
| Character
|
||||
|
||||
PRIVATE>
|
||||
Concatenation = Element* => [[ <sequence> ]]
|
||||
|
||||
: <glob> ( string -- glob ) 'glob' just parse-1 just ;
|
||||
End = !(.)
|
||||
|
||||
Main = Concatenation End
|
||||
|
||||
;EBNF
|
||||
|
||||
: glob-matches? ( input glob -- ? )
|
||||
[ >lower ] [ <glob> ] bi* parse nil? not ;
|
||||
[ >case-fold ] bi@ <glob> matches? ;
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
! Copyright (C) 2009 Daniel Ehrenberg
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: regexp.combinators tools.test regexp kernel sequences ;
|
||||
USING: regexp.combinators tools.test regexp kernel sequences regexp.matchers ;
|
||||
IN: regexp.combinators.tests
|
||||
|
||||
: strings ( -- regexp )
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
! Copyright (C) 2009 Daniel Ehrenberg
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: regexp sequences kernel regexp.negation regexp.ast
|
||||
accessors fry ;
|
||||
accessors fry regexp.classes ;
|
||||
IN: regexp.combinators
|
||||
|
||||
<PRIVATE
|
||||
|
@ -18,6 +18,11 @@ CONSTANT: <nothing> R/ (?~.*)/
|
|||
! Build a regexp from a plain string.
! The first quotation produces the raw text by wrapping the string in
! \Q ... \E; the second produces the parse tree via <concatenation>.
! `bi` applies both to the same input and the two results go to make-regexp.
: <literal> ( string -- regexp )
|
||||
[ "\\Q" "\\E" surround ] [ <concatenation> ] bi make-regexp ; foldable
|
||||
|
||||
! Build a regexp for the character range char1-char2.
! The first quotation assembles the raw text "[" char1 "-" char2 "]";
! the second builds the parse tree with <range>. `2bi` hands both inputs
! to each quotation, and the two results are passed to make-regexp.
! NOTE(review): surround/append are sequence words applied directly to
! char1 and char2 -- confirm callers pass values those words accept.
: <char-range> ( char1 char2 -- regexp )
|
||||
[ [ "[" "-" surround ] [ "]" append ] bi* append ]
|
||||
[ <range> ]
|
||||
2bi make-regexp ;
|
||||
|
||||
: <or> ( regexps -- disjunction )
|
||||
[ [ raw>> "(" ")" surround ] map "|" join ]
|
||||
[ [ parse-tree>> ] map <alternation> ] bi
|
||||
|
|
|
@ -32,9 +32,13 @@ GENERIC: match-index-from ( i string matcher -- index/f )
|
|||
! Run match-from on str with a start index of 0.
! `[ 0 ] 2dip` slips the 0 underneath str and matcher so the stack is
! ( 0 str matcher ) when match-from is called.
: match-head ( str matcher -- slice/f )
|
||||
[ 0 ] 2dip match-from ;
|
||||
|
||||
<PRIVATE
|
||||
|
||||
! One step of the all-matches loop.
! Calls match-from, then leaves two values: the next index and the match.
! The next index is the match's `to>>` slot when a match was found, or f
! when match-from returned f (`dup [ to>> ] when`); `keep` restores the
! match/f result on top of the stack.
: next-match ( i string matcher -- i match/f )
|
||||
match-from [ dup [ to>> ] when ] keep ;
|
||||
|
||||
PRIVATE>
|
||||
|
||||
! Collect the results of repeatedly calling next-match on string/matcher.
! The running index starts at 0 and the loop continues while it is non-f
! (`[ dup ]`); each iteration's match/f value is accumulated by `produce`.
! `nip` discards the leftover index and `but-last` drops the final
! collected element.
! NOTE(review): presumably that final element is the f from the failed
! match that ended the loop -- confirm against `produce` semantics.
:: all-matches ( string matcher -- seq )
|
||||
0 [ dup ] [ string matcher next-match ] [ ] produce nip but-last ;
|
||||
|
||||
|
|
|
@ -19,6 +19,7 @@ IN: regexp.minimize
|
|||
|
||||
: rewrite-transitions ( transition-table assoc quot -- transition-table )
|
||||
[
|
||||
[ clone ] dip
|
||||
[ '[ _ at ] change-start-state ]
|
||||
[ '[ [ _ at ] map-set ] change-final-states ]
|
||||
[ ] tri
|
||||
|
|
|
@ -342,6 +342,9 @@ IN: regexp-tests
|
|||
[ f ] [ "πc" R/ [a-zA-Z]c|\p{Lower}b/ matches? ] unit-test
|
||||
[ f ] [ "Ab" R/ [a-zA-Z]c|\p{Lower}b/ matches? ] unit-test
|
||||
|
||||
[ t ] [ "aaaa" R/ .*a./ matches? ] unit-test
|
||||
|
||||
! DFA is compiled when needed, or when literal
|
||||
[ f ] [ "foo" <regexp> dfa>> >boolean ] unit-test
|
||||
[ t ] [ R/ foo/ dfa>> >boolean ] unit-test
|
||||
|
||||
|
|
Loading…
Reference in New Issue