Switching basis/globs to regexps (and EBNF for syntax); this exposes a bug in regexp

db4
Daniel Ehrenberg 2009-03-02 15:31:28 -06:00
parent 03ae348e78
commit 0b5ebce339
7 changed files with 44 additions and 30 deletions

View File

@ -14,5 +14,6 @@ USING: tools.test globs ;
[ f ] [ "foo.java" "*.{xml,txt}" glob-matches? ] unit-test
[ t ] [ "foo.txt" "*.{xml,txt}" glob-matches? ] unit-test
[ t ] [ "foo.xml" "*.{xml,txt}" glob-matches? ] unit-test
[ f ] [ "foo." "*.{,xml,txt}" glob-matches? ] unit-test
[ f ] [ "foo." "*.{xml,txt}" glob-matches? ] unit-test
[ t ] [ "foo." "*.{,xml,txt}" glob-matches? ] unit-test
[ t ] [ "foo.{" "*.{" glob-matches? ] unit-test

View File

@ -1,42 +1,42 @@
! Copyright (C) 2007 Slava Pestov.
! Copyright (C) 2007, 2009 Slava Pestov, Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: parser-combinators parser-combinators.regexp lists sequences kernel
promises strings unicode.case ;
USING: sequences kernel regexp.combinators regexp.matchers strings unicode.case
peg.ebnf regexp arrays ;
IN: globs
<PRIVATE
EBNF: <glob>
: 'char' ( -- parser )
[ ",*?" member? not ] satisfy ;
Character = "\\" .:c => [[ c 1string <literal> ]]
| !(","|"}") . => [[ 1string <literal> ]]
: 'string' ( -- parser )
'char' <+> [ >lower token ] <@ ;
RangeCharacter = !("]") .
: 'escaped-char' ( -- parser )
"\\" token any-char-parser &> [ 1token ] <@ ;
Range = RangeCharacter:a "-" RangeCharacter:b => [[ a b <char-range> ]]
| RangeCharacter => [[ 1string <literal> ]]
: 'escaped-string' ( -- parser )
'string' 'escaped-char' <|> ;
StartRange = .:a "-" RangeCharacter:b => [[ a b <char-range> ]]
| . => [[ 1string <literal> ]]
DEFER: 'term'
Ranges = StartRange:s Range*:r => [[ r s prefix ]]
: 'glob' ( -- parser )
'term' <*> [ <and-parser> ] <@ ;
CharClass = "^"?:n Ranges:e => [[ e <or> n [ <not> ] when ]]
: 'union' ( -- parser )
'glob' "," token nonempty-list-of "{" "}" surrounded-by
[ <or-parser> ] <@ ;
AlternationBody = Concatenation:c "," AlternationBody:a => [[ a c prefix ]]
| Concatenation => [[ 1array ]]
LAZY: 'term' ( -- parser )
'union'
'character-class' <|>
"?" token [ drop any-char-parser ] <@ <|>
"*" token [ drop any-char-parser <*> ] <@ <|>
'escaped-string' <|> ;
Element = "*" => [[ R/ .*/ ]]
| "?" => [[ R/ ./ ]]
| "[" CharClass:c "]" => [[ c ]]
| "{" AlternationBody:b "}" => [[ b <or> ]]
| Character
PRIVATE>
Concatenation = Element* => [[ <sequence> ]]
: <glob> ( string -- glob ) 'glob' just parse-1 just ;
End = !(.)
Main = Concatenation End
;EBNF
: glob-matches? ( input glob -- ? )
[ >lower ] [ <glob> ] bi* parse nil? not ;
[ >case-fold ] bi@ <glob> matches? ;

View File

@ -1,6 +1,6 @@
! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: regexp.combinators tools.test regexp kernel sequences ;
USING: regexp.combinators tools.test regexp kernel sequences regexp.matchers ;
IN: regexp.combinators.tests
: strings ( -- regexp )

View File

@ -1,7 +1,7 @@
! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: regexp sequences kernel regexp.negation regexp.ast
accessors fry ;
accessors fry regexp.classes ;
IN: regexp.combinators
<PRIVATE
@ -18,6 +18,11 @@ CONSTANT: <nothing> R/ (?~.*)/
! Build a regexp from a plain string.
! Two values are derived from the input and handed to make-regexp:
!   1. the string wrapped as \Q...\E, used as the textual form;
!   2. the result of <concatenation> applied to the string.
! NOTE(review): make-regexp is defined elsewhere; presumably it takes
! ( raw parse-tree -- regexp ), matching the raw>> / parse-tree>> slots
! read by <or> -- confirm.
: <literal> ( string -- regexp )
[ "\\Q" "\\E" surround ] [ <concatenation> ] bi make-regexp ; foldable
! Build a regexp for the range char1..char2.
! 2bi runs both quotations on the same ( char1 char2 ) pair:
!   - the first assembles the textual form "[" char1 "-" char2 "]";
!   - the second calls <range> on the pair.
! Both results are then passed to make-regexp.
! NOTE(review): surround and append are sequence operations, so the first
! quotation treats char1 and char2 as sequences. Confirm whether callers pass
! one-character strings or code points; the globs grammar appears to pass the
! raw results of `.` -- TODO verify the textual form is correct in that case.
: <char-range> ( char1 char2 -- regexp )
[ [ "[" "-" surround ] [ "]" append ] bi* append ]
[ <range> ]
2bi make-regexp ;
: <or> ( regexps -- disjunction )
[ [ raw>> "(" ")" surround ] map "|" join ]
[ [ parse-tree>> ] map <alternation> ] bi

View File

@ -32,9 +32,13 @@ GENERIC: match-index-from ( i string matcher -- index/f )
! Run match-from on str with a starting index of 0.
! The 0 is pushed underneath str and matcher so the stack becomes
! ( 0 str matcher ) before match-from is called.
: match-head ( str matcher -- slice/f )
[ 0 ] 2dip match-from ;
<PRIVATE
! Perform one match-from call and compute the index to continue from.
! Outputs:
!   i       -- the to>> slot of the match, or f when there was no match
!   match/f -- the value returned by match-from, unchanged
! keep preserves the match itself while the quotation turns a copy of it
! into the next index.
: next-match ( i string matcher -- i match/f )
match-from [ dup [ to>> ] when ] keep ;
PRIVATE>
! Collect successive matches of matcher in string.
! Starting from index 0, produce keeps calling next-match while the
! current index is not f, accumulating each match/f result.
! The loop stops only after next-match has yielded f as the index, and the
! f match from that same call has already been collected -- but-last drops it.
! nip discards the leftover f index beneath the accumulated sequence.
:: all-matches ( string matcher -- seq )
0 [ dup ] [ string matcher next-match ] [ ] produce nip but-last ;

View File

@ -19,6 +19,7 @@ IN: regexp.minimize
: rewrite-transitions ( transition-table assoc quot -- transition-table )
[
[ clone ] dip
[ '[ _ at ] change-start-state ]
[ '[ [ _ at ] map-set ] change-final-states ]
[ ] tri

View File

@ -342,6 +342,9 @@ IN: regexp-tests
[ f ] [ "πc" R/ [a-zA-Z]c|\p{Lower}b/ matches? ] unit-test
[ f ] [ "Ab" R/ [a-zA-Z]c|\p{Lower}b/ matches? ] unit-test
[ t ] [ "aaaa" R/ .*a./ matches? ] unit-test
! DFA is compiled when needed, or when literal
[ f ] [ "foo" <regexp> dfa>> >boolean ] unit-test
[ t ] [ R/ foo/ dfa>> >boolean ] unit-test