Switching basis/globs to regexps (and EBNF for syntax); this exposes a bug in regexp

db4
Daniel Ehrenberg 2009-03-02 15:31:28 -06:00
parent 03ae348e78
commit 0b5ebce339
7 changed files with 44 additions and 30 deletions

View File

@@ -14,5 +14,6 @@ USING: tools.test globs ;
[ f ] [ "foo.java" "*.{xml,txt}" glob-matches? ] unit-test [ f ] [ "foo.java" "*.{xml,txt}" glob-matches? ] unit-test
[ t ] [ "foo.txt" "*.{xml,txt}" glob-matches? ] unit-test [ t ] [ "foo.txt" "*.{xml,txt}" glob-matches? ] unit-test
[ t ] [ "foo.xml" "*.{xml,txt}" glob-matches? ] unit-test [ t ] [ "foo.xml" "*.{xml,txt}" glob-matches? ] unit-test
[ f ] [ "foo." "*.{,xml,txt}" glob-matches? ] unit-test [ f ] [ "foo." "*.{xml,txt}" glob-matches? ] unit-test
[ t ] [ "foo." "*.{,xml,txt}" glob-matches? ] unit-test
[ t ] [ "foo.{" "*.{" glob-matches? ] unit-test [ t ] [ "foo.{" "*.{" glob-matches? ] unit-test

View File

@@ -1,42 +1,42 @@
! Copyright (C) 2007 Slava Pestov. ! Copyright (C) 2007, 2009 Slava Pestov, Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: parser-combinators parser-combinators.regexp lists sequences kernel USING: sequences kernel regexp.combinators regexp.matchers strings unicode.case
promises strings unicode.case ; peg.ebnf regexp arrays ;
IN: globs IN: globs
<PRIVATE EBNF: <glob>
: 'char' ( -- parser ) Character = "\\" .:c => [[ c 1string <literal> ]]
[ ",*?" member? not ] satisfy ; | !(","|"}") . => [[ 1string <literal> ]]
: 'string' ( -- parser ) RangeCharacter = !("]") .
'char' <+> [ >lower token ] <@ ;
: 'escaped-char' ( -- parser ) Range = RangeCharacter:a "-" RangeCharacter:b => [[ a b <char-range> ]]
"\\" token any-char-parser &> [ 1token ] <@ ; | RangeCharacter => [[ 1string <literal> ]]
: 'escaped-string' ( -- parser ) StartRange = .:a "-" RangeCharacter:b => [[ a b <char-range> ]]
'string' 'escaped-char' <|> ; | . => [[ 1string <literal> ]]
DEFER: 'term' Ranges = StartRange:s Range*:r => [[ r s prefix ]]
: 'glob' ( -- parser ) CharClass = "^"?:n Ranges:e => [[ e <or> n [ <not> ] when ]]
'term' <*> [ <and-parser> ] <@ ;
: 'union' ( -- parser ) AlternationBody = Concatenation:c "," AlternationBody:a => [[ a c prefix ]]
'glob' "," token nonempty-list-of "{" "}" surrounded-by | Concatenation => [[ 1array ]]
[ <or-parser> ] <@ ;
LAZY: 'term' ( -- parser ) Element = "*" => [[ R/ .*/ ]]
'union' | "?" => [[ R/ ./ ]]
'character-class' <|> | "[" CharClass:c "]" => [[ c ]]
"?" token [ drop any-char-parser ] <@ <|> | "{" AlternationBody:b "}" => [[ b <or> ]]
"*" token [ drop any-char-parser <*> ] <@ <|> | Character
'escaped-string' <|> ;
PRIVATE> Concatenation = Element* => [[ <sequence> ]]
: <glob> ( string -- glob ) 'glob' just parse-1 just ; End = !(.)
Main = Concatenation End
;EBNF
: glob-matches? ( input glob -- ? ) : glob-matches? ( input glob -- ? )
[ >lower ] [ <glob> ] bi* parse nil? not ; [ >case-fold ] bi@ <glob> matches? ;

View File

@@ -1,6 +1,6 @@
! Copyright (C) 2009 Daniel Ehrenberg ! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: regexp.combinators tools.test regexp kernel sequences ; USING: regexp.combinators tools.test regexp kernel sequences regexp.matchers ;
IN: regexp.combinators.tests IN: regexp.combinators.tests
: strings ( -- regexp ) : strings ( -- regexp )

View File

@@ -1,7 +1,7 @@
! Copyright (C) 2009 Daniel Ehrenberg ! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: regexp sequences kernel regexp.negation regexp.ast USING: regexp sequences kernel regexp.negation regexp.ast
accessors fry ; accessors fry regexp.classes ;
IN: regexp.combinators IN: regexp.combinators
<PRIVATE <PRIVATE
@@ -18,6 +18,11 @@ CONSTANT: <nothing> R/ (?~.*)/
: <literal> ( string -- regexp ) : <literal> ( string -- regexp )
[ "\\Q" "\\E" surround ] [ <concatenation> ] bi make-regexp ; foldable [ "\\Q" "\\E" surround ] [ <concatenation> ] bi make-regexp ; foldable
: <char-range> ( char1 char2 -- regexp )
[ [ "[" "-" surround ] [ "]" append ] bi* append ]
[ <range> ]
2bi make-regexp ;
: <or> ( regexps -- disjunction ) : <or> ( regexps -- disjunction )
[ [ raw>> "(" ")" surround ] map "|" join ] [ [ raw>> "(" ")" surround ] map "|" join ]
[ [ parse-tree>> ] map <alternation> ] bi [ [ parse-tree>> ] map <alternation> ] bi

View File

@@ -32,9 +32,13 @@ GENERIC: match-index-from ( i string matcher -- index/f )
: match-head ( str matcher -- slice/f ) : match-head ( str matcher -- slice/f )
[ 0 ] 2dip match-from ; [ 0 ] 2dip match-from ;
<PRIVATE
: next-match ( i string matcher -- i match/f ) : next-match ( i string matcher -- i match/f )
match-from [ dup [ to>> ] when ] keep ; match-from [ dup [ to>> ] when ] keep ;
PRIVATE>
:: all-matches ( string matcher -- seq ) :: all-matches ( string matcher -- seq )
0 [ dup ] [ string matcher next-match ] [ ] produce nip but-last ; 0 [ dup ] [ string matcher next-match ] [ ] produce nip but-last ;

View File

@@ -19,6 +19,7 @@ IN: regexp.minimize
: rewrite-transitions ( transition-table assoc quot -- transition-table ) : rewrite-transitions ( transition-table assoc quot -- transition-table )
[ [
[ clone ] dip
[ '[ _ at ] change-start-state ] [ '[ _ at ] change-start-state ]
[ '[ [ _ at ] map-set ] change-final-states ] [ '[ [ _ at ] map-set ] change-final-states ]
[ ] tri [ ] tri

View File

@@ -342,6 +342,9 @@ IN: regexp-tests
[ f ] [ "πc" R/ [a-zA-Z]c|\p{Lower}b/ matches? ] unit-test [ f ] [ "πc" R/ [a-zA-Z]c|\p{Lower}b/ matches? ] unit-test
[ f ] [ "Ab" R/ [a-zA-Z]c|\p{Lower}b/ matches? ] unit-test [ f ] [ "Ab" R/ [a-zA-Z]c|\p{Lower}b/ matches? ] unit-test
[ t ] [ "aaaa" R/ .*a./ matches? ] unit-test
! DFA is compiled when needed, or when literal
[ f ] [ "foo" <regexp> dfa>> >boolean ] unit-test [ f ] [ "foo" <regexp> dfa>> >boolean ] unit-test
[ t ] [ R/ foo/ dfa>> >boolean ] unit-test [ t ] [ R/ foo/ dfa>> >boolean ] unit-test