Switching basis/globs to regexps (and EBNF for syntax); this exposes a bug in regexp
parent
03ae348e78
commit
0b5ebce339
|
@ -14,5 +14,6 @@ USING: tools.test globs ;
|
||||||
[ f ] [ "foo.java" "*.{xml,txt}" glob-matches? ] unit-test
|
[ f ] [ "foo.java" "*.{xml,txt}" glob-matches? ] unit-test
|
||||||
[ t ] [ "foo.txt" "*.{xml,txt}" glob-matches? ] unit-test
|
[ t ] [ "foo.txt" "*.{xml,txt}" glob-matches? ] unit-test
|
||||||
[ t ] [ "foo.xml" "*.{xml,txt}" glob-matches? ] unit-test
|
[ t ] [ "foo.xml" "*.{xml,txt}" glob-matches? ] unit-test
|
||||||
[ f ] [ "foo." "*.{,xml,txt}" glob-matches? ] unit-test
|
[ f ] [ "foo." "*.{xml,txt}" glob-matches? ] unit-test
|
||||||
|
[ t ] [ "foo." "*.{,xml,txt}" glob-matches? ] unit-test
|
||||||
[ t ] [ "foo.{" "*.{" glob-matches? ] unit-test
|
[ t ] [ "foo.{" "*.{" glob-matches? ] unit-test
|
||||||
|
|
|
@ -1,42 +1,42 @@
|
||||||
! Copyright (C) 2007 Slava Pestov.
|
! Copyright (C) 2007, 2009 Slava Pestov, Daniel Ehrenberg.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: parser-combinators parser-combinators.regexp lists sequences kernel
|
USING: sequences kernel regexp.combinators regexp.matchers strings unicode.case
|
||||||
promises strings unicode.case ;
|
peg.ebnf regexp arrays ;
|
||||||
IN: globs
|
IN: globs
|
||||||
|
|
||||||
<PRIVATE
|
EBNF: <glob>
|
||||||
|
|
||||||
: 'char' ( -- parser )
|
Character = "\\" .:c => [[ c 1string <literal> ]]
|
||||||
[ ",*?" member? not ] satisfy ;
|
| !(","|"}") . => [[ 1string <literal> ]]
|
||||||
|
|
||||||
: 'string' ( -- parser )
|
RangeCharacter = !("]") .
|
||||||
'char' <+> [ >lower token ] <@ ;
|
|
||||||
|
|
||||||
: 'escaped-char' ( -- parser )
|
Range = RangeCharacter:a "-" RangeCharacter:b => [[ a b <char-range> ]]
|
||||||
"\\" token any-char-parser &> [ 1token ] <@ ;
|
| RangeCharacter => [[ 1string <literal> ]]
|
||||||
|
|
||||||
: 'escaped-string' ( -- parser )
|
StartRange = .:a "-" RangeCharacter:b => [[ a b <char-range> ]]
|
||||||
'string' 'escaped-char' <|> ;
|
| . => [[ 1string <literal> ]]
|
||||||
|
|
||||||
DEFER: 'term'
|
Ranges = StartRange:s Range*:r => [[ r s prefix ]]
|
||||||
|
|
||||||
: 'glob' ( -- parser )
|
CharClass = "^"?:n Ranges:e => [[ e <or> n [ <not> ] when ]]
|
||||||
'term' <*> [ <and-parser> ] <@ ;
|
|
||||||
|
|
||||||
: 'union' ( -- parser )
|
AlternationBody = Concatenation:c "," AlternationBody:a => [[ a c prefix ]]
|
||||||
'glob' "," token nonempty-list-of "{" "}" surrounded-by
|
| Concatenation => [[ 1array ]]
|
||||||
[ <or-parser> ] <@ ;
|
|
||||||
|
|
||||||
LAZY: 'term' ( -- parser )
|
Element = "*" => [[ R/ .*/ ]]
|
||||||
'union'
|
| "?" => [[ R/ ./ ]]
|
||||||
'character-class' <|>
|
| "[" CharClass:c "]" => [[ c ]]
|
||||||
"?" token [ drop any-char-parser ] <@ <|>
|
| "{" AlternationBody:b "}" => [[ b <or> ]]
|
||||||
"*" token [ drop any-char-parser <*> ] <@ <|>
|
| Character
|
||||||
'escaped-string' <|> ;
|
|
||||||
|
|
||||||
PRIVATE>
|
Concatenation = Element* => [[ <sequence> ]]
|
||||||
|
|
||||||
: <glob> ( string -- glob ) 'glob' just parse-1 just ;
|
End = !(.)
|
||||||
|
|
||||||
|
Main = Concatenation End
|
||||||
|
|
||||||
|
;EBNF
|
||||||
|
|
||||||
: glob-matches? ( input glob -- ? )
|
: glob-matches? ( input glob -- ? )
|
||||||
[ >lower ] [ <glob> ] bi* parse nil? not ;
|
[ >case-fold ] bi@ <glob> matches? ;
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
! Copyright (C) 2009 Daniel Ehrenberg
|
! Copyright (C) 2009 Daniel Ehrenberg
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: regexp.combinators tools.test regexp kernel sequences ;
|
USING: regexp.combinators tools.test regexp kernel sequences regexp.matchers ;
|
||||||
IN: regexp.combinators.tests
|
IN: regexp.combinators.tests
|
||||||
|
|
||||||
: strings ( -- regexp )
|
: strings ( -- regexp )
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
! Copyright (C) 2009 Daniel Ehrenberg
|
! Copyright (C) 2009 Daniel Ehrenberg
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: regexp sequences kernel regexp.negation regexp.ast
|
USING: regexp sequences kernel regexp.negation regexp.ast
|
||||||
accessors fry ;
|
accessors fry regexp.classes ;
|
||||||
IN: regexp.combinators
|
IN: regexp.combinators
|
||||||
|
|
||||||
<PRIVATE
|
<PRIVATE
|
||||||
|
@ -18,6 +18,11 @@ CONSTANT: <nothing> R/ (?~.*)/
|
||||||
: <literal> ( string -- regexp )
|
: <literal> ( string -- regexp )
|
||||||
[ "\\Q" "\\E" surround ] [ <concatenation> ] bi make-regexp ; foldable
|
[ "\\Q" "\\E" surround ] [ <concatenation> ] bi make-regexp ; foldable
|
||||||
|
|
||||||
|
: <char-range> ( char1 char2 -- regexp )
|
||||||
|
[ [ "[" "-" surround ] [ "]" append ] bi* append ]
|
||||||
|
[ <range> ]
|
||||||
|
2bi make-regexp ;
|
||||||
|
|
||||||
: <or> ( regexps -- disjunction )
|
: <or> ( regexps -- disjunction )
|
||||||
[ [ raw>> "(" ")" surround ] map "|" join ]
|
[ [ raw>> "(" ")" surround ] map "|" join ]
|
||||||
[ [ parse-tree>> ] map <alternation> ] bi
|
[ [ parse-tree>> ] map <alternation> ] bi
|
||||||
|
|
|
@ -32,9 +32,13 @@ GENERIC: match-index-from ( i string matcher -- index/f )
|
||||||
: match-head ( str matcher -- slice/f )
|
: match-head ( str matcher -- slice/f )
|
||||||
[ 0 ] 2dip match-from ;
|
[ 0 ] 2dip match-from ;
|
||||||
|
|
||||||
|
<PRIVATE
|
||||||
|
|
||||||
: next-match ( i string matcher -- i match/f )
|
: next-match ( i string matcher -- i match/f )
|
||||||
match-from [ dup [ to>> ] when ] keep ;
|
match-from [ dup [ to>> ] when ] keep ;
|
||||||
|
|
||||||
|
PRIVATE>
|
||||||
|
|
||||||
:: all-matches ( string matcher -- seq )
|
:: all-matches ( string matcher -- seq )
|
||||||
0 [ dup ] [ string matcher next-match ] [ ] produce nip but-last ;
|
0 [ dup ] [ string matcher next-match ] [ ] produce nip but-last ;
|
||||||
|
|
||||||
|
|
|
@ -19,6 +19,7 @@ IN: regexp.minimize
|
||||||
|
|
||||||
: rewrite-transitions ( transition-table assoc quot -- transition-table )
|
: rewrite-transitions ( transition-table assoc quot -- transition-table )
|
||||||
[
|
[
|
||||||
|
[ clone ] dip
|
||||||
[ '[ _ at ] change-start-state ]
|
[ '[ _ at ] change-start-state ]
|
||||||
[ '[ [ _ at ] map-set ] change-final-states ]
|
[ '[ [ _ at ] map-set ] change-final-states ]
|
||||||
[ ] tri
|
[ ] tri
|
||||||
|
|
|
@ -342,6 +342,9 @@ IN: regexp-tests
|
||||||
[ f ] [ "πc" R/ [a-zA-Z]c|\p{Lower}b/ matches? ] unit-test
|
[ f ] [ "πc" R/ [a-zA-Z]c|\p{Lower}b/ matches? ] unit-test
|
||||||
[ f ] [ "Ab" R/ [a-zA-Z]c|\p{Lower}b/ matches? ] unit-test
|
[ f ] [ "Ab" R/ [a-zA-Z]c|\p{Lower}b/ matches? ] unit-test
|
||||||
|
|
||||||
|
[ t ] [ "aaaa" R/ .*a./ matches? ] unit-test
|
||||||
|
|
||||||
|
! DFA is compiled when needed, or when literal
|
||||||
[ f ] [ "foo" <regexp> dfa>> >boolean ] unit-test
|
[ f ] [ "foo" <regexp> dfa>> >boolean ] unit-test
|
||||||
[ t ] [ R/ foo/ dfa>> >boolean ] unit-test
|
[ t ] [ R/ foo/ dfa>> >boolean ] unit-test
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue