Regexp character class intersection and difference syntax

db4
Daniel Ehrenberg 2009-03-21 02:53:36 -05:00
parent a88f1050be
commit d6f9947bff
4 changed files with 51 additions and 6 deletions

View File

@ -230,7 +230,10 @@ TUPLE: class-partition integers not-integers simples not-simples and or other ;
dup or-class flatten partition-classes
dup not-integers>> length {
{ 0 [ nip make-or-class ] }
{ 1 [ not-integers>> first [ class>> '[ _ swap class-member? ] any? ] keep or ] }
{ 1 [
not-integers>> first
[ class>> '[ _ swap class-member? ] any? ] keep or
] }
[ 3drop t ]
} case ;
@ -251,6 +254,12 @@ M: or-class <not-class>
! Negating the constant classes: the incoming value is discarded and the
! opposite boolean is returned (t becomes f, f becomes t).
M: t <not-class> drop f ;
M: f <not-class> drop t ;
! Class difference a-b, built as the conjunction of a with the negation
! of b: b (top of stack) is negated, paired with a, and the pair is
! handed to <and-class>.
: <minus-class> ( a b -- a-b )
<not-class> 2array <and-class> ;
! Symmetric difference a~b: the union of a and b with their intersection
! removed. The pair { a b } is built once; the union is computed first
! while the pair is kept on the stack for the intersection.
: <sym-diff-class> ( a b -- a~b )
2array [ <or-class> ] keep <and-class> <minus-class> ;
! Membership test for a primitive-class wrapper: read the value stored in
! its class slot and delegate the test to class-member? on that value.
M: primitive-class class-member?
class>> class-member? ;

View File

@ -11,7 +11,7 @@ IN: regexp.parser.tests
"a|b" "a.b" "a|b|c" "abc|b" "a|bcd" "a|(b)" "(?-i:a)" "||"
"(a)|b" "(a|b)" "((a)|(b))" "(?:a)" "(?i:a)" "|b" "b|"
"[abc]" "[a-c]" "[^a-c]" "[^]]" "[]a]" "[[]" "[]-a]" "[a-]" "[-]"
"[--a]" "foo*" "(foo)*" "(a|b)|c" "(foo){2,3}" "(foo){2,}"
"foo*" "(foo)*" "(a|b)|c" "(foo){2,3}" "(foo){2,}"
"(foo){2}" "{2,3}" "{," "{,}" "}" "foo}" "[^]-a]" "[^-]a]"
"[a-]" "[^a-]" "[^a-]" "a{,2}" "(?#foobar)"
"\\p{Space}" "\\t" "\\[" "[\\]]" "\\P{Space}"

View File

@ -148,19 +148,29 @@ Character = EscapeSequence
| "^" => [[ ^ <tagged-epsilon> ]]
| . ?[ allowed-char? ]?
AnyRangeCharacter = EscapeSequence | .
AnyRangeCharacter = !("&&"|"||"|"--"|"~~") (EscapeSequence | .)
RangeCharacter = !("]") AnyRangeCharacter
Range = RangeCharacter:a "-" RangeCharacter:b => [[ a b <range-class> ]]
Range = RangeCharacter:a "-" !("-") RangeCharacter:b => [[ a b <range-class> ]]
| RangeCharacter
StartRange = AnyRangeCharacter:a "-" RangeCharacter:b => [[ a b <range-class> ]]
StartRange = AnyRangeCharacter:a "-" !("-") RangeCharacter:b => [[ a b <range-class> ]]
| AnyRangeCharacter
Ranges = StartRange:s Range*:r => [[ r s prefix ]]
CharClass = "^"?:n Ranges:e => [[ e n char-class ]]
BasicCharClass = "^"?:n Ranges:e => [[ e n char-class ]]
CharClass = BasicCharClass:b "&&" CharClass:c
=> [[ b c 2array <and-class> ]]
| BasicCharClass:b "||" CharClass:c
=> [[ b c 2array <or-class> ]]
| BasicCharClass:b "~~" CharClass:c
=> [[ b c <sym-diff-class> ]]
| BasicCharClass:b "--" CharClass:c
=> [[ b c <minus-class> ]]
| BasicCharClass
Options = [idmsux]*

View File

@ -508,3 +508,29 @@ IN: regexp-tests
[ t ] [ " " R/ \P{LL}/ matches? ] unit-test
[ f ] [ "a" R/ \P{sCriPt = latin}/ matches? ] unit-test
[ t ] [ " " R/ \P{SCRIPT = laTIn}/ matches? ] unit-test
! Logical operators
! Each group below runs the same four probes against one operator.
! Per the expected results: "a" is in both classes, "π" is only in
! \p{lower}, "A" is only in \p{script=latin}, and "3" is in neither.

! Juxtaposition (no operator): expected results match the || group below
[ t ] [ "a" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
[ t ] [ "π" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
[ t ] [ "A" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
[ f ] [ "3" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
! || : union, matches when either class matches
[ t ] [ "a" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
[ t ] [ "π" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
[ t ] [ "A" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
[ f ] [ "3" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
! && : intersection, matches only when both classes match
[ t ] [ "a" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
[ f ] [ "π" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
[ f ] [ "A" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
[ f ] [ "3" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
! ~~ : symmetric difference, matches when exactly one class matches
[ f ] [ "a" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
[ t ] [ "π" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
[ t ] [ "A" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
[ f ] [ "3" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
! -- : difference, matches the left class but not the right one
[ f ] [ "a" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
[ f ] [ "π" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
[ t ] [ "A" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
[ f ] [ "3" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test