Regexp character class intersection and difference syntax
parent
a88f1050be
commit
d6f9947bff
|
@ -230,7 +230,10 @@ TUPLE: class-partition integers not-integers simples not-simples and or other ;
|
|||
dup or-class flatten partition-classes
|
||||
dup not-integers>> length {
|
||||
{ 0 [ nip make-or-class ] }
|
||||
{ 1 [ not-integers>> first [ class>> '[ _ swap class-member? ] any? ] keep or ] }
|
||||
{ 1 [
|
||||
not-integers>> first
|
||||
[ class>> '[ _ swap class-member? ] any? ] keep or
|
||||
] }
|
||||
[ 3drop t ]
|
||||
} case ;
|
||||
|
||||
|
@ -251,6 +254,12 @@ M: or-class <not-class>
|
|||
! Negating t: the input is dropped and f is pushed in its place.
M: t <not-class> drop f ;
|
||||
! Negating f: the input is dropped and t is pushed in its place.
M: f <not-class> drop t ;
|
||||
|
||||
! Class difference "a minus b": negates b with <not-class>, pairs the
! result with a via 2array, and hands that pair to <and-class>,
! i.e. a AND (NOT b).
: <minus-class> ( a b -- a-b )
|
||||
<not-class> 2array <and-class> ;
|
||||
|
||||
! Symmetric difference of a and b: pairs them with 2array, builds both the
! <or-class> and the <and-class> of that pair (bi applies each quotation to
! the same pair), then subtracts the second from the first with
! <minus-class>, i.e. (a OR b) minus (a AND b).
: <sym-diff-class> ( a b -- a~b )
|
||||
2array [ <or-class> ] [ <and-class> ] bi <minus-class> ;
|
||||
|
||||
! Membership test for a primitive-class: reads the value in its class slot
! and calls class-member? again on that value.
M: primitive-class class-member?
|
||||
class>> class-member? ;
|
||||
|
||||
|
|
|
@ -11,7 +11,7 @@ IN: regexp.parser.tests
|
|||
"a|b" "a.b" "a|b|c" "abc|b" "a|bcd" "a|(b)" "(?-i:a)" "||"
|
||||
"(a)|b" "(a|b)" "((a)|(b))" "(?:a)" "(?i:a)" "|b" "b|"
|
||||
"[abc]" "[a-c]" "[^a-c]" "[^]]" "[]a]" "[[]" "[]-a]" "[a-]" "[-]"
|
||||
"[--a]" "foo*" "(foo)*" "(a|b)|c" "(foo){2,3}" "(foo){2,}"
|
||||
"foo*" "(foo)*" "(a|b)|c" "(foo){2,3}" "(foo){2,}"
|
||||
"(foo){2}" "{2,3}" "{," "{,}" "}" "foo}" "[^]-a]" "[^-]a]"
|
||||
"[a-]" "[^a-]" "[^a-]" "a{,2}" "(?#foobar)"
|
||||
"\\p{Space}" "\\t" "\\[" "[\\]]" "\\P{Space}"
|
||||
|
|
|
@ -148,19 +148,29 @@ Character = EscapeSequence
|
|||
| "^" => [[ ^ <tagged-epsilon> ]]
|
||||
| . ?[ allowed-char? ]?
|
||||
|
||||
AnyRangeCharacter = EscapeSequence | .
|
||||
AnyRangeCharacter = !("&&"|"||"|"--"|"~~") (EscapeSequence | .)
|
||||
|
||||
RangeCharacter = !("]") AnyRangeCharacter
|
||||
|
||||
Range = RangeCharacter:a "-" RangeCharacter:b => [[ a b <range-class> ]]
|
||||
Range = RangeCharacter:a "-" !("-") RangeCharacter:b => [[ a b <range-class> ]]
|
||||
| RangeCharacter
|
||||
|
||||
StartRange = AnyRangeCharacter:a "-" RangeCharacter:b => [[ a b <range-class> ]]
|
||||
StartRange = AnyRangeCharacter:a "-" !("-") RangeCharacter:b => [[ a b <range-class> ]]
|
||||
| AnyRangeCharacter
|
||||
|
||||
Ranges = StartRange:s Range*:r => [[ r s prefix ]]
|
||||
|
||||
CharClass = "^"?:n Ranges:e => [[ e n char-class ]]
|
||||
BasicCharClass = "^"?:n Ranges:e => [[ e n char-class ]]
|
||||
|
||||
CharClass = BasicCharClass:b "&&" CharClass:c
|
||||
=> [[ b c 2array <and-class> ]]
|
||||
| BasicCharClass:b "||" CharClass:c
|
||||
=> [[ b c 2array <or-class> ]]
|
||||
| BasicCharClass:b "~~" CharClass:c
|
||||
=> [[ b c <sym-diff-class> ]]
|
||||
| BasicCharClass:b "--" CharClass:c
|
||||
=> [[ b c <minus-class> ]]
|
||||
| BasicCharClass
|
||||
|
||||
Options = [idmsux]*
|
||||
|
||||
|
|
|
@ -508,3 +508,29 @@ IN: regexp-tests
|
|||
[ t ] [ " " R/ \P{LL}/ matches? ] unit-test
|
||||
[ f ] [ "a" R/ \P{sCriPt = latin}/ matches? ] unit-test
|
||||
[ t ] [ " " R/ \P{SCRIPT = laTIn}/ matches? ] unit-test
|
||||
|
||||
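! Character class set operators: juxtaposition and || (union), && (intersection), ~~ (symmetric difference), -- (difference)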
|
||||
[ t ] [ "a" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
|
||||
[ t ] [ "π" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
|
||||
[ t ] [ "A" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
|
||||
[ f ] [ "3" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
|
||||
|
||||
[ t ] [ "a" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
|
||||
[ t ] [ "π" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
|
||||
[ t ] [ "A" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
|
||||
[ f ] [ "3" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
|
||||
|
||||
[ t ] [ "a" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
|
||||
[ f ] [ "π" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
|
||||
[ f ] [ "A" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
|
||||
[ f ] [ "3" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
|
||||
|
||||
[ f ] [ "a" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
|
||||
[ t ] [ "π" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
|
||||
[ t ] [ "A" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
|
||||
[ f ] [ "3" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
|
||||
|
||||
[ f ] [ "a" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
|
||||
[ f ] [ "π" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
|
||||
[ t ] [ "A" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
|
||||
[ f ] [ "3" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
|
||||
|
|
Loading…
Reference in New Issue