Add logical operators to regexp character classes (Factor, basis/regexp).

Summary of what this patch does, per file:

  * regexp/classes/classes.factor
      - Reflows the `1` branch of the `case` inside the or-class
        constructor onto several lines (layout only).
      - Adds two binary class combinators with stack effects
        ( a b -- a-b ) and ( a b -- a~b ): set difference and
        symmetric difference.
  * regexp/parser/parser.factor
      - Inside [...] the two-character tokens && || -- ~~ are no longer
        accepted as ordinary range characters, and "a--" is no longer
        parsed as the start of a range (the `!("-")` lookahead).
      - The old CharClass rule becomes BasicCharClass; the new CharClass
        is right-recursive over BasicCharClass joined by && || ~~ --.
  * regexp/parser/parser-tests.factor
      - Drops "[--a]" from the list of patterns parsed by the tests,
        since "--" is now an operator.
  * regexp/regexp-tests.factor
      - Adds matching tests: juxtaposition and || are union, && is
        intersection, ~~ is symmetric difference, -- is difference.

NOTE(review): this patch was received with all newlines collapsed and with
every <...>-delimited Factor word name missing.  The line structure and the
names <minus-class>, <sym-diff-class>, <not-class>, <and-class>, <or-class>,
<range-class> and <tagged-epsilon> below were restored by hand.  Hunk line
counts match the @@ headers, but the exact wrapping/indentation of context
lines (especially in parser-tests.factor) is a reconstruction -- confirm
against the upstream commit before applying.

diff --git a/basis/regexp/classes/classes.factor b/basis/regexp/classes/classes.factor
index e114dea260..a1c4e3ca2a 100644
--- a/basis/regexp/classes/classes.factor
+++ b/basis/regexp/classes/classes.factor
@@ -230,7 +230,10 @@ TUPLE: class-partition integers not-integers simples not-simples and or other ;
     dup or-class flatten partition-classes
     dup not-integers>> length {
         { 0 [ nip make-or-class ] }
-        { 1 [ not-integers>> first [ class>> '[ _ swap class-member? ] any? ] keep or ] }
+        { 1 [
+            not-integers>> first
+            [ class>> '[ _ swap class-member? ] any? ] keep or
+        ] }
         [ 3drop t ]
     } case ;
 
@@ -251,6 +254,12 @@ M: or-class <not-class>
 M: t <not-class> drop f ;
 M: f <not-class> drop t ;
 
+: <minus-class> ( a b -- a-b )
+    <not-class> 2array <and-class> ;
+
+: <sym-diff-class> ( a b -- a~b )
+    2array [ <or-class> ] [ <and-class> ] bi <minus-class> ;
+
 M: primitive-class class-member?
     class>> class-member? ;
 
diff --git a/basis/regexp/parser/parser-tests.factor b/basis/regexp/parser/parser-tests.factor
index d606015f61..5ea9753fba 100644
--- a/basis/regexp/parser/parser-tests.factor
+++ b/basis/regexp/parser/parser-tests.factor
@@ -11,7 +11,7 @@ IN: regexp.parser.tests
     "a|b" "a.b" "a|b|c" "abc|b" "a|bcd" "a|(b)" "(?-i:a)" "||"
     "(a)|b" "(a|b)" "((a)|(b))" "(?:a)" "(?i:a)" "|b" "b|"
     "[abc]" "[a-c]" "[^a-c]" "[^]]" "[]a]" "[[]" "[]-a]" "[a-]" "[-]"
-    "[--a]" "foo*" "(foo)*" "(a|b)|c" "(foo){2,3}" "(foo){2,}"
+    "foo*" "(foo)*" "(a|b)|c" "(foo){2,3}" "(foo){2,}"
     "(foo){2}" "{2,3}" "{," "{,}" "}" "foo}" "[^]-a]" "[^-]a]"
     "[a-]" "[^a-]" "[^a-]" "a{,2}" "(?#foobar)"
     "\\p{Space}" "\\t" "\\[" "[\\]]" "\\P{Space}"
diff --git a/basis/regexp/parser/parser.factor b/basis/regexp/parser/parser.factor
index e8de469a94..9fcadc4008 100644
--- a/basis/regexp/parser/parser.factor
+++ b/basis/regexp/parser/parser.factor
@@ -148,19 +148,29 @@ Character = EscapeSequence
           | "^" => [[ ^ <tagged-epsilon> ]]
           | . ?[ allowed-char? ]?
 
-AnyRangeCharacter = EscapeSequence | .
+AnyRangeCharacter = !("&&"|"||"|"--"|"~~") (EscapeSequence | .)
 
 RangeCharacter = !("]") AnyRangeCharacter
 
-Range = RangeCharacter:a "-" RangeCharacter:b => [[ a b <range-class> ]]
+Range = RangeCharacter:a "-" !("-") RangeCharacter:b => [[ a b <range-class> ]]
       | RangeCharacter
 
-StartRange = AnyRangeCharacter:a "-" RangeCharacter:b => [[ a b <range-class> ]]
+StartRange = AnyRangeCharacter:a "-" !("-") RangeCharacter:b => [[ a b <range-class> ]]
            | AnyRangeCharacter
 
 Ranges = StartRange:s Range*:r => [[ r s prefix ]]
 
-CharClass = "^"?:n Ranges:e => [[ e n char-class ]]
+BasicCharClass = "^"?:n Ranges:e => [[ e n char-class ]]
+
+CharClass = BasicCharClass:b "&&" CharClass:c
+                => [[ b c 2array <and-class> ]]
+          | BasicCharClass:b "||" CharClass:c
+                => [[ b c 2array <or-class> ]]
+          | BasicCharClass:b "~~" CharClass:c
+                => [[ b c <sym-diff-class> ]]
+          | BasicCharClass:b "--" CharClass:c
+                => [[ b c <minus-class> ]]
+          | BasicCharClass
 
 Options = [idmsux]*
 
diff --git a/basis/regexp/regexp-tests.factor b/basis/regexp/regexp-tests.factor
index 999caeaed6..2234386803 100644
--- a/basis/regexp/regexp-tests.factor
+++ b/basis/regexp/regexp-tests.factor
@@ -508,3 +508,29 @@ IN: regexp-tests
 [ t ] [ " " R/ \P{LL}/ matches? ] unit-test
 [ f ] [ "a" R/ \P{sCriPt = latin}/ matches? ] unit-test
 [ t ] [ " " R/ \P{SCRIPT = laTIn}/ matches? ] unit-test
+
+! Logical operators
+[ t ] [ "a" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
+[ t ] [ "π" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
+[ t ] [ "A" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
+[ f ] [ "3" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
+
+[ t ] [ "a" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
+[ t ] [ "π" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
+[ t ] [ "A" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
+[ f ] [ "3" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
+
+[ t ] [ "a" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
+[ f ] [ "π" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
+[ f ] [ "A" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
+[ f ] [ "3" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
+
+[ f ] [ "a" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
+[ t ] [ "π" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
+[ t ] [ "A" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
+[ f ] [ "3" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
+
+[ f ] [ "a" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
+[ f ] [ "π" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
+[ t ] [ "A" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
+[ f ] [ "3" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test