Regexp character class intersection and difference syntax
							parent
							
								
									a88f1050be
								
							
						
					
					
						commit
						d6f9947bff
					
				| 
						 | 
				
			
			@ -230,7 +230,10 @@ TUPLE: class-partition integers not-integers simples not-simples and or other ;
 | 
			
		|||
    dup or-class flatten partition-classes
 | 
			
		||||
    dup not-integers>> length {
 | 
			
		||||
        { 0 [ nip make-or-class ] }
 | 
			
		||||
        { 1 [ not-integers>> first [ class>> '[ _ swap class-member? ] any? ] keep or ] }
 | 
			
		||||
        { 1 [
 | 
			
		||||
            not-integers>> first
 | 
			
		||||
            [ class>> '[ _ swap class-member? ] any? ] keep or
 | 
			
		||||
        ] }
 | 
			
		||||
        [ 3drop t ]
 | 
			
		||||
    } case ;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -251,6 +254,12 @@ M: or-class <not-class>
 | 
			
		|||
M: t <not-class> drop f ;
 | 
			
		||||
M: f <not-class> drop t ;
 | 
			
		||||
 | 
			
		||||
: <minus-class> ( a b -- a-b )
 | 
			
		||||
    <not-class> 2array <and-class> ;
 | 
			
		||||
 | 
			
		||||
: <sym-diff-class> ( a b -- a~b )
 | 
			
		||||
    2array [ <or-class> ] [ <and-class> ] bi <minus-class> ;
 | 
			
		||||
 | 
			
		||||
M: primitive-class class-member?
 | 
			
		||||
    class>> class-member? ;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -11,7 +11,7 @@ IN: regexp.parser.tests
 | 
			
		|||
    "a|b" "a.b" "a|b|c" "abc|b" "a|bcd" "a|(b)" "(?-i:a)" "||"
 | 
			
		||||
    "(a)|b" "(a|b)" "((a)|(b))" "(?:a)" "(?i:a)" "|b" "b|"
 | 
			
		||||
    "[abc]" "[a-c]" "[^a-c]" "[^]]" "[]a]" "[[]" "[]-a]" "[a-]" "[-]"
 | 
			
		||||
    "[--a]" "foo*" "(foo)*" "(a|b)|c" "(foo){2,3}" "(foo){2,}"
 | 
			
		||||
    "foo*" "(foo)*" "(a|b)|c" "(foo){2,3}" "(foo){2,}"
 | 
			
		||||
    "(foo){2}" "{2,3}" "{," "{,}" "}" "foo}" "[^]-a]" "[^-]a]"
 | 
			
		||||
    "[a-]" "[^a-]" "[^a-]" "a{,2}" "(?#foobar)"
 | 
			
		||||
    "\\p{Space}" "\\t" "\\[" "[\\]]" "\\P{Space}"
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -148,19 +148,29 @@ Character = EscapeSequence
 | 
			
		|||
          | "^" => [[ ^ <tagged-epsilon> ]]
 | 
			
		||||
          | . ?[ allowed-char? ]?
 | 
			
		||||
 | 
			
		||||
AnyRangeCharacter = EscapeSequence | .
 | 
			
		||||
AnyRangeCharacter = !("&&"|"||"|"--"|"~~") (EscapeSequence | .)
 | 
			
		||||
 | 
			
		||||
RangeCharacter = !("]") AnyRangeCharacter
 | 
			
		||||
 | 
			
		||||
Range = RangeCharacter:a "-" RangeCharacter:b => [[ a b <range-class> ]]
 | 
			
		||||
Range = RangeCharacter:a "-" !("-") RangeCharacter:b => [[ a b <range-class> ]]
 | 
			
		||||
      | RangeCharacter
 | 
			
		||||
 | 
			
		||||
StartRange = AnyRangeCharacter:a "-" RangeCharacter:b => [[ a b <range-class> ]]
 | 
			
		||||
StartRange = AnyRangeCharacter:a "-" !("-") RangeCharacter:b => [[ a b <range-class> ]]
 | 
			
		||||
           | AnyRangeCharacter
 | 
			
		||||
 | 
			
		||||
Ranges = StartRange:s Range*:r => [[ r s prefix ]]
 | 
			
		||||
 | 
			
		||||
CharClass = "^"?:n Ranges:e => [[ e n char-class ]]
 | 
			
		||||
BasicCharClass =  "^"?:n Ranges:e => [[ e n char-class ]]
 | 
			
		||||
 | 
			
		||||
CharClass = BasicCharClass:b "&&" CharClass:c
 | 
			
		||||
                => [[ b c 2array <and-class> ]]
 | 
			
		||||
          | BasicCharClass:b "||" CharClass:c
 | 
			
		||||
                => [[ b c 2array <or-class> ]]
 | 
			
		||||
          | BasicCharClass:b "~~" CharClass:c
 | 
			
		||||
                => [[ b c <sym-diff-class> ]]
 | 
			
		||||
          | BasicCharClass:b "--" CharClass:c
 | 
			
		||||
                => [[ b c <minus-class> ]]
 | 
			
		||||
          | BasicCharClass
 | 
			
		||||
 | 
			
		||||
Options = [idmsux]*
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -508,3 +508,29 @@ IN: regexp-tests
 | 
			
		|||
[ t ] [ " " R/ \P{LL}/ matches? ] unit-test
 | 
			
		||||
[ f ] [ "a" R/ \P{sCriPt = latin}/ matches? ] unit-test
 | 
			
		||||
[ t ] [ " " R/ \P{SCRIPT = laTIn}/ matches? ] unit-test
 | 
			
		||||
 | 
			
		||||
! Logical operators
 | 
			
		||||
[ t ] [ "a" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
 | 
			
		||||
[ t ] [ "π" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
 | 
			
		||||
[ t ] [ "A" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
 | 
			
		||||
[ f ] [ "3" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
 | 
			
		||||
 | 
			
		||||
[ t ] [ "a" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
 | 
			
		||||
[ t ] [ "π" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
 | 
			
		||||
[ t ] [ "A" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
 | 
			
		||||
[ f ] [ "3" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
 | 
			
		||||
 | 
			
		||||
[ t ] [ "a" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
 | 
			
		||||
[ f ] [ "π" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
 | 
			
		||||
[ f ] [ "A" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
 | 
			
		||||
[ f ] [ "3" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
 | 
			
		||||
 | 
			
		||||
[ f ] [ "a" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
 | 
			
		||||
[ t ] [ "π" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
 | 
			
		||||
[ t ] [ "A" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
 | 
			
		||||
[ f ] [ "3" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
 | 
			
		||||
 | 
			
		||||
[ f ] [ "a" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
 | 
			
		||||
[ f ] [ "π" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
 | 
			
		||||
[ t ] [ "A" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
 | 
			
		||||
[ f ] [ "3" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue