case insensitive works
parent
9eba6c0034
commit
7d0d2da318
|
@ -19,6 +19,7 @@ TUPLE: regexp
|
|||
0 >>state
|
||||
V{ } clone >>stack
|
||||
V{ } clone >>new-states
|
||||
H{ } clone >>options
|
||||
H{ } clone >>visited-states ;
|
||||
|
||||
SYMBOL: current-regexp
|
||||
|
|
|
@ -21,6 +21,9 @@ M: letter-class class-member? ( obj class -- ? )
|
|||
! class-member? method for the LETTER-class singleton: the class argument is
! discarded and membership is decided by applying LETTER? to obj.
M: LETTER-class class-member? ( obj class -- ? )
|
||||
drop LETTER? ;
|
||||
|
||||
! class-member? method for the Letter-class singleton: the class argument is
! discarded and membership is decided by applying Letter? to obj.
M: Letter-class class-member? ( obj class -- ? )
|
||||
drop Letter? ;
|
||||
|
||||
! class-member? method for the ascii-class singleton: the class argument is
! discarded and membership is decided by applying ascii? to obj.
M: ascii-class class-member? ( obj class -- ? )
|
||||
drop ascii? ;
|
||||
|
||||
|
|
|
@ -31,18 +31,12 @@ SINGLETON: back-anchor INSTANCE: back-anchor node
|
|||
TUPLE: option-on option ; INSTANCE: option-on node
|
||||
TUPLE: option-off option ; INSTANCE: option-off node
|
||||
SINGLETONS: unix-lines dotall multiline comments case-insensitive unicode-case ;
|
||||
MIXIN: regexp-option
|
||||
INSTANCE: unix-lines regexp-option
|
||||
INSTANCE: dotall regexp-option
|
||||
INSTANCE: multiline regexp-option
|
||||
INSTANCE: comments regexp-option
|
||||
INSTANCE: case-insensitive regexp-option
|
||||
INSTANCE: unicode-case regexp-option
|
||||
|
||||
SINGLETONS: letter-class LETTER-class Letter-class digit-class
|
||||
alpha-class non-newline-blank-class
|
||||
ascii-class punctuation-class java-printable-class blank-class
|
||||
control-character-class hex-digit-class java-blank-class c-identifier-class ;
|
||||
control-character-class hex-digit-class java-blank-class c-identifier-class
|
||||
unmatchable-class ;
|
||||
|
||||
SINGLETONS: beginning-of-group end-of-group
|
||||
beginning-of-character-class end-of-character-class
|
||||
|
@ -75,6 +69,17 @@ left-parenthesis pipe caret dash ;
|
|||
! Collapse a sequence of alternatives: a one-element sequence yields that
! element itself; any other length is handed to <alternation>.
: first|alternation ( seq -- first/alternation )
|
||||
dup length 1 = [ first ] [ <alternation> ] if ;
|
||||
|
||||
! Build the node for a character-class range such as a-z.
!
! from, to: the two endpoint code points of the range.
! obj: a character-class-range tuple, an alternation of two such tuples
!      (case-insensitive mode), or unmatchable-class when no valid range
!      remains.
!
! A range is kept only when from < to; otherwise unmatchable-class is used.
: <character-class-range> ( from to -- obj )
    ! Case folding applies only if at least one endpoint is a letter and the
    ! case-insensitive option is currently on.
    2dup [ Letter? ] bi@ or get-case-insensitive and [
        ! Build both the lower-cased and the upper-cased range ...
        [ [ ch>lower ] bi@ character-class-range boa ]
        [ [ ch>upper ] bi@ character-class-range boa ] 2bi
        ! ... and keep only those that are still properly ordered.
        2array [ [ from>> ] [ to>> ] bi < ] filter
        [ unmatchable-class ] [ first|alternation ] if-empty
    ] [
        ! The stack holds the two raw code points here, not a tuple, so
        ! compare them directly. The previous code applied the from>> / to>>
        ! slot accessors to an integer.
        2dup <
        [ character-class-range boa ] [ 2drop unmatchable-class ] if
    ] if ;
|
||||
|
||||
ERROR: unmatched-parentheses ;
|
||||
|
||||
: make-positive-lookahead ( string -- )
|
||||
|
@ -213,10 +218,10 @@ ERROR: expected-posix-class ;
|
|||
read1 CHAR: { = [ expected-posix-class ] unless
|
||||
"}" read-until [ bad-character-class ] unless
|
||||
{
|
||||
{ "Lower" [ letter-class ] }
|
||||
{ "Upper" [ LETTER-class ] }
|
||||
{ "ASCII" [ ascii-class ] }
|
||||
{ "Lower" [ get-case-insensitive Letter-class letter-class ? ] }
|
||||
{ "Upper" [ get-case-insensitive Letter-class LETTER-class ? ] }
|
||||
{ "Alpha" [ Letter-class ] }
|
||||
{ "ASCII" [ ascii-class ] }
|
||||
{ "Digit" [ digit-class ] }
|
||||
{ "Alnum" [ alpha-class ] }
|
||||
{ "Punct" [ punctuation-class ] }
|
||||
|
@ -270,6 +275,13 @@ ERROR: bad-escaped-literals seq ;
|
|||
{ CHAR: 0 [ parse-octal <constant> ] }
|
||||
{ CHAR: c [ parse-control-character ] }
|
||||
|
||||
! { CHAR: b [ handle-word-boundary ] }
|
||||
! { CHAR: B [ handle-word-boundary <negation> ] }
|
||||
! { CHAR: A [ handle-beginning-of-input ] }
|
||||
! { CHAR: G [ end of previous match ] }
|
||||
! { CHAR: Z [ handle-end-of-input ] }
|
||||
! { CHAR: z [ handle-end-of-input ] } ! except for terminator
|
||||
|
||||
{ CHAR: Q [ parse-escaped-literals ] }
|
||||
} case ;
|
||||
|
||||
|
@ -293,7 +305,7 @@ ERROR: bad-escaped-literals seq ;
|
|||
handle-dash handle-caret ;
|
||||
|
||||
: apply-dash ( -- )
|
||||
stack [ pop3 nip character-class-range boa ] keep push ;
|
||||
stack [ pop3 nip <character-class-range> ] keep push ;
|
||||
|
||||
: apply-dash? ( -- ? )
|
||||
stack dup length 3 >=
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
USING: regexp2 tools.test kernel regexp2.traversal ;
|
||||
USING: regexp2 tools.test kernel regexp2.parser regexp2.traversal ;
|
||||
IN: regexp2-tests
|
||||
|
||||
[ f ] [ "b" "a*" <regexp> matches? ] unit-test
|
||||
|
@ -203,6 +203,8 @@ IN: regexp2-tests
|
|||
<regexp> drop
|
||||
] unit-test
|
||||
|
||||
[ "{Lower}" <regexp> ] [ invalid-range? ] must-fail-with
|
||||
|
||||
[ t ] [ "fxxbar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
|
||||
[ f ] [ "foobar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
|
||||
|
||||
|
@ -226,9 +228,25 @@ IN: regexp2-tests
|
|||
! [ t ] [ "fooxbar" "foo\\Bxbar" <regexp> matches? ] unit-test
|
||||
! [ f ] [ "foo" "foo\\Bbar" <regexp> matches? ] unit-test
|
||||
|
||||
! [ t ] [ "s@f" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
|
||||
! [ f ] [ "a" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
|
||||
! [ t ] [ ".o" "\\.[a-z]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "s@f" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "a" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
|
||||
[ t ] [ ".o" "\\.[a-z]" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "a" "(?i)a" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "(?i)a" <regexp> matches? ] unit-test
|
||||
[ t ] [ "A" "(?i)a" <regexp> matches? ] unit-test
|
||||
[ t ] [ "A" "(?i)a" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "a" "(?-i)a" <iregexp> matches? ] unit-test
|
||||
[ t ] [ "a" "(?-i)a" <iregexp> matches? ] unit-test
|
||||
[ f ] [ "A" "(?-i)a" <iregexp> matches? ] unit-test
|
||||
[ f ] [ "A" "(?-i)a" <iregexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "A" "[a-z]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "A" "[a-z]" <iregexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "A" "\\p{Lower}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "A" "\\p{Lower}" <iregexp> matches? ] unit-test
|
||||
|
||||
! Bug in parsing word
|
||||
! [ t ] [ "a" R' a' matches? ] unit-test
|
||||
|
|
Loading…
Reference in New Issue