case insensitive works
parent
9eba6c0034
commit
7d0d2da318
|
@ -19,6 +19,7 @@ TUPLE: regexp
|
||||||
0 >>state
|
0 >>state
|
||||||
V{ } clone >>stack
|
V{ } clone >>stack
|
||||||
V{ } clone >>new-states
|
V{ } clone >>new-states
|
||||||
|
H{ } clone >>options
|
||||||
H{ } clone >>visited-states ;
|
H{ } clone >>visited-states ;
|
||||||
|
|
||||||
SYMBOL: current-regexp
|
SYMBOL: current-regexp
|
||||||
|
|
|
@ -21,6 +21,9 @@ M: letter-class class-member? ( obj class -- ? )
|
||||||
M: LETTER-class class-member? ( obj class -- ? )
|
M: LETTER-class class-member? ( obj class -- ? )
|
||||||
drop LETTER? ;
|
drop LETTER? ;
|
||||||
|
|
||||||
|
M: Letter-class class-member? ( obj class -- ? )
|
||||||
|
drop Letter? ;
|
||||||
|
|
||||||
M: ascii-class class-member? ( obj class -- ? )
|
M: ascii-class class-member? ( obj class -- ? )
|
||||||
drop ascii? ;
|
drop ascii? ;
|
||||||
|
|
||||||
|
|
|
@ -31,18 +31,12 @@ SINGLETON: back-anchor INSTANCE: back-anchor node
|
||||||
TUPLE: option-on option ; INSTANCE: option-on node
|
TUPLE: option-on option ; INSTANCE: option-on node
|
||||||
TUPLE: option-off option ; INSTANCE: option-off node
|
TUPLE: option-off option ; INSTANCE: option-off node
|
||||||
SINGLETONS: unix-lines dotall multiline comments case-insensitive unicode-case ;
|
SINGLETONS: unix-lines dotall multiline comments case-insensitive unicode-case ;
|
||||||
MIXIN: regexp-option
|
|
||||||
INSTANCE: unix-lines regexp-option
|
|
||||||
INSTANCE: dotall regexp-option
|
|
||||||
INSTANCE: multiline regexp-option
|
|
||||||
INSTANCE: comments regexp-option
|
|
||||||
INSTANCE: case-insensitive regexp-option
|
|
||||||
INSTANCE: unicode-case regexp-option
|
|
||||||
|
|
||||||
SINGLETONS: letter-class LETTER-class Letter-class digit-class
|
SINGLETONS: letter-class LETTER-class Letter-class digit-class
|
||||||
alpha-class non-newline-blank-class
|
alpha-class non-newline-blank-class
|
||||||
ascii-class punctuation-class java-printable-class blank-class
|
ascii-class punctuation-class java-printable-class blank-class
|
||||||
control-character-class hex-digit-class java-blank-class c-identifier-class ;
|
control-character-class hex-digit-class java-blank-class c-identifier-class
|
||||||
|
unmatchable-class ;
|
||||||
|
|
||||||
SINGLETONS: beginning-of-group end-of-group
|
SINGLETONS: beginning-of-group end-of-group
|
||||||
beginning-of-character-class end-of-character-class
|
beginning-of-character-class end-of-character-class
|
||||||
|
@ -75,6 +69,17 @@ left-parenthesis pipe caret dash ;
|
||||||
: first|alternation ( seq -- first/alternation )
|
: first|alternation ( seq -- first/alternation )
|
||||||
dup length 1 = [ first ] [ <alternation> ] if ;
|
dup length 1 = [ first ] [ <alternation> ] if ;
|
||||||
|
|
||||||
|
! Build the parse node for a character-class range such as a-z.
|
||||||
|
! from and to are the endpoint characters. The result is a
|
||||||
|
! character-class-range, an alternation of ranges, or unmatchable-class
|
||||||
|
! when no range with from < to can be formed.
|
||||||
|
: <character-class-range> ( from to -- obj )
|
||||||
|
2dup [ Letter? ] bi@ or get-case-insensitive and [
|
||||||
|
! Case-insensitive with a letter endpoint: build the lower-case and
|
||||||
|
! upper-case ranges and keep only those whose from is below to.
|
||||||
|
[ [ ch>lower ] bi@ character-class-range boa ]
|
||||||
|
[ [ ch>upper ] bi@ character-class-range boa ] 2bi
|
||||||
|
2array [ [ from>> ] [ to>> ] bi < ] filter
|
||||||
|
[ unmatchable-class ] [ first|alternation ] if-empty
|
||||||
|
] [
|
||||||
|
! The stack holds the two raw endpoints here, not a range tuple, so
|
||||||
|
! compare them directly; from>> / to>> cannot be applied to them.
|
||||||
|
2dup <
|
||||||
|
[ character-class-range boa ] [ 2drop unmatchable-class ] if
|
||||||
|
] if ;
|
||||||
|
|
||||||
ERROR: unmatched-parentheses ;
|
ERROR: unmatched-parentheses ;
|
||||||
|
|
||||||
: make-positive-lookahead ( string -- )
|
: make-positive-lookahead ( string -- )
|
||||||
|
@ -213,10 +218,10 @@ ERROR: expected-posix-class ;
|
||||||
read1 CHAR: { = [ expected-posix-class ] unless
|
read1 CHAR: { = [ expected-posix-class ] unless
|
||||||
"}" read-until [ bad-character-class ] unless
|
"}" read-until [ bad-character-class ] unless
|
||||||
{
|
{
|
||||||
{ "Lower" [ letter-class ] }
|
{ "Lower" [ get-case-insensitive Letter-class letter-class ? ] }
|
||||||
{ "Upper" [ LETTER-class ] }
|
{ "Upper" [ get-case-insensitive Letter-class LETTER-class ? ] }
|
||||||
{ "ASCII" [ ascii-class ] }
|
|
||||||
{ "Alpha" [ Letter-class ] }
|
{ "Alpha" [ Letter-class ] }
|
||||||
|
{ "ASCII" [ ascii-class ] }
|
||||||
{ "Digit" [ digit-class ] }
|
{ "Digit" [ digit-class ] }
|
||||||
{ "Alnum" [ alpha-class ] }
|
{ "Alnum" [ alpha-class ] }
|
||||||
{ "Punct" [ punctuation-class ] }
|
{ "Punct" [ punctuation-class ] }
|
||||||
|
@ -270,6 +275,13 @@ ERROR: bad-escaped-literals seq ;
|
||||||
{ CHAR: 0 [ parse-octal <constant> ] }
|
{ CHAR: 0 [ parse-octal <constant> ] }
|
||||||
{ CHAR: c [ parse-control-character ] }
|
{ CHAR: c [ parse-control-character ] }
|
||||||
|
|
||||||
|
! { CHAR: b [ handle-word-boundary ] }
|
||||||
|
! { CHAR: B [ handle-word-boundary <negation> ] }
|
||||||
|
! { CHAR: A [ handle-beginning-of-input ] }
|
||||||
|
! { CHAR: G [ end of previous match ] }
|
||||||
|
! { CHAR: Z [ handle-end-of-input ] }
|
||||||
|
! { CHAR: z [ handle-end-of-input ] } ! except for terminator
|
||||||
|
|
||||||
{ CHAR: Q [ parse-escaped-literals ] }
|
{ CHAR: Q [ parse-escaped-literals ] }
|
||||||
} case ;
|
} case ;
|
||||||
|
|
||||||
|
@ -293,7 +305,7 @@ ERROR: bad-escaped-literals seq ;
|
||||||
handle-dash handle-caret ;
|
handle-dash handle-caret ;
|
||||||
|
|
||||||
: apply-dash ( -- )
|
: apply-dash ( -- )
|
||||||
stack [ pop3 nip character-class-range boa ] keep push ;
|
stack [ pop3 nip <character-class-range> ] keep push ;
|
||||||
|
|
||||||
: apply-dash? ( -- ? )
|
: apply-dash? ( -- ? )
|
||||||
stack dup length 3 >=
|
stack dup length 3 >=
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
USING: regexp2 tools.test kernel regexp2.traversal ;
|
USING: regexp2 tools.test kernel regexp2.parser regexp2.traversal ;
|
||||||
IN: regexp2-tests
|
IN: regexp2-tests
|
||||||
|
|
||||||
[ f ] [ "b" "a*" <regexp> matches? ] unit-test
|
[ f ] [ "b" "a*" <regexp> matches? ] unit-test
|
||||||
|
@ -203,6 +203,8 @@ IN: regexp2-tests
|
||||||
<regexp> drop
|
<regexp> drop
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
||||||
|
[ "{Lower}" <regexp> ] [ invalid-range? ] must-fail-with
|
||||||
|
|
||||||
[ t ] [ "fxxbar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
|
[ t ] [ "fxxbar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
|
||||||
[ f ] [ "foobar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
|
[ f ] [ "foobar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
|
||||||
|
|
||||||
|
@ -226,9 +228,25 @@ IN: regexp2-tests
|
||||||
! [ t ] [ "fooxbar" "foo\\Bxbar" <regexp> matches? ] unit-test
|
! [ t ] [ "fooxbar" "foo\\Bxbar" <regexp> matches? ] unit-test
|
||||||
! [ f ] [ "foo" "foo\\Bbar" <regexp> matches? ] unit-test
|
! [ f ] [ "foo" "foo\\Bbar" <regexp> matches? ] unit-test
|
||||||
|
|
||||||
! [ t ] [ "s@f" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
|
[ t ] [ "s@f" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
|
||||||
! [ f ] [ "a" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
|
[ f ] [ "a" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
|
||||||
! [ t ] [ ".o" "\\.[a-z]" <regexp> matches? ] unit-test
|
[ t ] [ ".o" "\\.[a-z]" <regexp> matches? ] unit-test
|
||||||
|
|
||||||
|
[ t ] [ "a" "(?i)a" <regexp> matches? ] unit-test
|
||||||
|
[ t ] [ "a" "(?i)a" <regexp> matches? ] unit-test
|
||||||
|
[ t ] [ "A" "(?i)a" <regexp> matches? ] unit-test
|
||||||
|
[ t ] [ "A" "(?i)a" <regexp> matches? ] unit-test
|
||||||
|
|
||||||
|
[ t ] [ "a" "(?-i)a" <iregexp> matches? ] unit-test
|
||||||
|
[ t ] [ "a" "(?-i)a" <iregexp> matches? ] unit-test
|
||||||
|
[ f ] [ "A" "(?-i)a" <iregexp> matches? ] unit-test
|
||||||
|
[ f ] [ "A" "(?-i)a" <iregexp> matches? ] unit-test
|
||||||
|
|
||||||
|
[ f ] [ "A" "[a-z]" <regexp> matches? ] unit-test
|
||||||
|
[ t ] [ "A" "[a-z]" <iregexp> matches? ] unit-test
|
||||||
|
|
||||||
|
[ f ] [ "A" "\\p{Lower}" <regexp> matches? ] unit-test
|
||||||
|
[ t ] [ "A" "\\p{Lower}" <iregexp> matches? ] unit-test
|
||||||
|
|
||||||
! Bug in parsing word
|
! Bug in parsing word
|
||||||
! [ t ] [ "a" R' a' matches? ] unit-test
|
! [ t ] [ "a" R' a' matches? ] unit-test
|
||||||
|
|
Loading…
Reference in New Issue