case insensitive works

db4
Doug Coleman 2008-08-21 17:55:25 -05:00
parent 9eba6c0034
commit 7d0d2da318
4 changed files with 50 additions and 16 deletions

View File

@ -19,6 +19,7 @@ TUPLE: regexp
0 >>state
V{ } clone >>stack
V{ } clone >>new-states
H{ } clone >>options
H{ } clone >>visited-states ;
! Declares the symbol current-regexp. Presumably used as a variable key for
! the regexp currently being processed -- TODO confirm against the words
! that set and get it.
SYMBOL: current-regexp

View File

@ -21,6 +21,9 @@ M: letter-class class-member? ( obj class -- ? )
! class-member? methods for singleton character classes. Each method
! discards the class object (it only selects the method) and applies the
! corresponding character predicate to obj, leaving its boolean result.
M: LETTER-class class-member? ( obj class -- ? )
drop LETTER? ;
! Letter-class delegates to Letter?; presumably "a letter of either case"
! -- confirm against the definition of Letter?.
M: Letter-class class-member? ( obj class -- ? )
drop Letter? ;
M: ascii-class class-member? ( obj class -- ? )
drop ascii? ;

View File

@ -31,18 +31,12 @@ SINGLETON: back-anchor INSTANCE: back-anchor node
TUPLE: option-on option ; INSTANCE: option-on node
TUPLE: option-off option ; INSTANCE: option-off node
SINGLETONS: unix-lines dotall multiline comments case-insensitive unicode-case ;
MIXIN: regexp-option
INSTANCE: unix-lines regexp-option
INSTANCE: dotall regexp-option
INSTANCE: multiline regexp-option
INSTANCE: comments regexp-option
INSTANCE: case-insensitive regexp-option
INSTANCE: unicode-case regexp-option
SINGLETONS: letter-class LETTER-class Letter-class digit-class
alpha-class non-newline-blank-class
ascii-class punctuation-class java-printable-class blank-class
control-character-class hex-digit-class java-blank-class c-identifier-class ;
control-character-class hex-digit-class java-blank-class c-identifier-class
unmatchable-class ;
SINGLETONS: beginning-of-group end-of-group
beginning-of-character-class end-of-character-class
@ -75,6 +69,17 @@ left-parenthesis pipe caret dash ;
! Collapse a one-element sequence to that element; any other sequence is
! handed unchanged to <alternation>.
: first|alternation ( seq -- first/alternation )
    dup length {
        { 1 [ first ] }
        ! Default branch: case leaves the length on the stack, so drop it
        ! before building the alternation from the sequence.
        [ drop <alternation> ]
    } case ;
! Build the node for a character-class range.
!   from, to -- the two endpoints of the range
!   obj      -- a character-class-range tuple, an alternation of two such
!               tuples, or unmatchable-class when no range has from < to
: <character-class-range> ( from to -- obj )
    2dup [ Letter? ] bi@ or get-case-insensitive and [
        ! Case-insensitive and at least one endpoint satisfies Letter?:
        ! build both the lowercased and the uppercased range, then keep
        ! only those whose endpoints are still in increasing order.
        [ [ ch>lower ] bi@ character-class-range boa ]
        [ [ ch>upper ] bi@ character-class-range boa ] 2bi
        2array [ [ from>> ] [ to>> ] bi < ] filter
        [ unmatchable-class ] [ first|alternation ] if-empty
    ] [
        ! The stack still holds the two raw endpoints here (no tuple has
        ! been constructed yet), so compare them directly; slot accessors
        ! such as from>> cannot be applied to a bare endpoint.
        ! NOTE(review): the strict < also rejects a range whose endpoints
        ! are equal, in both branches -- confirm that is intended.
        2dup <
        [ character-class-range boa ] [ 2drop unmatchable-class ] if
    ] if ;
ERROR: unmatched-parentheses ;
: make-positive-lookahead ( string -- )
@ -213,10 +218,10 @@ ERROR: expected-posix-class ;
read1 CHAR: { = [ expected-posix-class ] unless
"}" read-until [ bad-character-class ] unless
{
{ "Lower" [ letter-class ] }
{ "Upper" [ LETTER-class ] }
{ "ASCII" [ ascii-class ] }
{ "Lower" [ get-case-insensitive Letter-class letter-class ? ] }
{ "Upper" [ get-case-insensitive Letter-class LETTER-class ? ] }
{ "Alpha" [ Letter-class ] }
{ "ASCII" [ ascii-class ] }
{ "Digit" [ digit-class ] }
{ "Alnum" [ alpha-class ] }
{ "Punct" [ punctuation-class ] }
@ -270,6 +275,13 @@ ERROR: bad-escaped-literals seq ;
{ CHAR: 0 [ parse-octal <constant> ] }
{ CHAR: c [ parse-control-character ] }
! { CHAR: b [ handle-word-boundary ] }
! { CHAR: B [ handle-word-boundary <negation> ] }
! { CHAR: A [ handle-beginning-of-input ] }
! { CHAR: G [ end of previous match ] }
! { CHAR: Z [ handle-end-of-input ] }
! { CHAR: z [ handle-end-of-input ] } ! except for terminator
{ CHAR: Q [ parse-escaped-literals ] }
} case ;
@ -293,7 +305,7 @@ ERROR: bad-escaped-literals seq ;
handle-dash handle-caret ;
! Pops three items from the parser stack with pop3, discards the middle one
! with nip (presumably the dash token -- TODO confirm), builds a range from
! the remaining two, and pushes the result back onto the same stack.
! The two body lines below are the before/after pair of this change: the
! first builds the tuple directly with boa, the second goes through the
! <character-class-range> constructor word instead.
: apply-dash ( -- )
stack [ pop3 nip character-class-range boa ] keep push ;
stack [ pop3 nip <character-class-range> ] keep push ;
: apply-dash? ( -- ? )
stack dup length 3 >=

View File

@ -1,4 +1,4 @@
USING: regexp2 tools.test kernel regexp2.traversal ;
USING: regexp2 tools.test kernel regexp2.parser regexp2.traversal ;
IN: regexp2-tests
[ f ] [ "b" "a*" <regexp> matches? ] unit-test
@ -203,6 +203,8 @@ IN: regexp2-tests
<regexp> drop
] unit-test
! Constructing a regexp from the bare string "{Lower}" must fail with an
! error that satisfies invalid-range?.
[ "{Lower}" <regexp> ] [ invalid-range? ] must-fail-with
! A (?!foo) group followed by .{3}bar: "fxxbar" is expected to match,
! "foobar" is expected not to.
[ t ] [ "fxxbar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
[ f ] [ "foobar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
@ -226,9 +228,25 @@ IN: regexp2-tests
! [ t ] [ "fooxbar" "foo\\Bxbar" <regexp> matches? ] unit-test
! [ f ] [ "foo" "foo\\Bbar" <regexp> matches? ] unit-test
! [ t ] [ "s@f" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
! [ f ] [ "a" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
! [ t ] [ ".o" "\\.[a-z]" <regexp> matches? ] unit-test
[ t ] [ "s@f" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
[ f ] [ "a" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
[ t ] [ ".o" "\\.[a-z]" <regexp> matches? ] unit-test
! Inline (?i) option on a plain <regexp>: the pattern "a" matches the
! input in either case.
[ t ] [ "a" "(?i)a" <regexp> matches? ] unit-test
[ t ] [ "A" "(?i)a" <regexp> matches? ] unit-test
! Inline (?-i) option on an <iregexp>: only the same-case input matches.
[ t ] [ "a" "(?-i)a" <iregexp> matches? ] unit-test
[ f ] [ "A" "(?-i)a" <iregexp> matches? ] unit-test
! Character-class ranges: "A" matches [a-z] only under <iregexp>.
[ f ] [ "A" "[a-z]" <regexp> matches? ] unit-test
[ t ] [ "A" "[a-z]" <iregexp> matches? ] unit-test
! \p{Lower}: "A" matches only under <iregexp>.
[ f ] [ "A" "\\p{Lower}" <regexp> matches? ] unit-test
[ t ] [ "A" "\\p{Lower}" <iregexp> matches? ] unit-test
! Disabled: bug in the R' parsing word
! [ t ] [ "a" R' a' matches? ] unit-test