more unit tests pass for character classes

db4
erg 2008-05-21 15:59:08 -05:00
parent 48802aeee6
commit fcab249f3e
2 changed files with 40 additions and 19 deletions

View File

@ -95,8 +95,9 @@ IN: regexp4-tests
[ t ] [ "]" "[]]" <regexp> matches? ] unit-test
[ f ] [ "]" "[^]]" <regexp> matches? ] unit-test
[ t ] [ "a" "[^]]" <regexp> matches? ] unit-test
[ "^" "[^]" <regexp> matches? ] must-fail
[ t ] [ "^" "[^]" <regexp> matches? ] must-fail
[ t ] [ "^" "[]^]" <regexp> matches? ] unit-test
[ t ] [ "]" "[]^]" <regexp> matches? ] unit-test

View File

@ -15,8 +15,7 @@ SYMBOL: runtime-epsilon
TUPLE: regexp raw parentheses-count bracket-count
state stack nfa new-states dfa minimized-dfa
dot-matches-newlines? character-sets capture-group
captured-groups ;
dot-matches-newlines? capture-group captured-groups ;
TUPLE: capture-group n range ;
@ -336,11 +335,26 @@ ERROR: bad-hex number ;
[ dup digit? [ parse-backreference ] [ make-nontoken-nfa ] if ]
} case ;
: make-character-set ( regexp -- )
: handle-dash ( vector -- vector )
[ dup dash eq? [ drop CHAR: - ] when ] map ;
ERROR: unmatched-negated-character-class class ;
: handle-caret ( vector -- vector ? )
dup [ length 2 >= ] [ first caret eq? ] bi and [
rest t
] [
f
] if ;
: make-character-class ( regexp -- )
left-bracket over stack>> cut-stack
pick (>>stack)
[ dup number? [ '[ dup , = ] ] when ] map
[ [ drop t ] 2array ] map [ drop f ] suffix [ cond ] curry
handle-dash
handle-caret
>r [ dup number? [ '[ dup , = ] ] when ] map
[ [ drop t ] 2array ] map [ drop f ] suffix [ cond ] curry r>
[ [ not ] compose ] when
make-nontoken-nfa ;
: apply-dash ( regexp -- )
@ -351,18 +365,18 @@ ERROR: bad-hex number ;
stack>> dup length 3 >=
[ [ length 2 - ] keep nth dash eq? ] [ drop f ] if ;
DEFER: parse-character-set
: (parse-character-set) ( regexp -- )
DEFER: parse-character-class
: (parse-character-class) ( regexp -- )
[
next get-char
{
{ CHAR: [ [
[ 1+ ] change-bracket-count left-bracket push-stack
parse-character-set
parse-character-class
] }
{ CHAR: ] [
[ 1- ] change-bracket-count
make-character-set
make-character-class
] }
{ CHAR: - [ dash push-stack ] }
! { CHAR: & [ ampersand push-stack ] }
@ -373,20 +387,27 @@ DEFER: parse-character-set
} case
] [
dup bracket-count>> 0 >
[ (parse-character-set) ] [ drop ] if
[ (parse-character-class) ] [ drop ] if
] bi ;
: parse-character-set-first ( regexp -- )
: parse-character-class-second ( regexp -- )
get-next
{
{ CHAR: ^ [ caret push-stack next ] }
{ CHAR: [ [ CHAR: [ make-nontoken-nfa next ] }
{ CHAR: ] [ CHAR: ] make-nontoken-nfa next ] }
{ CHAR: ] [ CHAR: ] push-stack next ] }
[ 2drop ]
} case ;
: parse-character-set ( regexp -- )
[ parse-character-set-first ] [ (parse-character-set) ] bi ;
: parse-character-class-first ( regexp -- )
get-next
{
{ CHAR: ^ [ caret dupd push-stack next parse-character-class-second ] }
{ CHAR: [ [ CHAR: [ push-stack next ] }
{ CHAR: ] [ CHAR: ] push-stack next ] }
[ 2drop ]
} case ;
: parse-character-class ( regexp -- )
[ parse-character-class-first ] [ (parse-character-class) ] bi ;
ERROR: unsupported-token token ;
: parse-token ( regexp token -- )
@ -404,7 +425,7 @@ ERROR: unsupported-token token ;
{ CHAR: [ [
drop
dup left-bracket push-stack
[ 1+ ] change-bracket-count parse-character-set
[ 1+ ] change-bracket-count parse-character-class
] }
! { CHAR: } [ drop drop "brace" ] }
! { CHAR: ? [ drop ] }
@ -517,7 +538,6 @@ ERROR: unsupported-token token ;
0 >>bracket-count
-1 >>state
V{ } clone >>stack
V{ } clone >>character-sets
<vector-table> >>nfa
dup [ parse-raw-regexp ] [ subset-construction ] bi ;