From 48802aeee6e509ba576e9730f93adf1bed8a21a2 Mon Sep 17 00:00:00 2001 From: erg Date: Wed, 21 May 2008 15:58:55 -0500 Subject: [PATCH 1/3] fix the unit tests --- extra/state-tables/state-tables-tests.factor | 35 ++++++++++++-------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/extra/state-tables/state-tables-tests.factor b/extra/state-tables/state-tables-tests.factor index b46cc94266..b86c4f57d9 100644 --- a/extra/state-tables/state-tables-tests.factor +++ b/extra/state-tables/state-tables-tests.factor @@ -1,5 +1,5 @@ -USING: kernel tables tools.test ; -IN: tables.tests +USING: kernel state-tables tools.test ; +IN: state-tables.tests : test-table @@ -11,12 +11,17 @@ IN: tables.tests "b" "s" "u" over set-entry ; [ - T{ table f - H{ - { "a" H{ { "l" "x" } { "c" "z" } { "o" "y" } } } - { "b" H{ { "l" "x" } { "s" "u" } { "o" "y" } } } + T{ + table + f + H{ + { "a" H{ { "l" "x" } { "c" "z" } { "o" "y" } } } + { "b" H{ { "l" "x" } { "s" "u" } { "o" "y" } } } + } + H{ { "l" t } { "s" t } { "c" t } { "o" t } } + f + H{ } } - H{ { "l" t } { "s" t } { "c" t } { "o" t } } } ] [ test-table ] unit-test [ "x" t ] [ "a" "l" test-table get-entry ] unit-test @@ -27,13 +32,13 @@ IN: tables.tests : vector-test-table - "a" "c" "z" over add-value - "a" "c" "r" over add-value - "a" "o" "y" over add-value - "a" "l" "x" over add-value - "b" "o" "y" over add-value - "b" "l" "x" over add-value - "b" "s" "u" over add-value ; + "a" "c" "z" over add-entry + "a" "c" "r" over add-entry + "a" "o" "y" over add-entry + "a" "l" "x" over add-entry + "b" "o" "y" over add-entry + "b" "l" "x" over add-entry + "b" "s" "u" over add-entry ; [ T{ vector-table f @@ -44,6 +49,8 @@ T{ vector-table f H{ { "l" "x" } { "s" "u" } { "o" "y" } } } } H{ { "l" t } { "s" t } { "c" t } { "o" t } } + f + H{ } } ] [ vector-test-table ] unit-test From fcab249f3e00add41a9019c50b3c5b99ae664b6a Mon Sep 17 00:00:00 2001 From: erg Date: Wed, 21 May 2008 15:59:08 -0500 Subject: [PATCH 2/3] more unit tests pass for character classes --- extra/regexp4/regexp4-tests.factor | 3 +- extra/regexp4/regexp4.factor | 56 ++++++++++++++++++++---------- 2 files changed, 40 insertions(+), 19 deletions(-) diff --git a/extra/regexp4/regexp4-tests.factor b/extra/regexp4/regexp4-tests.factor index ec8656bc27..8078932877 100644 --- a/extra/regexp4/regexp4-tests.factor +++ b/extra/regexp4/regexp4-tests.factor @@ -95,8 +95,9 @@ IN: regexp4-tests [ t ] [ "]" "[]]" matches? ] unit-test [ f ] [ "]" "[^]]" matches? ] unit-test +[ t ] [ "a" "[^]]" matches? ] unit-test -[ "^" "[^]" matches? ] must-fail +[ t ] [ "^" "[^]" matches? ] must-fail [ t ] [ "^" "[]^]" matches? ] unit-test [ t ] [ "]" "[]^]" matches? ] unit-test diff --git a/extra/regexp4/regexp4.factor b/extra/regexp4/regexp4.factor index 07ef430de3..dbb7495826 100644 --- a/extra/regexp4/regexp4.factor +++ b/extra/regexp4/regexp4.factor @@ -15,8 +15,7 @@ SYMBOL: runtime-epsilon TUPLE: regexp raw parentheses-count bracket-count state stack nfa new-states dfa minimized-dfa -dot-matches-newlines? character-sets capture-group -captured-groups ; +dot-matches-newlines? capture-group captured-groups ; TUPLE: capture-group n range ; @@ -336,11 +335,26 @@ ERROR: bad-hex number ; [ dup digit? [ parse-backreference ] [ make-nontoken-nfa ] if ] } case ; -: make-character-set ( regexp -- ) +: handle-dash ( vector -- vector ) + [ dup dash eq? [ drop CHAR: - ] when ] map ; + +ERROR: unmatched-negated-character-class class ; + +: handle-caret ( vector -- vector ? ) + dup [ length 2 >= ] [ first caret eq? ] bi and [ + rest t + ] [ + f + ] if ; + +: make-character-class ( regexp -- ) left-bracket over stack>> cut-stack pick (>>stack) - [ dup number? [ '[ dup , = ] ] when ] map - [ [ drop t ] 2array ] map [ drop f ] suffix [ cond ] curry + handle-dash + handle-caret + >r [ dup number? [ '[ dup , = ] ] when ] map + [ [ drop t ] 2array ] map [ drop f ] suffix [ cond ] curry r> + [ [ not ] compose ] when make-nontoken-nfa ; : apply-dash ( regexp -- ) @@ -351,18 +365,18 @@ ERROR: bad-hex number ; stack>> dup length 3 >= [ [ length 2 - ] keep nth dash eq? ] [ drop f ] if ; -DEFER: parse-character-set -: (parse-character-set) ( regexp -- ) +DEFER: parse-character-class +: (parse-character-class) ( regexp -- ) [ next get-char { { CHAR: [ [ [ 1+ ] change-bracket-count left-bracket push-stack - parse-character-set + parse-character-class ] } { CHAR: ] [ [ 1- ] change-bracket-count - make-character-set + make-character-class ] } { CHAR: - [ dash push-stack ] } ! { CHAR: & [ ampersand push-stack ] } @@ -373,20 +387,27 @@ DEFER: parse-character-set } case ] [ dup bracket-count>> 0 > - [ (parse-character-set) ] [ drop ] if + [ (parse-character-class) ] [ drop ] if ] bi ; -: parse-character-set-first ( regexp -- ) +: parse-character-class-second ( regexp -- ) get-next { - { CHAR: ^ [ caret push-stack next ] } - { CHAR: [ [ CHAR: [ make-nontoken-nfa next ] } - { CHAR: ] [ CHAR: ] make-nontoken-nfa next ] } + { CHAR: ] [ CHAR: ] push-stack next ] } [ 2drop ] } case ; -: parse-character-set ( regexp -- ) - [ parse-character-set-first ] [ (parse-character-set) ] bi ; +: parse-character-class-first ( regexp -- ) + get-next + { + { CHAR: ^ [ caret dupd push-stack next parse-character-class-second ] } + { CHAR: [ [ CHAR: [ push-stack next ] } + { CHAR: ] [ CHAR: ] push-stack next ] } + [ 2drop ] + } case ; + +: parse-character-class ( regexp -- ) + [ parse-character-class-first ] [ (parse-character-class) ] bi ; ERROR: unsupported-token token ; : parse-token ( regexp token -- ) @@ -404,7 +425,7 @@ ERROR: unsupported-token token ; { CHAR: [ [ drop dup left-bracket push-stack - [ 1+ ] change-bracket-count parse-character-set + [ 1+ ] change-bracket-count parse-character-class ] } ! { CHAR: } [ drop drop "brace" ] } ! { CHAR: ? [ drop ] } @@ -517,7 +538,6 @@ ERROR: unsupported-token token ; 0 >>bracket-count -1 >>state V{ } clone >>stack - V{ } clone >>character-sets >>nfa dup [ parse-raw-regexp ] [ subset-construction ] bi ; From 5b0d7bb3b7746c2f2da314ca4a3383e61b75b283 Mon Sep 17 00:00:00 2001 From: erg Date: Wed, 21 May 2008 16:08:24 -0500 Subject: [PATCH 3/3] fix another unit test --- extra/regexp4/regexp4.factor | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/extra/regexp4/regexp4.factor b/extra/regexp4/regexp4.factor index dbb7495826..add2e0c477 100644 --- a/extra/regexp4/regexp4.factor +++ b/extra/regexp4/regexp4.factor @@ -393,7 +393,9 @@ DEFER: parse-character-class : parse-character-class-second ( regexp -- ) get-next { + ! { CHAR: [ [ CHAR: [ push-stack next ] } { CHAR: ] [ CHAR: ] push-stack next ] } + { CHAR: - [ CHAR: - push-stack next ] } [ 2drop ] } case ; @@ -401,8 +403,9 @@ DEFER: parse-character-class get-next { { CHAR: ^ [ caret dupd push-stack next parse-character-class-second ] } - { CHAR: [ [ CHAR: [ push-stack next ] } + ! { CHAR: [ [ CHAR: [ push-stack next ] } { CHAR: ] [ CHAR: ] push-stack next ] } + { CHAR: - [ CHAR: - push-stack next ] } [ 2drop ] } case ;