diff --git a/extra/regexp4/regexp4-tests.factor b/extra/regexp4/regexp4-tests.factor index ec8656bc27..8078932877 100644 --- a/extra/regexp4/regexp4-tests.factor +++ b/extra/regexp4/regexp4-tests.factor @@ -95,8 +95,9 @@ IN: regexp4-tests [ t ] [ "]" "[]]" <regexp> matches? ] unit-test [ f ] [ "]" "[^]]" <regexp> matches? ] unit-test +[ t ] [ "a" "[^]]" <regexp> matches? ] unit-test -[ "^" "[^]" <regexp> matches? ] must-fail +[ t ] [ "^" "[^]" <regexp> matches? ] must-fail [ t ] [ "^" "[]^]" <regexp> matches? ] unit-test [ t ] [ "]" "[]^]" <regexp> matches? ] unit-test diff --git a/extra/regexp4/regexp4.factor b/extra/regexp4/regexp4.factor index 07ef430de3..add2e0c477 100644 --- a/extra/regexp4/regexp4.factor +++ b/extra/regexp4/regexp4.factor @@ -15,8 +15,7 @@ SYMBOL: runtime-epsilon TUPLE: regexp raw parentheses-count bracket-count state stack nfa new-states dfa minimized-dfa -dot-matches-newlines? character-sets capture-group -captured-groups ; +dot-matches-newlines? capture-group captured-groups ; TUPLE: capture-group n range ; @@ -336,11 +335,26 @@ ERROR: bad-hex number ; [ dup digit? [ parse-backreference ] [ make-nontoken-nfa ] if ] } case ; -: make-character-set ( regexp -- ) +: handle-dash ( vector -- vector ) + [ dup dash eq? [ drop CHAR: - ] when ] map ; + +ERROR: unmatched-negated-character-class class ; + +: handle-caret ( vector -- vector ? ) + dup [ length 2 >= ] [ first caret eq? ] bi and [ + rest t + ] [ + f + ] if ; + +: make-character-class ( regexp -- ) left-bracket over stack>> cut-stack pick (>>stack) - [ dup number? [ '[ dup , = ] ] when ] map - [ [ drop t ] 2array ] map [ drop f ] suffix [ cond ] curry + handle-dash + handle-caret + >r [ dup number? [ '[ dup , = ] ] when ] map + [ [ drop t ] 2array ] map [ drop f ] suffix [ cond ] curry r> + [ [ not ] compose ] when make-nontoken-nfa ; : apply-dash ( regexp -- ) @@ -351,18 +365,18 @@ ERROR: bad-hex number ; stack>> dup length 3 >= [ [ length 2 - ] keep nth dash eq? ] [ drop f ] if ; -DEFER: parse-character-set -: (parse-character-set) ( regexp -- ) +DEFER: parse-character-class +: (parse-character-class) ( regexp -- ) [ next get-char { { CHAR: [ [ [ 1+ ] change-bracket-count left-bracket push-stack - parse-character-set + parse-character-class ] } { CHAR: ] [ [ 1- ] change-bracket-count - make-character-set + make-character-class ] } { CHAR: - [ dash push-stack ] } ! { CHAR: & [ ampersand push-stack ] } @@ -373,20 +387,30 @@ DEFER: parse-character-set } case ] [ dup bracket-count>> 0 > - [ (parse-character-set) ] [ drop ] if + [ (parse-character-class) ] [ drop ] if ] bi ; -: parse-character-set-first ( regexp -- ) +: parse-character-class-second ( regexp -- ) get-next { - { CHAR: ^ [ caret push-stack next ] } - { CHAR: [ [ CHAR: [ make-nontoken-nfa next ] } - { CHAR: ] [ CHAR: ] make-nontoken-nfa next ] } + ! { CHAR: [ [ CHAR: [ push-stack next ] } + { CHAR: ] [ CHAR: ] push-stack next ] } + { CHAR: - [ CHAR: - push-stack next ] } [ 2drop ] } case ; -: parse-character-set ( regexp -- ) - [ parse-character-set-first ] [ (parse-character-set) ] bi ; +: parse-character-class-first ( regexp -- ) + get-next + { + { CHAR: ^ [ caret dupd push-stack next parse-character-class-second ] } + ! { CHAR: [ [ CHAR: [ push-stack next ] } + { CHAR: ] [ CHAR: ] push-stack next ] } + { CHAR: - [ CHAR: - push-stack next ] } + [ 2drop ] + } case ; + +: parse-character-class ( regexp -- ) + [ parse-character-class-first ] [ (parse-character-class) ] bi ; ERROR: unsupported-token token ; : parse-token ( regexp token -- ) @@ -404,7 +428,7 @@ ERROR: unsupported-token token ; { CHAR: [ [ drop dup left-bracket push-stack - [ 1+ ] change-bracket-count parse-character-set + [ 1+ ] change-bracket-count parse-character-class ] } ! { CHAR: } [ drop drop "brace" ] } ! { CHAR: ? [ drop ] } @@ -517,7 +541,6 @@ ERROR: unsupported-token token ; 0 >>bracket-count -1 >>state V{ } clone >>stack - V{ } clone >>character-sets <vector-table> >>nfa dup [ parse-raw-regexp ] [ subset-construction ] bi ; diff --git a/extra/state-tables/state-tables-tests.factor b/extra/state-tables/state-tables-tests.factor index b46cc94266..b86c4f57d9 100644 --- a/extra/state-tables/state-tables-tests.factor +++ b/extra/state-tables/state-tables-tests.factor @@ -1,5 +1,5 @@ -USING: kernel tables tools.test ; -IN: tables.tests +USING: kernel state-tables tools.test ; +IN: state-tables.tests : test-table <table> @@ -11,12 +11,17 @@ IN: tables.tests "b" "s" "u" <entry> over set-entry ; [ - T{ table f - H{ - { "a" H{ { "l" "x" } { "c" "z" } { "o" "y" } } } - { "b" H{ { "l" "x" } { "s" "u" } { "o" "y" } } } + T{ + table + f + H{ + { "a" H{ { "l" "x" } { "c" "z" } { "o" "y" } } } + { "b" H{ { "l" "x" } { "s" "u" } { "o" "y" } } } + } + H{ { "l" t } { "s" t } { "c" t } { "o" t } } + f + H{ } } - H{ { "l" t } { "s" t } { "c" t } { "o" t } } } ] [ test-table ] unit-test [ "x" t ] [ "a" "l" test-table get-entry ] unit-test @@ -27,13 +32,13 @@ IN: tables.tests : vector-test-table <vector-table> - "a" "c" "z" <entry> over add-value - "a" "c" "r" <entry> over add-value - "a" "o" "y" <entry> over add-value - "a" "l" "x" <entry> over add-value - "b" "o" "y" <entry> over add-value - "b" "l" "x" <entry> over add-value - "b" "s" "u" <entry> over add-value ; + "a" "c" "z" <entry> over add-entry + "a" "c" "r" <entry> over add-entry + "a" "o" "y" <entry> over add-entry + "a" "l" "x" <entry> over add-entry + "b" "o" "y" <entry> over add-entry + "b" "l" "x" <entry> over add-entry + "b" "s" "u" <entry> over add-entry ; [ T{ vector-table f @@ -44,6 +49,8 @@ T{ vector-table f H{ { "l" "x" } { "s" "u" } { "o" "y" } } } } H{ { "l" t } { "s" t } { "c" t } { "o" t } } + f + H{ } } ] [ vector-test-table ] unit-test