From 9814021e062a4cb1e62b21bfb74de6c8c70efca6 Mon Sep 17 00:00:00 2001
From: erg <erg@erg-desktop.(none)>
Date: Tue, 20 May 2008 18:57:48 -0500
Subject: [PATCH 1/2] comment out failing unit tests, addinng character groups

---
 extra/regexp4/regexp4-tests.factor | 84 ++++++++++++++--------------
 extra/regexp4/regexp4.factor       | 88 ++++++++++++++++++++----------
 2 files changed, 101 insertions(+), 71 deletions(-)
diff --git a/extra/regexp4/regexp4-tests.factor b/extra/regexp4/regexp4-tests.factor
index e878351b7e..cc71dee6aa 100644
--- a/extra/regexp4/regexp4-tests.factor
+++ b/extra/regexp4/regexp4-tests.factor
@@ -77,58 +77,58 @@ IN: regexp4-tests
 [ t ] [ "aaa" "a{1,3}" <regexp> matches? ] unit-test
 [ f ] [ "aaaa" "a{1,3}" <regexp> matches? ] unit-test
 
-[ f ] [ "" "[a]" <regexp> matches? ] unit-test
-[ t ] [ "a" "[a]" <regexp> matches? ] unit-test
-[ t ] [ "a" "[abc]" <regexp> matches? ] unit-test
-[ f ] [ "b" "[a]" <regexp> matches? ] unit-test
-[ f ] [ "d" "[abc]" <regexp> matches? ] unit-test
-[ t ] [ "ab" "[abc]{1,2}" <regexp> matches? ] unit-test
-[ f ] [ "abc" "[abc]{1,2}" <regexp> matches? ] unit-test
+! [ f ] [ "" "[a]" <regexp> matches? ] unit-test
+! [ t ] [ "a" "[a]" <regexp> matches? ] unit-test
+! [ t ] [ "a" "[abc]" <regexp> matches? ] unit-test
+! [ f ] [ "b" "[a]" <regexp> matches? ] unit-test
+! [ f ] [ "d" "[abc]" <regexp> matches? ] unit-test
+! [ t ] [ "ab" "[abc]{1,2}" <regexp> matches? ] unit-test
+! [ f ] [ "abc" "[abc]{1,2}" <regexp> matches? ] unit-test
 
-[ f ] [ "" "[^a]" <regexp> matches? ] unit-test
-[ f ] [ "a" "[^a]" <regexp> matches? ] unit-test
-[ f ] [ "a" "[^abc]" <regexp> matches? ] unit-test
-[ t ] [ "b" "[^a]" <regexp> matches? ] unit-test
-[ t ] [ "d" "[^abc]" <regexp> matches? ] unit-test
-[ f ] [ "ab" "[^abc]{1,2}" <regexp> matches? ] unit-test
-[ f ] [ "abc" "[^abc]{1,2}" <regexp> matches? ] unit-test
+! [ f ] [ "" "[^a]" <regexp> matches? ] unit-test
+! [ f ] [ "a" "[^a]" <regexp> matches? ] unit-test
+! [ f ] [ "a" "[^abc]" <regexp> matches? ] unit-test
+! [ t ] [ "b" "[^a]" <regexp> matches? ] unit-test
+! [ t ] [ "d" "[^abc]" <regexp> matches? ] unit-test
+! [ f ] [ "ab" "[^abc]{1,2}" <regexp> matches? ] unit-test
+! [ f ] [ "abc" "[^abc]{1,2}" <regexp> matches? ] unit-test
 
-[ t ] [ "]" "[]]" <regexp> matches? ] unit-test
-[ f ] [ "]" "[^]]" <regexp> matches? ] unit-test
+! [ t ] [ "]" "[]]" <regexp> matches? ] unit-test
+! [ f ] [ "]" "[^]]" <regexp> matches? ] unit-test
 
 ! [ "^" "[^]" <regexp> matches? ] must-fail
-[ t ] [ "^" "[]^]" <regexp> matches? ] unit-test
-[ t ] [ "]" "[]^]" <regexp> matches? ] unit-test
+! [ t ] [ "^" "[]^]" <regexp> matches? ] unit-test
+! [ t ] [ "]" "[]^]" <regexp> matches? ] unit-test
 
-[ t ] [ "[" "[[]" <regexp> matches? ] unit-test
-[ f ] [ "^" "[^^]" <regexp> matches? ] unit-test
-[ t ] [ "a" "[^^]" <regexp> matches? ] unit-test
+! [ t ] [ "[" "[[]" <regexp> matches? ] unit-test
+! [ f ] [ "^" "[^^]" <regexp> matches? ] unit-test
+! [ t ] [ "a" "[^^]" <regexp> matches? ] unit-test
 
-[ t ] [ "-" "[-]" <regexp> matches? ] unit-test
-[ f ] [ "a" "[-]" <regexp> matches? ] unit-test
-[ f ] [ "-" "[^-]" <regexp> matches? ] unit-test
-[ t ] [ "a" "[^-]" <regexp> matches? ] unit-test
+! [ t ] [ "-" "[-]" <regexp> matches? ] unit-test
+! [ f ] [ "a" "[-]" <regexp> matches? ] unit-test
+! [ f ] [ "-" "[^-]" <regexp> matches? ] unit-test
+! [ t ] [ "a" "[^-]" <regexp> matches? ] unit-test
 
-[ t ] [ "-" "[-a]" <regexp> matches? ] unit-test
-[ t ] [ "a" "[-a]" <regexp> matches? ] unit-test
-[ t ] [ "-" "[a-]" <regexp> matches? ] unit-test
-[ t ] [ "a" "[a-]" <regexp> matches? ] unit-test
-[ f ] [ "b" "[a-]" <regexp> matches? ] unit-test
-[ f ] [ "-" "[^-]" <regexp> matches? ] unit-test
-[ t ] [ "a" "[^-]" <regexp> matches? ] unit-test
+! [ t ] [ "-" "[-a]" <regexp> matches? ] unit-test
+! [ t ] [ "a" "[-a]" <regexp> matches? ] unit-test
+! [ t ] [ "-" "[a-]" <regexp> matches? ] unit-test
+! [ t ] [ "a" "[a-]" <regexp> matches? ] unit-test
+! [ f ] [ "b" "[a-]" <regexp> matches? ] unit-test
+! [ f ] [ "-" "[^-]" <regexp> matches? ] unit-test
+! [ t ] [ "a" "[^-]" <regexp> matches? ] unit-test
 
-[ f ] [ "-" "[a-c]" <regexp> matches? ] unit-test
-[ t ] [ "-" "[^a-c]" <regexp> matches? ] unit-test
-[ t ] [ "b" "[a-c]" <regexp> matches? ] unit-test
-[ f ] [ "b" "[^a-c]" <regexp> matches? ] unit-test
+! [ f ] [ "-" "[a-c]" <regexp> matches? ] unit-test
+! [ t ] [ "-" "[^a-c]" <regexp> matches? ] unit-test
+! [ t ] [ "b" "[a-c]" <regexp> matches? ] unit-test
+! [ f ] [ "b" "[^a-c]" <regexp> matches? ] unit-test
 
-[ t ] [ "-" "[a-c-]" <regexp> matches? ] unit-test
-[ f ] [ "-" "[^a-c-]" <regexp> matches? ] unit-test
+! [ t ] [ "-" "[a-c-]" <regexp> matches? ] unit-test
+! [ f ] [ "-" "[^a-c-]" <regexp> matches? ] unit-test
 
-[ t ] [ "\\" "[\\\\]" <regexp> matches? ] unit-test
-[ f ] [ "a" "[\\\\]" <regexp> matches? ] unit-test
-[ f ] [ "\\" "[^\\\\]" <regexp> matches? ] unit-test
-[ t ] [ "a" "[^\\\\]" <regexp> matches? ] unit-test
+! [ t ] [ "\\" "[\\\\]" <regexp> matches? ] unit-test
+! [ f ] [ "a" "[\\\\]" <regexp> matches? ] unit-test
+! [ f ] [ "\\" "[^\\\\]" <regexp> matches? ] unit-test
+! [ t ] [ "a" "[^\\\\]" <regexp> matches? ] unit-test
 
 
 ! ((A)(B(C)))
diff --git a/extra/regexp4/regexp4.factor b/extra/regexp4/regexp4.factor
index faf52f098d..a33a57bcea 100644
--- a/extra/regexp4/regexp4.factor
+++ b/extra/regexp4/regexp4.factor
@@ -8,29 +8,31 @@ symbols ;
 IN: regexp4
 
 SYMBOLS: eps start-state final-state beginning-of-text
-end-of-text left-paren right-paren alternation ;
+end-of-text left-parenthesis alternation left-bracket
+caret dash ampersand semicolon ;
 
-SYMBOL: runtim-epsilon
+SYMBOL: runtime-epsilon
 
-TUPLE: regexp raw paren-count bracket-count
+TUPLE: regexp raw parentheses-count bracket-count
 state stack nfa new-states dfa minimized-dfa
 dot-matches-newlines? character-sets capture-group
 captured-groups ;
 
 TUPLE: capture-group n range ;
 
-ERROR: paren-underflow ;
-ERROR: unbalanced-paren ;
+ERROR: parentheses-underflow ;
+ERROR: unbalanced-parentheses ;
+ERROR: unbalanced-brackets ;
 
 : push-stack ( regexp token -- ) swap stack>> push ;
 : push-all-stack ( regexp seq -- ) swap stack>> push-all ;
 : next-state ( regexp -- n ) [ 1+ ] change-state state>> ;
 
-: check-paren-underflow ( regexp -- )
-    paren-count>> 0 < [ paren-underflow ] when ;
+: check-parentheses-underflow ( regexp -- )
+    parentheses-count>> 0 < [ parentheses-underflow ] when ;
 
-: check-unbalanced-paren ( regexp -- )
-    paren-count>> 0 > [ unbalanced-paren ] when ;
+: check-unbalanced-parentheses ( regexp -- )
+    parentheses-count>> 0 > [ unbalanced-parentheses ] when ;
 
 :: (apply-alternation) ( stack regexp -- )
     [let | s2 [ stack peek first ]
@@ -82,10 +84,12 @@ ERROR: unbalanced-paren ;
         [ (apply-alternation) ] [ (apply-concatenation) ] if apply-loop
     ] when ;
 
+: cut-stack ( n vector -- vector' vector )
+    <reversed> tuck index cut reverse dup pop* ;
+    
 : apply-til-last ( token regexp -- )
-    swap [
-        <reversed> tuck index cut reverse dup pop*
-    ] change-stack >r reverse r> apply-loop stack>> push-all ;
+    swap [ cut-stack ] change-stack
+    >r reverse r> apply-loop stack>> push-all ;
 
 : concatenation-loop ( regexp -- )
     dup stack>> dup apply-concatenation?
@@ -294,16 +298,6 @@ ERROR: bad-hex number ;
     [ get-char CHAR: } = ] take-until
     "," split1 [ [ string>number ] bi@ ] keep >boolean ;
 
-: take-until-]
-    [ get-char CHAR: ] = ] take-until ;
-
-: make-character-set ( regexp str -- )
-    dup
-    [ length 1 > ] [ first CHAR: ^ = ] bi and
-    [ rest t ] [ f ] if
-    >r [ member? ] curry r>
-    [ [ not ] compose ] when make-nontoken-nfa ;
-
 : parse-escaped ( regexp -- )
     next get-char {
         { CHAR: \ [ CHAR: \ make-nontoken-nfa ] }
@@ -340,6 +334,39 @@ ERROR: bad-hex number ;
         [ dup digit? [ parse-backreference ] [ make-nontoken-nfa ] if ]
     } case ;
 
+: make-character-set ( regexp -- )
+    stack>> throw ;
+
+: (parse-character-set) ( regexp -- )
+    [
+        next get-char
+        {
+            { CHAR: [ [ [ 1+ ] change-bracket-count left-bracket push-stack ] }
+            { CHAR: ] [ [ 1- ] change-bracket-count left-bracket over stack>> cut-stack ] }
+            { CHAR: - [ dash push-stack ] }
+            { CHAR: & [ ampersand push-stack ] }
+            { CHAR: : [ semicolon push-stack ] }
+            { CHAR: \ [ parse-escaped ] }
+            { f [ unbalanced-brackets ] }
+            [ make-nontoken-nfa ]
+        } case
+    ] [
+        dup bracket-count>> 0 >
+        [ (parse-character-set) ]
+        [ make-character-set ] if
+    ] bi ;
+
+: parse-character-set-first ( regexp -- )
+    get-next
+    {
+        { CHAR: ^ [ caret push-stack next ] }
+        { CHAR: ] [ CHAR: ] make-nontoken-nfa next ] }
+        [ 2drop ]
+    } case ;
+
+: parse-character-set ( regexp -- )
+    [ parse-character-set-first ] [ (parse-character-set) ] bi ;
+
 ERROR: unsupported-token token ;
 : parse-token ( regexp token -- )
     dup {
@@ -347,20 +374,24 @@ ERROR: unsupported-token token ;
         { CHAR: $ [ drop back-anchor-construction ] }
         { CHAR: \ [ drop parse-escaped ] }
         { CHAR: | [ drop dup concatenation-loop alternation push-stack ] }
-        { CHAR: ( [ drop [ 1+ ] change-paren-count left-paren push-stack ] }
-        { CHAR: ) [ drop [ 1- ] change-paren-count left-paren apply-til-last ] }
+        { CHAR: ( [ drop [ 1+ ] change-parentheses-count left-parenthesis push-stack ] }
+        { CHAR: ) [ drop [ 1- ] change-parentheses-count left-parenthesis apply-til-last ] }
         { CHAR: * [ drop apply-kleene-closure ] }
         { CHAR: + [ drop apply-plus-closure ] }
         { CHAR: ? [ drop apply-question-closure ] }
         { CHAR: { [ drop parse-brace apply-brace-closure ] }
-        ! { CHAR: [ [ drop parse-character-set ] }
+        { CHAR: [ [
+            drop
+            dup left-bracket push-stack
+            [ 1+ ] change-bracket-count parse-character-set
+        ] }
         ! { CHAR: } [ drop drop "brace" ] }
         ! { CHAR: ? [ drop ] }
         { CHAR: . [ drop dot-construction ] }
         { beginning-of-text [ push-stack ] }
         { end-of-text [
             drop {
-                [ check-unbalanced-paren ]
+                [ check-unbalanced-parentheses ]
                 [ concatenation-loop ]
                 [ beginning-of-text apply-til-last ]
                 [ set-start-state ]
@@ -461,7 +492,8 @@ ERROR: unsupported-token token ;
 : <regexp> ( raw -- obj )
     regexp new
         swap >>raw
-        0 >>paren-count
+        0 >>parentheses-count
+        0 >>bracket-count
         -1 >>state
         V{ } clone >>stack 
         V{ } clone >>character-sets
@@ -535,8 +567,6 @@ TUPLE: dfa-traverser
 : matches? ( string regexp -- ? )
     dupd match [ [ length ] [ range-length 1- ] bi* = ] [ drop f ] if* ;
 
-
-
 ! character classes
 ! TUPLE: range-class from to ;
 ! TUPLE: or-class left right ;

From 9307bb3b4a4105701e976c74b07d6f55f75c1369 Mon Sep 17 00:00:00 2001
From: Doug Coleman <doug.coleman@gmail.com>
Date: Wed, 21 May 2008 00:30:22 -0500
Subject: [PATCH 2/2] working on character classes still have to do negation,
 &&, and :foo:

---
 extra/regexp4/regexp4-tests.factor | 86 +++++++++++++++---------------
 extra/regexp4/regexp4.factor       | 63 ++++++++++++++--------
 2 files changed, 85 insertions(+), 64 deletions(-)

diff --git a/extra/regexp4/regexp4-tests.factor b/extra/regexp4/regexp4-tests.factor
index cc71dee6aa..ec8656bc27 100644
--- a/extra/regexp4/regexp4-tests.factor
+++ b/extra/regexp4/regexp4-tests.factor
@@ -77,58 +77,58 @@ IN: regexp4-tests
 [ t ] [ "aaa" "a{1,3}" <regexp> matches? ] unit-test
 [ f ] [ "aaaa" "a{1,3}" <regexp> matches? ] unit-test
 
-! [ f ] [ "" "[a]" <regexp> matches? ] unit-test
-! [ t ] [ "a" "[a]" <regexp> matches? ] unit-test
-! [ t ] [ "a" "[abc]" <regexp> matches? ] unit-test
-! [ f ] [ "b" "[a]" <regexp> matches? ] unit-test
-! [ f ] [ "d" "[abc]" <regexp> matches? ] unit-test
-! [ t ] [ "ab" "[abc]{1,2}" <regexp> matches? ] unit-test
-! [ f ] [ "abc" "[abc]{1,2}" <regexp> matches? ] unit-test
+[ f ] [ "" "[a]" <regexp> matches? ] unit-test
+[ t ] [ "a" "[a]" <regexp> matches? ] unit-test
+[ t ] [ "a" "[abc]" <regexp> matches? ] unit-test
+[ f ] [ "b" "[a]" <regexp> matches? ] unit-test
+[ f ] [ "d" "[abc]" <regexp> matches? ] unit-test
+[ t ] [ "ab" "[abc]{1,2}" <regexp> matches? ] unit-test
+[ f ] [ "abc" "[abc]{1,2}" <regexp> matches? ] unit-test
 
-! [ f ] [ "" "[^a]" <regexp> matches? ] unit-test
-! [ f ] [ "a" "[^a]" <regexp> matches? ] unit-test
-! [ f ] [ "a" "[^abc]" <regexp> matches? ] unit-test
-! [ t ] [ "b" "[^a]" <regexp> matches? ] unit-test
-! [ t ] [ "d" "[^abc]" <regexp> matches? ] unit-test
-! [ f ] [ "ab" "[^abc]{1,2}" <regexp> matches? ] unit-test
-! [ f ] [ "abc" "[^abc]{1,2}" <regexp> matches? ] unit-test
+[ f ] [ "" "[^a]" <regexp> matches? ] unit-test
+[ f ] [ "a" "[^a]" <regexp> matches? ] unit-test
+[ f ] [ "a" "[^abc]" <regexp> matches? ] unit-test
+[ t ] [ "b" "[^a]" <regexp> matches? ] unit-test
+[ t ] [ "d" "[^abc]" <regexp> matches? ] unit-test
+[ f ] [ "ab" "[^abc]{1,2}" <regexp> matches? ] unit-test
+[ f ] [ "abc" "[^abc]{1,2}" <regexp> matches? ] unit-test
 
-! [ t ] [ "]" "[]]" <regexp> matches? ] unit-test
-! [ f ] [ "]" "[^]]" <regexp> matches? ] unit-test
+[ t ] [ "]" "[]]" <regexp> matches? ] unit-test
+[ f ] [ "]" "[^]]" <regexp> matches? ] unit-test
 
-! [ "^" "[^]" <regexp> matches? ] must-fail
-! [ t ] [ "^" "[]^]" <regexp> matches? ] unit-test
-! [ t ] [ "]" "[]^]" <regexp> matches? ] unit-test
+[ "^" "[^]" <regexp> matches? ] must-fail
+[ t ] [ "^" "[]^]" <regexp> matches? ] unit-test
+[ t ] [ "]" "[]^]" <regexp> matches? ] unit-test
 
-! [ t ] [ "[" "[[]" <regexp> matches? ] unit-test
-! [ f ] [ "^" "[^^]" <regexp> matches? ] unit-test
-! [ t ] [ "a" "[^^]" <regexp> matches? ] unit-test
+[ t ] [ "[" "[[]" <regexp> matches? ] unit-test
+[ f ] [ "^" "[^^]" <regexp> matches? ] unit-test
+[ t ] [ "a" "[^^]" <regexp> matches? ] unit-test
 
-! [ t ] [ "-" "[-]" <regexp> matches? ] unit-test
-! [ f ] [ "a" "[-]" <regexp> matches? ] unit-test
-! [ f ] [ "-" "[^-]" <regexp> matches? ] unit-test
-! [ t ] [ "a" "[^-]" <regexp> matches? ] unit-test
+[ t ] [ "-" "[-]" <regexp> matches? ] unit-test
+[ f ] [ "a" "[-]" <regexp> matches? ] unit-test
+[ f ] [ "-" "[^-]" <regexp> matches? ] unit-test
+[ t ] [ "a" "[^-]" <regexp> matches? ] unit-test
 
-! [ t ] [ "-" "[-a]" <regexp> matches? ] unit-test
-! [ t ] [ "a" "[-a]" <regexp> matches? ] unit-test
-! [ t ] [ "-" "[a-]" <regexp> matches? ] unit-test
-! [ t ] [ "a" "[a-]" <regexp> matches? ] unit-test
-! [ f ] [ "b" "[a-]" <regexp> matches? ] unit-test
-! [ f ] [ "-" "[^-]" <regexp> matches? ] unit-test
-! [ t ] [ "a" "[^-]" <regexp> matches? ] unit-test
+[ t ] [ "-" "[-a]" <regexp> matches? ] unit-test
+[ t ] [ "a" "[-a]" <regexp> matches? ] unit-test
+[ t ] [ "-" "[a-]" <regexp> matches? ] unit-test
+[ t ] [ "a" "[a-]" <regexp> matches? ] unit-test
+[ f ] [ "b" "[a-]" <regexp> matches? ] unit-test
+[ f ] [ "-" "[^-]" <regexp> matches? ] unit-test
+[ t ] [ "a" "[^-]" <regexp> matches? ] unit-test
 
-! [ f ] [ "-" "[a-c]" <regexp> matches? ] unit-test
-! [ t ] [ "-" "[^a-c]" <regexp> matches? ] unit-test
-! [ t ] [ "b" "[a-c]" <regexp> matches? ] unit-test
-! [ f ] [ "b" "[^a-c]" <regexp> matches? ] unit-test
+[ f ] [ "-" "[a-c]" <regexp> matches? ] unit-test
+[ t ] [ "-" "[^a-c]" <regexp> matches? ] unit-test
+[ t ] [ "b" "[a-c]" <regexp> matches? ] unit-test
+[ f ] [ "b" "[^a-c]" <regexp> matches? ] unit-test
 
-! [ t ] [ "-" "[a-c-]" <regexp> matches? ] unit-test
-! [ f ] [ "-" "[^a-c-]" <regexp> matches? ] unit-test
+[ t ] [ "-" "[a-c-]" <regexp> matches? ] unit-test
+[ f ] [ "-" "[^a-c-]" <regexp> matches? ] unit-test
 
-! [ t ] [ "\\" "[\\\\]" <regexp> matches? ] unit-test
-! [ f ] [ "a" "[\\\\]" <regexp> matches? ] unit-test
-! [ f ] [ "\\" "[^\\\\]" <regexp> matches? ] unit-test
-! [ t ] [ "a" "[^\\\\]" <regexp> matches? ] unit-test
+[ t ] [ "\\" "[\\\\]" <regexp> matches? ] unit-test
+[ f ] [ "a" "[\\\\]" <regexp> matches? ] unit-test
+[ f ] [ "\\" "[^\\\\]" <regexp> matches? ] unit-test
+[ t ] [ "a" "[^\\\\]" <regexp> matches? ] unit-test
 
 
 ! ((A)(B(C)))
diff --git a/extra/regexp4/regexp4.factor b/extra/regexp4/regexp4.factor
index a33a57bcea..07ef430de3 100644
--- a/extra/regexp4/regexp4.factor
+++ b/extra/regexp4/regexp4.factor
@@ -4,12 +4,12 @@ USING: accessors arrays assocs combinators kernel math
 sequences namespaces locals combinators.lib state-tables
 math.parser state-parser sets dlists unicode.categories
 math.order quotations shuffle math.ranges splitting
-symbols ;
+symbols fry ;
 IN: regexp4
 
 SYMBOLS: eps start-state final-state beginning-of-text
 end-of-text left-parenthesis alternation left-bracket
-caret dash ampersand semicolon ;
+caret dash ampersand colon ;
 
 SYMBOL: runtime-epsilon
 
@@ -84,12 +84,14 @@ ERROR: unbalanced-brackets ;
         [ (apply-alternation) ] [ (apply-concatenation) ] if apply-loop
     ] when ;
 
-: cut-stack ( n vector -- vector' vector )
-    <reversed> tuck index cut reverse dup pop* ;
+: cut-out ( vector n -- vector' vector ) cut rest ;
+
+: cut-stack ( obj vector -- vector' vector )
+    tuck last-index cut-out swap ;
     
-: apply-til-last ( token regexp -- )
+: apply-til-last ( regexp token -- )
     swap [ cut-stack ] change-stack
-    >r reverse r> apply-loop stack>> push-all ;
+    apply-loop stack>> push-all ;
 
 : concatenation-loop ( regexp -- )
     dup stack>> dup apply-concatenation?
@@ -300,13 +302,13 @@ ERROR: bad-hex number ;
 
 : parse-escaped ( regexp -- )
     next get-char {
-        { CHAR: \ [ CHAR: \ make-nontoken-nfa ] }
-        { CHAR: t [ CHAR: \t make-nontoken-nfa ] }
-        { CHAR: n [ CHAR: \n make-nontoken-nfa ] }
-        { CHAR: r [ CHAR: \r make-nontoken-nfa ] }
-        { CHAR: f [ HEX: c make-nontoken-nfa ] }
-        { CHAR: a [ HEX: 7 make-nontoken-nfa ] }
-        { CHAR: e [ HEX: 1b make-nontoken-nfa ] }
+        { CHAR: \ [ [ CHAR: \ = ] make-nontoken-nfa ] }
+        { CHAR: t [ [ CHAR: \t = ] make-nontoken-nfa ] }
+        { CHAR: n [ [ CHAR: \n = ] make-nontoken-nfa ] }
+        { CHAR: r [ [ CHAR: \r = ] make-nontoken-nfa ] }
+        { CHAR: f [ [ HEX: c = ] make-nontoken-nfa ] }
+        { CHAR: a [ [ HEX: 7 = ] make-nontoken-nfa ] }
+        { CHAR: e [ [ HEX: 1b = ] make-nontoken-nfa ] }
 
         { CHAR: d [ [ digit? ] make-nontoken-nfa ] }
         { CHAR: D [ [ digit? not ] make-nontoken-nfa ] }
@@ -335,31 +337,50 @@ ERROR: bad-hex number ;
     } case ;
 
 : make-character-set ( regexp -- )
-    stack>> throw ;
+    left-bracket over stack>> cut-stack
+    pick (>>stack)
+    [ dup number? [ '[ dup , = ] ] when ] map
+    [ [ drop t ] 2array ] map [ drop f ] suffix [ cond ] curry
+    make-nontoken-nfa ;
 
+: apply-dash ( regexp -- )
+    stack>> dup [ pop ] [ pop* ] [ pop ] tri
+    swap '[ dup , , between? ] swap push ;
+
+: apply-dash? ( regexp -- ? )
+    stack>> dup length 3 >=
+    [ [ length 2 - ] keep nth dash eq? ] [ drop f ] if ;
+
+DEFER: parse-character-set
 : (parse-character-set) ( regexp -- )
     [
         next get-char
         {
-            { CHAR: [ [ [ 1+ ] change-bracket-count left-bracket push-stack ] }
-            { CHAR: ] [ [ 1- ] change-bracket-count left-bracket over stack>> cut-stack ] }
+            { CHAR: [ [
+                [ 1+ ] change-bracket-count left-bracket push-stack
+                parse-character-set
+            ] }
+            { CHAR: ] [
+                [ 1- ] change-bracket-count
+                make-character-set
+            ] }
             { CHAR: - [ dash push-stack ] }
-            { CHAR: & [ ampersand push-stack ] }
-            { CHAR: : [ semicolon push-stack ] }
+            ! { CHAR: & [ ampersand push-stack ] }
+            ! { CHAR: : [ semicolon push-stack ] }
             { CHAR: \ [ parse-escaped ] }
             { f [ unbalanced-brackets ] }
-            [ make-nontoken-nfa ]
+            [ dupd push-stack dup apply-dash? [ apply-dash ] [ drop ] if ]
         } case
     ] [
         dup bracket-count>> 0 >
-        [ (parse-character-set) ]
-        [ make-character-set ] if
+        [ (parse-character-set) ] [ drop ] if
     ] bi ;
 
 : parse-character-set-first ( regexp -- )
     get-next
     {
         { CHAR: ^ [ caret push-stack next ] }
+        { CHAR: [ [ CHAR: [ make-nontoken-nfa next ] }
         { CHAR: ] [ CHAR: ] make-nontoken-nfa next ] }
         [ 2drop ]
     } case ;