From c4fc9f59025ed17b268772e30150a8e8a99a3e28 Mon Sep 17 00:00:00 2001
From: Doug Coleman <doug.coleman@gmail.com>
Date: Thu, 22 May 2008 18:15:16 -0500
Subject: [PATCH 1/7] about to start lookahead

---
 extra/regexp4/regexp4-tests.factor |   7 +-
 extra/regexp4/regexp4.factor       | 172 +++++++++++++++++++----------
 2 files changed, 112 insertions(+), 67 deletions(-)
diff --git a/extra/regexp4/regexp4-tests.factor b/extra/regexp4/regexp4-tests.factor
index ea62d2105a..c941d0fb75 100644
--- a/extra/regexp4/regexp4-tests.factor
+++ b/extra/regexp4/regexp4-tests.factor
@@ -155,6 +155,7 @@ IN: regexp4-tests
 [ f ] [ "a" "\\Q\\E" <regexp> matches? ] unit-test
 [ t ] [ "|*+" "\\Q|*+\\E" <regexp> matches? ] unit-test
 [ f ] [ "abc" "\\Q|*+\\E" <regexp> matches? ] unit-test
+[ t ] [ "s" "\\Qs\\E" <regexp> matches? ] unit-test
 
 [ t ] [ "S" "\\0123" <regexp> matches? ] unit-test
 [ t ] [ "SXY" "\\0123XY" <regexp> matches? ] unit-test
@@ -236,12 +237,6 @@ IN: regexp4-tests
     matches?
 ] unit-test
 
-
-
-
-
-
-
 ! ((A)(B(C)))
 ! 1.  ((A)(B(C)))
 ! 2. (A)
diff --git a/extra/regexp4/regexp4.factor b/extra/regexp4/regexp4.factor
index 2957244bcf..377a9b17a5 100644
--- a/extra/regexp4/regexp4.factor
+++ b/extra/regexp4/regexp4.factor
@@ -4,7 +4,7 @@ USING: accessors arrays assocs combinators kernel math
 sequences namespaces locals combinators.lib state-tables
 math.parser state-parser sets dlists unicode.categories
 math.order quotations shuffle math.ranges splitting
-symbols fry parser ;
+symbols fry parser math.ranges inspector strings ;
 IN: regexp4
 
 SYMBOLS: eps start-state final-state beginning-of-text
@@ -191,7 +191,7 @@ ERROR: unbalanced-brackets ;
         [ [ nip at-most-n ] [ at-least-n ] if* ] if
     ] [ drop exactly-n ] if ;
 
-:: make-nontoken-nfa ( regexp obj -- )
+:: push-single-nfa ( regexp obj -- )
     [let | s0 [ regexp next-state ]
            s1 [ regexp next-state ]
            stack [ regexp stack>> ]
@@ -213,18 +213,23 @@ ERROR: unbalanced-brackets ;
 : decimal-digit? ( n -- ? ) CHAR: 0 CHAR: 9 between? ;
 
 : hex-digit? ( n -- ? )
-    dup decimal-digit? 
-    over CHAR: a CHAR: f between? or 
-    swap CHAR: A CHAR: F between? or ;
+    [
+        [ dup decimal-digit? ]
+        [ dup CHAR: a CHAR: f between? ]
+        [ dup CHAR: A CHAR: F between? ]
+    ] || nip ;
 
 : control-char? ( n -- ? )
-    dup 0 HEX: 1f between? swap HEX: 7f = or ;
+    [
+        [ dup 0 HEX: 1f between? ]
+        [ dup HEX: 7f = ]
+    ] || nip ;
 
 : punct? ( n -- ? )
     "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" member? ;
 
 : c-identifier-char? ( ch -- ? ) 
-    dup alpha? swap CHAR: _ = or ;
+    [ [ dup alpha? ] [ dup CHAR: _ = ] ] || nip ;
 
 : java-blank? ( n -- ? )
     {   
@@ -233,7 +238,7 @@ ERROR: unbalanced-brackets ;
     } member? ;
 
 : java-printable? ( n -- ? )
-    dup alpha? swap punct? or ;
+    [ [ dup alpha? ] [ dup punct? ] ] || nip ;
 
 ERROR: bad-character-class obj ;
 
@@ -261,32 +266,26 @@ ERROR: bad-character-class obj ;
 
 ERROR: bad-octal number ;
 
-: parse-octal ( regexp -- )
+: parse-octal ( -- n )
     next get-char drop
     3 take oct>
-    dup 255 > [ bad-octal ] when
-    make-nontoken-nfa ;
+    dup 255 > [ bad-octal ] when ;
 
 ERROR: bad-hex number ;
 
-: parse-short-hex ( regexp -- )
+: parse-short-hex ( -- n )
     next 2 take hex>
-    dup number? [ bad-hex ] unless
-    make-nontoken-nfa ;
+    dup number? [ bad-hex ] unless ;
 
-: parse-long-hex ( regexp -- )
-    next 4 take hex>
-    dup number? [ bad-hex ] unless
-    make-nontoken-nfa ;
+: parse-long-hex ( -- n )
+    next 6 take hex>
+    dup number? [ bad-hex ] unless ;
 
-: parse-control-character ( regexp -- )
-    next get-char make-nontoken-nfa ;
-
-: parse-backreference ( regexp obj -- )
-    2drop ;
+: parse-control-character ( -- n )
+    next get-char ;
 
 : dot-construction ( regexp -- )
-    [ CHAR: \n = not ] make-nontoken-nfa ;
+    [ CHAR: \n = not ] push-single-nfa ;
 
 : front-anchor-construction ( regexp -- )
     drop ;
@@ -299,32 +298,50 @@ ERROR: bad-hex number ;
     [ get-char CHAR: } = ] take-until
     "," split1 [ [ string>number ] bi@ ] keep >boolean ;
 
-: parse-escaped ( regexp -- )
-    next get-char {
-        { CHAR: \ [ [ CHAR: \ = ] make-nontoken-nfa ] }
-        { CHAR: t [ [ CHAR: \t = ] make-nontoken-nfa ] }
-        { CHAR: n [ [ CHAR: \n = ] make-nontoken-nfa ] }
-        { CHAR: r [ [ CHAR: \r = ] make-nontoken-nfa ] }
-        { CHAR: f [ [ HEX: c = ] make-nontoken-nfa ] }
-        { CHAR: a [ [ HEX: 7 = ] make-nontoken-nfa ] }
-        { CHAR: e [ [ HEX: 1b = ] make-nontoken-nfa ] }
+TUPLE: character-class members ;
+TUPLE: character-class-range from to ;
+TUPLE: negated-character-class < character-class ;
+TUPLE: negated-character-class-range < character-class-range ;
+TUPLE: intersection-class < character-class ;
+TUPLE: negated-intersection-class < intersection-class ;
 
-        { CHAR: d [ [ digit? ] make-nontoken-nfa ] }
-        { CHAR: D [ [ digit? not ] make-nontoken-nfa ] }
-        { CHAR: s [ [ java-blank? ] make-nontoken-nfa ] }
-        { CHAR: S [ [ java-blank? not ] make-nontoken-nfa ] }
-        { CHAR: w [ [ c-identifier-char? ] make-nontoken-nfa ] }
-        { CHAR: W [ [ c-identifier-char? not ] make-nontoken-nfa ] }
+GENERIC: character-class-contains? ( obj character-class -- ? )
 
-        { CHAR: p [ parse-posix-class make-nontoken-nfa ] }
-        { CHAR: P [ parse-posix-class [ not ] compose make-nontoken-nfa ] }
+: parse-escaped-until ( -- seq )
+    [ get-char CHAR: \ = get-next CHAR: E = and ] take-until
+    next ;
+
+: character-class-predicate ( seq -- quot )
+    boa '[ , character-class-contains? ] ;
+
+ERROR: unmatched-escape-sequence ;
+
+: (parse-escaped) ( regexp ? ch -- obj )
+    {
+        { CHAR: \ [ [ CHAR: \ = ] ] }
+        { CHAR: t [ [ CHAR: \t = ] ] }
+        { CHAR: n [ [ CHAR: \n = ] ] }
+        { CHAR: r [ [ CHAR: \r = ] ] }
+        { CHAR: f [ [ HEX: c = ] ] }
+        { CHAR: a [ [ HEX: 7 = ] ] }
+        { CHAR: e [ [ HEX: 1b = ] ] }
+
+        { CHAR: d [ [ digit? ] ] }
+        { CHAR: D [ [ digit? not ] ] }
+        { CHAR: s [ [ java-blank? ] ] }
+        { CHAR: S [ [ java-blank? not ] ] }
+        { CHAR: w [ [ c-identifier-char? ] ] }
+        { CHAR: W [ [ c-identifier-char? not ] ] }
+
+        { CHAR: p [ parse-posix-class ] }
+        { CHAR: P [ parse-posix-class [ not ] compose ] }
         { CHAR: x [ parse-short-hex ] }
         { CHAR: u [ parse-long-hex ] }
         { CHAR: 0 [ parse-octal ] }
         { CHAR: c [ parse-control-character ] }
 
-        ! { CHAR: Q [ quot til \E ] }
-        ! { CHAR: E [ should be an error, parse this in the Q if exists ] }
+        ! { CHAR: Q [ next parse-escaped-until ] }
+        ! { CHAR: E [ unmatched-escape-sequence ] }
 
         ! { CHAR: b [ ] } ! a word boundary
         ! { CHAR: B [ ] } ! a non-word boundary
@@ -332,34 +349,57 @@ ERROR: bad-hex number ;
         ! { CHAR: G [ ] } ! end of previous match
         ! { CHAR: Z [ ] } ! end of input but for the final terminator, if any
         ! { CHAR: z [ ] } ! end of the input
-        [ dup digit? [ parse-backreference ] [ make-nontoken-nfa ] if ]
+        [ ]
     } case ;
 
+: parse-escaped ( regexp -- )
+    next get-char (parse-escaped) push-single-nfa ;
+
 : handle-dash ( vector -- vector )
     [ dup dash eq? [ drop CHAR: - ] when ] map ;
 
+M: object character-class-contains? ( obj1 obj2 -- ? )
+    = ;
+
+M: callable character-class-contains? ( obj1 callable -- ? )
+    call ;
+
+M: character-class character-class-contains? ( obj cc -- ? )
+    members>> [ character-class-contains? ] with find drop >boolean ;
+
+M: negated-character-class character-class-contains? ( obj cc -- ? )
+    call-next-method not ;
+
+M: character-class-range character-class-contains? ( obj cc -- ? )
+    [ from>> ] [ to>> ] bi between?  ;
+
+M: negated-character-class-range character-class-contains? ( obj cc -- ? )
+    call-next-method not ;
+
+M: intersection-class character-class-contains? ( obj cc -- ? )
+    members>> [ character-class-contains? not ] with find drop not ;
+
+M: negated-intersection-class character-class-contains? ( obj cc -- ? )
+    call-next-method not ;
+
 ERROR: unmatched-negated-character-class class ;
 
-: handle-caret ( vector -- vector ? )
+: handle-caret ( obj -- seq class )
     dup [ length 2 >= ] [ first caret eq? ] bi and [ 
-        rest t
+        rest negated-character-class
     ] [
-        f
+        character-class
     ] if ;
 
 : make-character-class ( regexp -- )
     left-bracket over stack>> cut-stack
     pick (>>stack)
-    handle-dash
-    handle-caret
-    >r [ dup number? [ '[ dup , = ] ] when ] map
-    [ [ drop t ] 2array ] map [ drop f ] suffix [ cond ] curry r>
-    [ [ not ] compose ] when
-    make-nontoken-nfa ;
+    handle-dash handle-caret
+    character-class-predicate push-single-nfa ;
 
 : apply-dash ( regexp -- )
     stack>> dup [ pop ] [ pop* ] [ pop ] tri
-    swap '[ dup , , between? ] swap push ;
+    swap character-class-range boa swap push ;
 
 : apply-dash? ( regexp -- ? )
     stack>> dup length 3 >=
@@ -371,7 +411,7 @@ DEFER: parse-character-class
         next get-char
         {
             { CHAR: [ [
-                [ 1+ ] change-bracket-count left-bracket push-stack
+                [ 1+ ] change-bracket-count dup left-bracket push-stack
                 parse-character-class
             ] }
             { CHAR: ] [
@@ -381,7 +421,7 @@ DEFER: parse-character-class
             { CHAR: - [ dash push-stack ] }
             ! { CHAR: & [ ampersand push-stack ] }
             ! { CHAR: : [ semicolon push-stack ] }
-            { CHAR: \ [ parse-escaped ] }
+            { CHAR: \ [ next get-char (parse-escaped) push-stack ] }
             { f [ unbalanced-brackets ] }
             [ dupd push-stack dup apply-dash? [ apply-dash ] [ drop ] if ]
         } case
@@ -393,7 +433,7 @@ DEFER: parse-character-class
 : parse-character-class-second ( regexp -- )
     get-next
     {
-        ! { CHAR: [ [ CHAR: [ push-stack next ] }
+        { CHAR: [ [ CHAR: [ push-stack next ] }
         { CHAR: ] [ CHAR: ] push-stack next ] }
         { CHAR: - [ CHAR: - push-stack next ] }
         [ 2drop ]
@@ -403,7 +443,7 @@ DEFER: parse-character-class
     get-next
     {
         { CHAR: ^ [ caret dupd push-stack next parse-character-class-second ] }
-        ! { CHAR: [ [ CHAR: [ push-stack next ] }
+        { CHAR: [ [ CHAR: [ push-stack next ] }
         { CHAR: ] [ CHAR: ] push-stack next ] }
         { CHAR: - [ CHAR: - push-stack next ] }
         [ 2drop ]
@@ -442,7 +482,7 @@ ERROR: unsupported-token token ;
                 [ set-start-state ]
             } cleave
         ] }
-        [ drop make-nontoken-nfa ]
+        [ drop push-single-nfa ]
     } case ;
 
 : (parse-raw-regexp) ( regexp -- )
@@ -639,7 +679,7 @@ TUPLE: dfa-traverser
     dupd match [ [ length ] [ range-length 1- ] bi* = ] [ drop f ] if* ;
 
 : match-head ( string regexp -- end )
-    match length>> ;
+    match length>> 1- ;
 
 ! character classes
 ! TUPLE: range-class from to ;
@@ -647,5 +687,15 @@ TUPLE: dfa-traverser
 
 ! (?:a|b)*  <- does not capture
 ! (a|b)*\1  <- group captured
-! (?!abba)  negative lookahead  matches ababa but not abbaa
+! doesn't advance the current position:
 ! (?=abba)  positive lookahead  matches abbaaa but not abaaa
+! (?!abba)  negative lookahead  matches ababa but not abbaa
+! look behind.   "lookaround"
+
+! : $ ( n -- obj ) groups get nth ;
+! [
+    ! groups bound to scope here
+! ] [
+    ! error or something
+! ] if-match
+! match in a string with  .*foo.*

From 9650b46688af847598f8e22cb38fe155c36e7a27 Mon Sep 17 00:00:00 2001
From: Eduardo Cavazos <dharmatech@finkelstein.stackeffects.info>
Date: Fri, 23 May 2008 00:11:44 -0500
Subject: [PATCH 2/7] newfx: few more words for dns

---
 extra/newfx/newfx.factor | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/extra/newfx/newfx.factor b/extra/newfx/newfx.factor
index 2b2f916aea..abe0449d06 100644
--- a/extra/newfx/newfx.factor
+++ b/extra/newfx/newfx.factor
@@ -160,6 +160,16 @@ METHOD: as-mutate { object object assoc }       set-at ;
 
 ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 
+: subseq ( seq from to -- subseq ) rot sequences:subseq ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+: key ( table val -- key ) swap assocs:value-at ;
+
+: key-of ( val table -- key ) assocs:value-at ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
 : 1st 0 at ;
 : 2nd 1 at ;
 : 3rd 2 at ;

From 9c569034f63ac06558b53608cbb703358e815627 Mon Sep 17 00:00:00 2001
From: Eduardo Cavazos <dharmatech@finkelstein.stackeffects.info>
Date: Fri, 23 May 2008 00:12:01 -0500
Subject: [PATCH 3/7] factor.el: couple of font-lock words

---
 misc/factor.el | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/misc/factor.el b/misc/factor.el
index 7513c3640d..9d90fb68f9 100644
--- a/misc/factor.el
+++ b/misc/factor.el
@@ -82,6 +82,7 @@
     ("^!.*$" . font-lock-comment-face)
     (" !.*$" . font-lock-comment-face)
     ("( .* )" . font-lock-comment-face)
+    "BIN:"
     "MAIN:"
     "IN:" "USING:" "TUPLE:" "^C:" "^M:"
     "METHOD:"
@@ -89,7 +90,9 @@
     "REQUIRES:"
     "GENERIC:" "GENERIC#" "SYMBOL:" "PREDICATE:" "VAR:" "VARS:"
     "C-STRUCT:"
-    "C-UNION:" "<PRIVATE" "PRIVATE>" "MACRO:" "MACRO::" "DEFER:" "TYPEDEF:"))
+    "C-UNION:" "<PRIVATE" "PRIVATE>" "MACRO:" "MACRO::" "DEFER:" "TYPEDEF:"
+    "SYMBOLS:"
+))
 
 (defun factor-mode ()
   "A mode for editing programs written in the Factor programming language."

From ff553f6aa0a67039ff2f3a1822d265e83f3b7237 Mon Sep 17 00:00:00 2001
From: Eduardo Cavazos <dharmatech@finkelstein.stackeffects.info>
Date: Fri, 23 May 2008 00:12:38 -0500
Subject: [PATCH 4/7] New vocabulary: dns

---
 extra/dns/cache/cache.factor | 142 +++++++++++
 extra/dns/dns.factor         | 462 +++++++++++++++++++++++++++++++++++
 2 files changed, 604 insertions(+)
 create mode 100644 extra/dns/cache/cache.factor
 create mode 100644 extra/dns/dns.factor

diff --git a/extra/dns/cache/cache.factor b/extra/dns/cache/cache.factor
new file mode 100644
index 0000000000..e497192b04
--- /dev/null
+++ b/extra/dns/cache/cache.factor
@@ -0,0 +1,142 @@
+
+USING: kernel system
+       combinators
+       vectors sequences assocs
+       math math.functions
+       prettyprint unicode.case
+       accessors
+       combinators.cleave
+       newfx
+       dns ;
+
+IN: dns.cache
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+: cache ( -- table ) H{ } ;
+
+! key: 'name type class' (as string)
+! val: entry
+
+TUPLE: entry time data ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+: query->key ( query -- key )
+  { [ name>> >lower ] [ type>> unparse ] [ class>> unparse ] } <arr> " " join ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+: table-get ( query -- result ) query->key cache of ;
+
+: table-check ( query -- ? ) query->key cache key? ;
+
+: table-add ( query value -- ) [ query->key ] [ ] bi* cache at-mutate ;
+
+: table-rem ( query -- ) query->key cache delete-key-of drop ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+: now ( -- seconds ) millis 1000.0 / round >integer ;
+
+: ttl->time ( ttl -- seconds ) now + ;
+
+: time->ttl ( time -- ttl ) now - ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+SYMBOL: NX
+
+: cache-nx ( query ttl -- )
+  ttl->time NX entry boa
+  table-add ;
+
+: nx? ( obj -- ? )
+  dup entry?
+    [ data>> NX = ]
+    [ drop f ]
+  if ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+: query->rr ( query -- rr ) [ name>> ] [ type>> ] [ class>> ] tri f f rr boa ;
+
+: query+entry->rrs ( query entry -- rrs )
+  swap                                  ! entry query
+  query->rr                             ! entry rr
+  over                                  ! entry rr entry
+  time>> time->ttl >>ttl                ! entry rr
+  swap                                  ! rr entry
+  data>> [ >r dup clone r> >>rdata ] map
+  nip ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+: entry-expired? ( entry -- ? ) time>> time->ttl 0 <= ;
+
+: cache-get ( query -- result )
+  dup table-get               ! query result
+    {
+      {
+        [ dup f = ] ! not in the cache
+        [ 2drop f ]
+      }
+      {
+        [ dup entry-expired? ] ! here but expired
+        [ drop table-rem f   ]
+      }
+      {
+        [ dup nx?  ] ! negative result has been cached
+        [ 2drop NX ]
+      }
+      {
+        [ t ]
+        [ query+entry->rrs ]
+      }
+    }
+  cond ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+: rr->entry ( rr -- entry )
+  [ ttl>> ttl->time ] [ rdata>> {1} >vector ] bi entry boa ;
+
+: maybe-pushed-on ( obj seq -- )
+  2dup member-of?
+    [ 2drop ]
+    [ pushed-on ]
+  if ;
+
+: add-rr-to-entry ( rr entry -- )
+  over ttl>> ttl->time >>time
+  [ rdata>> ] [ data>> ] bi* maybe-pushed-on ;
+
+: cache-add ( query rr -- )
+  over table-get          ! query rr entry
+    {
+      {
+        [ dup f = ] ! not in the cache
+        [ drop rr->entry table-add ]
+      }
+      {
+        [ dup nx? ]
+        [ drop over table-rem rr->entry table-add ]
+      }
+      {
+        [ dup entry-expired? ]
+        [ drop rr->entry table-add ]
+      }
+      {
+        [ t ]
+        [ rot drop add-rr-to-entry ]
+      }
+    }
+  cond ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+: rr->query ( rr -- query ) [ name>> ] [ type>> ] [ class>> ] tri query boa ;
+
+: cache-add-rr ( rr -- ) [ rr->query ] [ ] bi cache-add ;
+
+: cache-add-rrs ( rrs -- ) [ cache-add-rr ] each ;
diff --git a/extra/dns/dns.factor b/extra/dns/dns.factor
new file mode 100644
index 0000000000..560db69bb2
--- /dev/null
+++ b/extra/dns/dns.factor
@@ -0,0 +1,462 @@
+
+USING: kernel byte-arrays combinators strings arrays sequences splitting 
+       math math.functions math.parser random
+       destructors
+       io io.binary io.sockets io.encodings.binary
+       accessors
+       combinators.cleave
+       newfx
+       symbols
+       ;
+
+IN: dns
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+TUPLE: query name type class ;
+
+TUPLE: rr name type class ttl rdata ;
+
+TUPLE: hinfo cpu os ;
+
+TUPLE: mx preference exchange ;
+
+TUPLE: soa mname rname serial refresh retry expire minimum ;
+
+TUPLE: message
+       id qr opcode aa tc rd ra z rcode
+       question-section
+       answer-section
+       authority-section
+       additional-section ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+: random-id ( -- id ) 2 16 ^ random ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+! TYPE
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+SYMBOLS: A NS MD MF CNAME SOA MB MG MR NULL WKS PTR HINFO MINFO MX TXT ;
+
+: type-table ( -- table )
+  {
+    { A     1 }
+    { NS    2 }
+    { MD    3 }
+    { MF    4 }
+    { CNAME 5 }
+    { SOA   6 }
+    { MB    7 }
+    { MG    8 }
+    { MR    9 }
+    { NULL  10 }
+    { WKS   11 }
+    { PTR   12 }
+    { HINFO 13 }
+    { MINFO 14 }
+    { MX    15 }
+    { TXT   16 }
+  } ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+! CLASS
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+SYMBOLS: IN CS CH HS ;
+
+: class-table ( -- table )
+  {
+    { IN 1 }
+    { CS 2 }
+    { CH 3 }
+    { HS 4 }
+  } ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+! OPCODE
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+SYMBOLS: QUERY IQUERY STATUS ;
+
+: opcode-table ( -- table )
+  {
+    { QUERY  0 }
+    { IQUERY 1 }
+    { STATUS 2 }
+  } ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+! RCODE
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+SYMBOLS: NO-ERROR FORMAT-ERROR SERVER-FAILURE NAME-ERROR NOT-IMPLEMENTED
+         REFUSED ;
+
+: rcode-table ( -- table )
+  {
+    { NO-ERROR        0 }
+    { FORMAT-ERROR    1 }
+    { SERVER-FAILURE  2 }
+    { NAME-ERROR      3 }
+    { NOT-IMPLEMENTED 4 }
+    { REFUSED         5 }
+  } ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+: <message> ( -- message )
+  message new
+    random-id >>id
+    0         >>qr
+    QUERY     >>opcode
+    0         >>aa
+    0         >>tc
+    1         >>rd
+    0         >>ra
+    0         >>z
+    NO-ERROR  >>rcode
+    { }       >>question-section
+    { }       >>answer-section
+    { }       >>authority-section
+    { }       >>additional-section ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+: ip->ba ( ip -- ba ) "." split [ string>number ] map >byte-array ;
+
+: label->ba ( label -- ba ) [ >byte-array ] [ length ] bi prefix ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+: uint8->ba  ( n -- ba ) 1 >be ;
+: uint16->ba ( n -- ba ) 2 >be ;
+: uint32->ba ( n -- ba ) 4 >be ;
+: uint64->ba ( n -- ba ) 8 >be ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+: dn->ba ( dn -- ba ) "." split [ label->ba ] map concat ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+: query->ba ( query -- ba )
+    {
+      [ name>>                 dn->ba ]
+      [ type>>  type-table  of uint16->ba ]
+      [ class>> class-table of uint16->ba ]
+    }
+  <arr> concat ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+: hinfo->ba ( rdata -- ba )
+    [ cpu>> label->ba ]
+    [ os>>  label->ba ]
+  bi append ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+: mx->ba ( rdata -- ba )
+    [ preference>> uint16->ba ]
+    [ exchange>>   dn->ba ]
+  bi append ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+: soa->ba ( rdata -- ba )
+    {
+      [ mname>>   dn->ba ]
+      [ rname>>   dn->ba ]
+      [ serial>>  uint32->ba ]
+      [ refresh>> uint32->ba ]
+      [ retry>>   uint32->ba ]
+      [ expire>>  uint32->ba ]
+      [ minimum>> uint32->ba ]
+    }
+  <arr> concat ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+: rdata->ba ( type rdata -- ba )
+  swap
+    {
+      { CNAME [ dn->ba ] }
+      { HINFO [ hinfo->ba ] }
+      { MX    [ mx->ba ] }
+      { NS    [ dn->ba ] }
+      { PTR   [ dn->ba ] }
+      { SOA   [ soa->ba ] }
+      { A     [ ip->ba ] }
+    }
+  case ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+: rr->ba ( rr -- ba )
+    {
+      [ name>>                 dn->ba     ]
+      [ type>>  type-table  of uint16->ba ]
+      [ class>> class-table of uint16->ba ]
+      [ ttl>>   uint32->ba ]
+      [
+        [ type>>            ] [ rdata>> ] bi rdata->ba
+        [ length uint16->ba ] [         ] bi append
+      ]
+    }
+  <arr> concat ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+: header-bits-ba ( message -- ba )
+    {
+      [ qr>>                     15 shift ]
+      [ opcode>> opcode-table of 11 shift ]
+      [ aa>>                     10 shift ]
+      [ tc>>                      9 shift ]
+      [ rd>>                      8 shift ]
+      [ ra>>                      7 shift ]
+      [ z>>                       4 shift ]
+      [ rcode>>  rcode-table of   0 shift ]
+    }
+  <arr> sum uint16->ba ;
+
+: message->ba ( message -- ba )
+    {
+      [ id>> uint16->ba ]
+      [ header-bits-ba ]
+      [ question-section>>   length uint16->ba ]
+      [ answer-section>>     length uint16->ba ]
+      [ authority-section>>  length uint16->ba ]
+      [ additional-section>> length uint16->ba ]
+      [ question-section>>   [ query->ba ] map concat ]
+      [ answer-section>>     [ rr->ba    ] map concat ]
+      [ authority-section>>  [ rr->ba    ] map concat ]
+      [ additional-section>> [ rr->ba    ] map concat ]
+    }
+  <arr> concat ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+: get-single ( ba i -- n ) at ;
+: get-double ( ba i -- n ) dup 2 + subseq be> ;
+: get-quad   ( ba i -- n ) dup 4 + subseq be> ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+: label-length ( ba i -- length ) get-single ;
+
+: skip-label ( ba i -- ba i ) 2dup label-length + 1 + ;
+
+: null-label? ( ba i -- ? ) get-single 0 = ;
+
+: get-label ( ba i -- label ) [ 1 + ] [ skip-label nip ] 2bi subseq >string ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+: bit-test ( a b -- ? ) bitand 0 = not ;
+
+: pointer? ( ba i -- ? ) get-single BIN: 11000000 bit-test ;
+
+: pointer ( ba i -- val ) get-double BIN: 0011111111111111 bitand ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+: skip-name ( ba i -- ba i )
+    {
+      { [ 2dup null-label? ] [ 1 + ] }
+      { [ 2dup pointer?    ] [ 2 + ] }
+      { [ t ] [ skip-label skip-name ] }
+    }
+  cond ;
+
+: get-name ( ba i -- name )
+    {
+      { [ 2dup null-label? ] [ 2drop "" ] }
+      { [ 2dup pointer?    ] [ dupd pointer get-name ] }
+      {
+        [ t ]
+        [
+          [ get-label ]
+          [ skip-label get-name ]
+          2bi
+          "." swap 3append
+        ]
+      }
+    }
+  cond ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+: get-query ( ba i -- query )
+    [ get-name ]
+    [
+      skip-name
+      [ 0 + get-double type-table  key-of ]
+      [ 2 + get-double class-table key-of ]
+      2bi
+    ]
+  2bi query boa ;
+
+: skip-query ( ba i -- ba i ) skip-name 4 + ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+: get-mx ( ba i -- mx ) [ get-double ] [ 2 + get-double ] 2bi mx boa ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+: get-soa ( ba i -- soa )
+    {
+      [           get-name ]
+      [ skip-name get-name ]
+      [
+        skip-name
+        skip-name
+        {
+          [  0 + get-quad ]
+          [  4 + get-quad ]
+          [  8 + get-quad ]
+          [ 12 + get-quad ]
+          [ 16 + get-quad ]
+        }
+          2cleave
+      ]
+    }
+  2cleave soa boa ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+: get-ip ( ba i -- ip ) dup 4 + subseq >array [ number>string ] map "." join ;
+
+: get-rdata ( ba i type -- rdata )
+    {
+      { CNAME [ get-name ] }
+      { NS    [ get-name ] }
+      { PTR   [ get-name ] }
+      { MX    [ get-mx   ] }
+      { SOA   [ get-soa  ] }
+      { A     [ get-ip   ] }
+    }
+  case ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+: get-rr ( ba i -- rr )
+  [ get-name ]
+  [
+    skip-name
+      {
+        [ 0 + get-double type-table  key-of ]
+        [ 2 + get-double class-table key-of ]
+        [ 4 + get-quad   ]
+        [ [ 10 + ] [ get-double type-table key-of ] 2bi get-rdata ]
+      }
+    2cleave
+  ]
+    2bi rr boa ;
+
+: skip-rr ( ba i -- ba i ) skip-name 8 + 2dup get-double + 2 + ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+: get-question-section ( ba i count -- seq ba i )
+  [ drop [ skip-query ] [ get-query ] 2bi ] map -rot ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+: get-rr-section ( ba i count -- seq ba i )
+  [ drop [ skip-rr ] [ get-rr ] 2bi ] map -rot ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+: >> neg shift ;
+
+: get-header-bits ( ba i -- qr opcode aa tc rd ra z rcode )
+    get-double
+    {
+      [ 15 >> BIN:    1 bitand ]
+      [ 11 >> BIN:  111 bitand opcode-table key-of ]
+      [ 10 >> BIN:    1 bitand ]
+      [  9 >> BIN:    1 bitand ]
+      [  8 >> BIN:    1 bitand ]
+      [  7 >> BIN:    1 bitand ]
+      [  4 >> BIN:  111 bitand ]
+      [       BIN: 1111 bitand rcode-table key-of ]
+    }
+  cleave ;
+
+: parse-message ( ba -- message )
+  0
+  {
+    [ get-double ]
+    [ 2 + get-header-bits ]
+    [
+      4 +
+      {
+        [ 8 +            ]
+        [ 0 + get-double ]
+        [ 2 + get-double ]
+        [ 4 + get-double ]
+        [ 6 + get-double ]
+      }
+        2cleave
+      >r >r >r
+      get-question-section r>
+      get-rr-section       r>
+      get-rr-section       r>
+      get-rr-section
+      2drop
+    ]
+  }
+    2cleave message boa ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+: send-receive-udp ( ba server -- ba )
+  f 0 <inet4> <datagram>
+    [
+      [ send ] [ receive drop ] bi
+    ]
+  with-disposal ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+: send-receive-tcp ( ba server -- ba )
+  [ dup length 2 >be prepend ] [ ] bi*
+  binary
+    [
+      write flush
+      2 read be> read
+    ]
+  with-client ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+: >dns-inet4 ( obj -- inet4 )
+  dup string?
+    [ 53 <inet4> ]
+    [            ]
+  if ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+: ask-server ( message server -- message )
+  [ message->ba ] [ >dns-inet4 ] bi*
+  2dup
+  send-receive-udp parse-message
+  dup tc>> 1 =
+    [ drop send-receive-tcp parse-message ]
+    [ nip nip                             ]
+  if ;
+
+! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+: dns-servers ( -- seq ) V{ } ;
+
+: dns-server ( -- server ) dns-servers random ;
+
+: ask ( message -- message ) dns-server ask-server ;
+
+: <query-message> ( query -- message ) <message> swap {1} >>question-section ;
\ No newline at end of file

From 6386de6c2a306dad99e67b89ce1d8c6bf27a6b1b Mon Sep 17 00:00:00 2001
From: Doug Coleman <doug.coleman@gmail.com>
Date: Fri, 23 May 2008 16:30:19 -0500
Subject: [PATCH 5/7] move regexp4 to unmaintained...regexp5 is coming soon

---
 unmaintained/regexp4/regexp4-tests.factor | 244 ++++++++
 unmaintained/regexp4/regexp4.factor       | 712 ++++++++++++++++++++++
 2 files changed, 956 insertions(+)
 create mode 100644 unmaintained/regexp4/regexp4-tests.factor
 create mode 100644 unmaintained/regexp4/regexp4.factor

diff --git a/unmaintained/regexp4/regexp4-tests.factor b/unmaintained/regexp4/regexp4-tests.factor
new file mode 100644
index 0000000000..c941d0fb75
--- /dev/null
+++ b/unmaintained/regexp4/regexp4-tests.factor
@@ -0,0 +1,244 @@
+USING: regexp4 tools.test kernel ;
+IN: regexp4-tests
+
+[ f ] [ "b" "a*" <regexp> matches? ] unit-test
+[ t ] [ "" "a*" <regexp> matches? ] unit-test
+[ t ] [ "a" "a*" <regexp> matches? ] unit-test
+[ t ] [ "aaaaaaa" "a*"  <regexp> matches? ] unit-test
+[ f ] [ "ab" "a*" <regexp> matches? ] unit-test
+
+[ t ] [ "abc" "abc" <regexp> matches? ] unit-test
+[ t ] [ "a" "a|b|c" <regexp> matches? ] unit-test
+[ t ] [ "b" "a|b|c" <regexp> matches? ] unit-test
+[ t ] [ "c" "a|b|c" <regexp> matches? ] unit-test
+[ f ] [ "c" "d|e|f" <regexp> matches? ] unit-test
+
+[ f ] [ "aa" "a|b|c" <regexp> matches? ] unit-test
+[ f ] [ "bb" "a|b|c" <regexp> matches? ] unit-test
+[ f ] [ "cc" "a|b|c" <regexp> matches? ] unit-test
+[ f ] [ "cc" "d|e|f" <regexp> matches? ] unit-test
+
+[ f ] [ "" "a+" <regexp> matches? ] unit-test
+[ t ] [ "a" "a+" <regexp> matches? ] unit-test
+[ t ] [ "aa" "a+" <regexp> matches? ] unit-test
+
+[ t ] [ "" "a?" <regexp> matches? ] unit-test
+[ t ] [ "a" "a?" <regexp> matches? ] unit-test
+[ f ] [ "aa" "a?" <regexp> matches? ] unit-test
+
+[ f ] [ "" "." <regexp> matches? ] unit-test
+[ t ] [ "a" "." <regexp> matches? ] unit-test
+[ t ] [ "." "." <regexp> matches? ] unit-test
+! [ f ] [ "\n" "." <regexp> matches? ] unit-test
+
+[ f ] [ "" ".+" <regexp> matches? ] unit-test
+[ t ] [ "a" ".+" <regexp> matches? ] unit-test
+[ t ] [ "ab" ".+" <regexp> matches? ] unit-test
+
+
+[ t ] [ "" "a|b*|c+|d?" <regexp> matches? ] unit-test
+[ t ] [ "a" "a|b*|c+|d?" <regexp> matches? ] unit-test
+[ t ] [ "c" "a|b*|c+|d?" <regexp> matches? ] unit-test
+[ t ] [ "cc" "a|b*|c+|d?" <regexp> matches? ] unit-test
+[ f ] [ "ccd" "a|b*|c+|d?" <regexp> matches? ] unit-test
+[ t ] [ "d" "a|b*|c+|d?" <regexp> matches? ] unit-test
+
+[ t ] [ "foo" "foo|bar" <regexp> matches? ] unit-test
+[ t ] [ "bar" "foo|bar" <regexp> matches? ] unit-test
+[ f ] [ "foobar" "foo|bar" <regexp> matches? ] unit-test
+
+[ f ] [ "" "(a)" <regexp> matches? ] unit-test
+[ t ] [ "a" "(a)" <regexp> matches? ] unit-test
+[ f ] [ "aa" "(a)" <regexp> matches? ] unit-test
+[ t ] [ "aa" "(a*)" <regexp> matches? ] unit-test
+
+[ f ] [ "aababaaabbac" "(a|b)+" <regexp> matches? ] unit-test
+[ t ] [ "ababaaabba" "(a|b)+" <regexp> matches? ] unit-test
+
+[ f ] [ "" "a{1}" <regexp> matches? ] unit-test
+[ t ] [ "a" "a{1}" <regexp> matches? ] unit-test
+[ f ] [ "aa" "a{1}" <regexp> matches? ] unit-test
+
+[ f ] [ "a" "a{2,}" <regexp> matches? ] unit-test
+[ t ] [ "aaa" "a{2,}" <regexp> matches? ] unit-test
+[ t ] [ "aaaa" "a{2,}" <regexp> matches? ] unit-test
+[ t ] [ "aaaaa" "a{2,}" <regexp> matches? ] unit-test
+
+[ t ] [ "" "a{,2}" <regexp> matches? ] unit-test
+[ t ] [ "a" "a{,2}" <regexp> matches? ] unit-test
+[ t ] [ "aa" "a{,2}" <regexp> matches? ] unit-test
+[ f ] [ "aaa" "a{,2}" <regexp> matches? ] unit-test
+[ f ] [ "aaaa" "a{,2}" <regexp> matches? ] unit-test
+[ f ] [ "aaaaa" "a{,2}" <regexp> matches? ] unit-test
+
+[ f ] [ "" "a{1,3}" <regexp> matches? ] unit-test
+[ t ] [ "a" "a{1,3}" <regexp> matches? ] unit-test
+[ t ] [ "aa" "a{1,3}" <regexp> matches? ] unit-test
+[ t ] [ "aaa" "a{1,3}" <regexp> matches? ] unit-test
+[ f ] [ "aaaa" "a{1,3}" <regexp> matches? ] unit-test
+
+[ f ] [ "" "[a]" <regexp> matches? ] unit-test
+[ t ] [ "a" "[a]" <regexp> matches? ] unit-test
+[ t ] [ "a" "[abc]" <regexp> matches? ] unit-test
+[ f ] [ "b" "[a]" <regexp> matches? ] unit-test
+[ f ] [ "d" "[abc]" <regexp> matches? ] unit-test
+[ t ] [ "ab" "[abc]{1,2}" <regexp> matches? ] unit-test
+[ f ] [ "abc" "[abc]{1,2}" <regexp> matches? ] unit-test
+
+[ f ] [ "" "[^a]" <regexp> matches? ] unit-test
+[ f ] [ "a" "[^a]" <regexp> matches? ] unit-test
+[ f ] [ "a" "[^abc]" <regexp> matches? ] unit-test
+[ t ] [ "b" "[^a]" <regexp> matches? ] unit-test
+[ t ] [ "d" "[^abc]" <regexp> matches? ] unit-test
+[ f ] [ "ab" "[^abc]{1,2}" <regexp> matches? ] unit-test
+[ f ] [ "abc" "[^abc]{1,2}" <regexp> matches? ] unit-test
+
+[ t ] [ "]" "[]]" <regexp> matches? ] unit-test
+[ f ] [ "]" "[^]]" <regexp> matches? ] unit-test
+[ t ] [ "a" "[^]]" <regexp> matches? ] unit-test
+
+[ t ] [ "^" "[^]" <regexp> matches? ] must-fail
+[ t ] [ "^" "[]^]" <regexp> matches? ] unit-test
+[ t ] [ "]" "[]^]" <regexp> matches? ] unit-test
+
+[ t ] [ "[" "[[]" <regexp> matches? ] unit-test
+[ f ] [ "^" "[^^]" <regexp> matches? ] unit-test
+[ t ] [ "a" "[^^]" <regexp> matches? ] unit-test
+
+[ t ] [ "-" "[-]" <regexp> matches? ] unit-test
+[ f ] [ "a" "[-]" <regexp> matches? ] unit-test
+[ f ] [ "-" "[^-]" <regexp> matches? ] unit-test
+[ t ] [ "a" "[^-]" <regexp> matches? ] unit-test
+
+[ t ] [ "-" "[-a]" <regexp> matches? ] unit-test
+[ t ] [ "a" "[-a]" <regexp> matches? ] unit-test
+[ t ] [ "-" "[a-]" <regexp> matches? ] unit-test
+[ t ] [ "a" "[a-]" <regexp> matches? ] unit-test
+[ f ] [ "b" "[a-]" <regexp> matches? ] unit-test
+[ f ] [ "-" "[^-]" <regexp> matches? ] unit-test
+[ t ] [ "a" "[^-]" <regexp> matches? ] unit-test
+
+[ f ] [ "-" "[a-c]" <regexp> matches? ] unit-test
+[ t ] [ "-" "[^a-c]" <regexp> matches? ] unit-test
+[ t ] [ "b" "[a-c]" <regexp> matches? ] unit-test
+[ f ] [ "b" "[^a-c]" <regexp> matches? ] unit-test
+
+[ t ] [ "-" "[a-c-]" <regexp> matches? ] unit-test
+[ f ] [ "-" "[^a-c-]" <regexp> matches? ] unit-test
+
+[ t ] [ "\\" "[\\\\]" <regexp> matches? ] unit-test
+[ f ] [ "a" "[\\\\]" <regexp> matches? ] unit-test
+[ f ] [ "\\" "[^\\\\]" <regexp> matches? ] unit-test
+[ t ] [ "a" "[^\\\\]" <regexp> matches? ] unit-test
+
+[ t ] [ "0" "[\\d]" <regexp> matches? ] unit-test
+[ f ] [ "a" "[\\d]" <regexp> matches? ] unit-test
+[ f ] [ "0" "[^\\d]" <regexp> matches? ] unit-test
+[ t ] [ "a" "[^\\d]" <regexp> matches? ] unit-test
+
+[ t ] [ "a" "[a-z]{1,}|[A-Z]{2,4}|b*|c|(f|g)*" <regexp> matches? ] unit-test
+[ t ] [ "a" "[a-z]{1,2}|[A-Z]{3,3}|b*|c|(f|g)*" <regexp> matches? ] unit-test
+[ t ] [ "a" "[a-z]{1,2}|[A-Z]{3,3}" <regexp> matches? ] unit-test
+
+[ t ] [ "1000" "\\d{4,6}" <regexp> matches? ] unit-test
+[ t ] [ "1000" "[0-9]{4,6}" <regexp> matches? ] unit-test
+
+[ t ] [ "abc" "\\p{Lower}{3}" <regexp> matches? ] unit-test
+[ f ] [ "ABC" "\\p{Lower}{3}" <regexp> matches? ] unit-test
+[ t ] [ "ABC" "\\p{Upper}{3}" <regexp> matches? ] unit-test
+[ f ] [ "abc" "\\p{Upper}{3}" <regexp> matches? ] unit-test
+
+[ f ] [ "abc" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
+[ t ] [ "ABC" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
+
+[ t ] [ "" "\\Q\\E" <regexp> matches? ] unit-test
+[ f ] [ "a" "\\Q\\E" <regexp> matches? ] unit-test
+[ t ] [ "|*+" "\\Q|*+\\E" <regexp> matches? ] unit-test
+[ f ] [ "abc" "\\Q|*+\\E" <regexp> matches? ] unit-test
+[ t ] [ "s" "\\Qs\\E" <regexp> matches? ] unit-test
+
+[ t ] [ "S" "\\0123" <regexp> matches? ] unit-test
+[ t ] [ "SXY" "\\0123XY" <regexp> matches? ] unit-test
+[ t ] [ "x" "\\x78" <regexp> matches? ] unit-test
+[ f ] [ "y" "\\x78" <regexp> matches? ] unit-test
+[ t ] [ "x" "\\u000078" <regexp> matches? ] unit-test
+[ f ] [ "y" "\\u000078" <regexp> matches? ] unit-test
+
+[ t ] [ "ab" "a+b" <regexp> matches? ] unit-test
+[ f ] [ "b" "a+b" <regexp> matches? ] unit-test
+[ t ] [ "aab" "a+b" <regexp> matches? ] unit-test
+[ f ] [ "abb" "a+b" <regexp> matches? ] unit-test
+
+[ t ] [ "abbbb" "ab*" <regexp> matches? ] unit-test
+[ t ] [ "a" "ab*" <regexp> matches? ] unit-test
+[ f ] [ "abab" "ab*" <regexp> matches? ] unit-test
+
+[ f ] [ "x" "\\." <regexp> matches? ] unit-test
+[ t ] [ "." "\\." <regexp> matches? ] unit-test
+
+[ t ] [ "aaaab" "a+ab" <regexp> matches? ] unit-test
+[ f ] [ "aaaxb" "a+ab" <regexp> matches? ] unit-test
+[ t ] [ "aaacb" "a+cb" <regexp> matches? ] unit-test
+[ f ] [ "aaaab" "a++ab" <regexp> matches? ] unit-test
+[ t ] [ "aaacb" "a++cb" <regexp> matches? ] unit-test
+
+[ 3 ] [ "aaacb" "a*" <regexp> match-head ] unit-test
+[ 1 ] [ "aaacb" "a+?" <regexp> match-head ] unit-test
+[ 2 ] [ "aaacb" "aa?" <regexp> match-head ] unit-test
+[ 1 ] [ "aaacb" "aa??" <regexp> match-head ] unit-test
+[ 3 ] [ "aacb" "aa?c" <regexp> match-head ] unit-test
+[ 3 ] [ "aacb" "aa??c" <regexp> match-head ] unit-test
+
+! [ t ] [ "aaa" "AAA" t <regexp> matches? ] unit-test
+! [ f ] [ "aax" "AAA" t <regexp> matches? ] unit-test
+! [ t ] [ "aaa" "A*" t <regexp> matches? ] unit-test
+! [ f ] [ "aaba" "A*" t <regexp> matches? ] unit-test
+! [ t ] [ "b" "[AB]" t <regexp> matches? ] unit-test
+! [ f ] [ "c" "[AB]" t <regexp> matches? ] unit-test
+! [ t ] [ "c" "[A-Z]" t <regexp> matches? ] unit-test
+! [ f ] [ "3" "[A-Z]" t <regexp> matches? ] unit-test
+
+[ ] [
+    "(0[lL]?|[1-9]\\d{0,9}(\\d{0,9}[lL])?|0[xX]\\p{XDigit}{1,8}(\\p{XDigit}{0,8}[lL])?|0[0-7]{1,11}([0-7]{0,11}[lL])?|([0-9]+\\.[0-9]*|\\.[0-9]+)([eE][+-]?[0-9]+)?[fFdD]?|[0-9]+([eE][+-]?[0-9]+[fFdD]?|([eE][+-]?[0-9]+)?[fFdD]))"
+    <regexp> drop
+] unit-test
+
+[ t ] [ "fxxbar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
+[ f ] [ "foobar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
+
+! [ 3 ] [ "foobar" "foo(?=bar)" <regexp> match-head ] unit-test
+! [ f ] [ "foobxr" "foo(?=bar)" <regexp> match-head ] unit-test
+
+! [ f ] [ "foobxr" "foo\\z" <regexp> match-head ] unit-test
+! [ 3 ] [ "foo" "foo\\z" <regexp> match-head ] unit-test
+
+! [ 3 ] [ "foo bar" "foo\\b" <regexp> match-head ] unit-test
+! [ f ] [ "fooxbar" "foo\\b" <regexp> matches? ] unit-test
+! [ t ] [ "foo" "foo\\b" <regexp> matches? ] unit-test
+! [ t ] [ "foo bar" "foo\\b bar" <regexp> matches? ] unit-test
+! [ f ] [ "fooxbar" "foo\\bxbar" <regexp> matches? ] unit-test
+! [ f ] [ "foo" "foo\\bbar" <regexp> matches? ] unit-test
+
+! [ f ] [ "foo bar" "foo\\B" <regexp> matches? ] unit-test
+! [ 3 ] [ "fooxbar" "foo\\B" <regexp> match-head ] unit-test
+! [ t ] [ "foo" "foo\\B" <regexp> matches? ] unit-test
+! [ f ] [ "foo bar" "foo\\B bar" <regexp> matches? ] unit-test
+! [ t ] [ "fooxbar" "foo\\Bxbar" <regexp> matches? ] unit-test
+! [ f ] [ "foo" "foo\\Bbar" <regexp> matches? ] unit-test
+
+! [ t ] [ "s@f" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
+! [ f ] [ "a" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
+! [ t ] [ ".o" "\\.[a-z]" <regexp> matches? ] unit-test
+
+! Bug in parsing word
+[ t ] [
+    "a"
+    R' a'
+    matches?
+] unit-test
+
+! ((A)(B(C)))
+! 1.  ((A)(B(C)))
+! 2. (A)
+! 3. (B(C))
+! 4. (C) 
diff --git a/unmaintained/regexp4/regexp4.factor b/unmaintained/regexp4/regexp4.factor
new file mode 100644
index 0000000000..fac6673e82
--- /dev/null
+++ b/unmaintained/regexp4/regexp4.factor
@@ -0,0 +1,712 @@
+! Copyright (C) 2008 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: accessors arrays assocs combinators kernel math
+sequences namespaces locals combinators.lib state-tables
+math.parser state-parser sets dlists unicode.categories
+math.order quotations shuffle math.ranges splitting
+symbols fry parser math.ranges inspector strings ;
+IN: regexp4
+
+SYMBOLS: eps start-state final-state beginning-of-text
+end-of-text left-parenthesis alternation left-bracket
+caret dash ampersand colon ;
+
+SYMBOL: runtime-epsilon
+
+TUPLE: regexp raw parentheses-count bracket-count
+state stack nfa new-states dfa minimized-dfa
+dot-matches-newlines? capture-group captured-groups ;
+
+TUPLE: capture-group n range ;
+
+ERROR: parentheses-underflow ;
+ERROR: unbalanced-parentheses ;
+ERROR: unbalanced-brackets ;
+
+: push-stack ( regexp token -- ) swap stack>> push ;
+: push-all-stack ( regexp seq -- ) swap stack>> push-all ;
+: next-state ( regexp -- n ) [ 1+ ] change-state state>> ;
+
+: check-parentheses-underflow ( regexp -- )
+    parentheses-count>> 0 < [ parentheses-underflow ] when ;
+
+: check-unbalanced-parentheses ( regexp -- )
+    parentheses-count>> 0 > [ unbalanced-parentheses ] when ;
+
+:: (apply-alternation) ( stack regexp -- )
+    [let | s2 [ stack peek first ]
+           s3 [ stack pop second ]
+           s0 [ stack peek alternation = [ stack pop* ] when stack peek first ]
+           s1 [ stack pop second ]
+           s4 [ regexp next-state ]
+           s5 [ regexp next-state ]
+           table [ regexp nfa>> ] |
+        s5 table add-row
+        s4 eps s0 <entry> table add-entry
+        s4 eps s2 <entry> table add-entry
+        s1 eps s5 <entry> table add-entry
+        s3 eps s5 <entry> table add-entry
+        s1 table final-states>> delete-at
+        s3 table final-states>> delete-at
+        t s5 table final-states>> set-at
+        s4 s5 2array stack push ] ;
+
+: apply-alternation ( regexp -- )
+    [ stack>> ] [ (apply-alternation) ] bi ;
+
+: apply-alternation? ( stack -- ? )
+    dup length dup 3 <
+    [ 2drop f ] [ 2 - swap nth alternation = ] if ;
+
+:: (apply-concatenation) ( stack regexp -- )
+    [let* |
+            s2 [ stack peek first ]
+            s3 [ stack pop second ]
+            s0 [ stack peek first ]
+            s1 [ stack pop second ]
+            table [ regexp nfa>> ] |
+        s1 eps s2 <entry> table set-entry
+        s1 table final-states>> delete-at
+        s3 table add-row
+        s0 s3 2array stack push ] ;
+
+: apply-concatenation ( regexp -- )
+    [ stack>> ] [ (apply-concatenation) ] bi ;
+
+: apply-concatenation? ( seq -- ? )
+    dup length dup 2 <
+    [ 2drop f ] [ 2 - swap nth array? ] if ;
+
+: apply-loop ( seq regexp -- seq regexp )
+    over length 1 > [
+        2dup over apply-alternation?
+        [ (apply-alternation) ] [ (apply-concatenation) ] if apply-loop
+    ] when ;
+
+: cut-out ( vector n -- vector' vector ) cut rest ;
+
+: cut-stack ( obj vector -- vector' vector )
+    tuck last-index cut-out swap ;
+    
+: apply-til-last ( regexp token -- )
+    swap [ cut-stack ] change-stack
+    apply-loop stack>> push-all ;
+
+: concatenation-loop ( regexp -- )
+    dup stack>> dup apply-concatenation?
+    [ over (apply-concatenation) concatenation-loop ] [ 2drop ] if ;
+
+:: apply-kleene-closure ( regexp -- )
+    [let* | stack [ regexp stack>> ]
+            s0 [ stack peek first ]
+            s1 [ stack pop second ]
+            s2 [ regexp next-state ]
+            s3 [ regexp next-state ]
+            table [ regexp nfa>> ] |
+        s1 table final-states>> delete-at
+        t s3 table final-states>> set-at
+        s3 table add-row
+        s1 eps s0 <entry> table add-entry
+        s2 eps s0 <entry> table add-entry
+        s2 eps s3 <entry> table add-entry
+        s1 eps s3 <entry> table add-entry
+        s2 s3 2array stack push ] ;
+
+: add-numbers ( n obj -- obj )
+    2dup [ number? ] bi@ and
+    [ + ] [ dup sequence? [ [ + ] with map ] [ nip ] if ] if ;
+
+: increment-columns ( n assoc -- )
+    dup [ >r swap >r add-numbers r> r> set-at ] curry with* assoc-each ;
+
+:: copy-state-rows ( regexp range -- )
+    [let* | len [ range range-length ]
+            offset [ regexp state>> range range-min - 1+ ]
+            state [ regexp [ len + ] change-state ] |
+        regexp nfa>> rows>>
+        [ drop range member? ] assoc-filter
+        [
+            [ offset + ] dip
+            [ offset swap add-numbers ] assoc-map
+        ] assoc-map
+        regexp nfa>> [ assoc-union ] change-rows drop
+        range [ range-min ] [ range-max ] bi [ offset + ] bi@ 2array
+        regexp stack>> push ] ;
+
+: last-state ( regexp -- range )
+    stack>> peek first2 [a,b] ;
+
+: set-last-state-final ( ? regexp -- )
+    [ stack>> peek second ] [ nfa>> final-states>> ] bi set-at ;
+
+: apply-plus-closure ( regexp -- )
+    [ dup last-state copy-state-rows ]
+    [ apply-kleene-closure ]
+    [ apply-concatenation ] tri ;
+
+: apply-question-closure ( regexp -- )
+    [ stack>> peek first2 eps swap <entry> ] [ nfa>> add-entry ] bi ;
+
+: with0 ( obj n quot -- n quot' ) swapd curry ; inline
+
+
+: range>state ( range -- pair )
+    [ from>> ] [ length>> ] bi over - 2array ;
+
+: copy-state ( regexp range n -- )
+    dup zero? [
+        drop range>state over stack>> push apply-question-closure
+    ] [
+        [ copy-state-rows ] with0 with0 times
+    ] if ;
+
+:: (exactly-n) ( regexp state n -- )
+    regexp state n copy-state
+    t regexp set-last-state-final ;
+
+: exactly-n ( regexp n -- )
+    >r dup last-state r> 1- (exactly-n) ;
+
+: exactly-n-concatenated ( regexp state n -- )
+B
+    [ (exactly-n) ] 3keep
+    nip 1- [ apply-concatenation ] with0 times ;
+
+:: at-least-n ( regexp n -- )
+    [let | state [ regexp stack>> pop first2 [a,b] ] |
+        regexp state n copy-state
+        state regexp stack>> push
+        regexp apply-kleene-closure ] ; 
+
+: peek-last ( regexp -- range )
+    stack>> peek first2 [a,b] ;
+
+: pop-last ( regexp -- range )
+    stack>> pop first2 [a,b] ;
+
+:: at-most-n ( regexp n -- )
+    [let | state [ regexp pop-last ] |
+        regexp state n [ 1+ exactly-n-concatenated ] with with each
+        regexp n 1- [ apply-alternation ] with0 times
+        regexp apply-question-closure ] ;
+
+:: from-m-to-n ( regexp m n -- )
+    [let | state [ regexp pop-last ] |
+        regexp state
+        m n [a,b] [ exactly-n-concatenated ] with with each
+        regexp n m - [ apply-alternation ] with0 times ] ;
+
+: apply-brace-closure ( regexp from/f to/f comma? -- )
+    [
+        2dup and
+        [ from-m-to-n ]
+        [ [ nip at-most-n ] [ at-least-n ] if* ] if
+    ] [ drop exactly-n ] if ;
+
+:: push-single-nfa ( regexp obj -- )
+    [let | s0 [ regexp next-state ]
+           s1 [ regexp next-state ]
+           stack [ regexp stack>> ]
+           table [ regexp nfa>> ] |
+        s0 obj s1 <entry> table set-entry
+        s1 table add-row
+        t s1 table final-states>> set-at
+        s0 s1 2array stack push ] ;
+
+: set-start-state ( regexp -- )
+    dup stack>> dup empty? [
+        2drop
+    ] [
+        [ nfa>> ] [ pop first ] bi* >>start-state drop
+    ] if ;
+
+: ascii? ( n -- ? ) 0 HEX: 7f between? ;
+: octal-digit? ( n -- ? ) CHAR: 0 CHAR: 7 between? ;
+: decimal-digit? ( n -- ? ) CHAR: 0 CHAR: 9 between? ;
+
+: hex-digit? ( n -- ? )
+    [
+        [ dup decimal-digit? ]
+        [ dup CHAR: a CHAR: f between? ]
+        [ dup CHAR: A CHAR: F between? ]
+    ] || nip ;
+
+: control-char? ( n -- ? )
+    [
+        [ dup 0 HEX: 1f between? ]
+        [ dup HEX: 7f = ]
+    ] || nip ;
+
+: punct? ( n -- ? )
+    "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" member? ;
+
+: c-identifier-char? ( ch -- ? ) 
+    [ [ dup alpha? ] [ dup CHAR: _ = ] ] || nip ;
+
+: java-blank? ( n -- ? )
+    {   
+        CHAR: \s CHAR: \t CHAR: \n
+        HEX: b HEX: 7 CHAR: \r
+    } member? ;
+
+: java-printable? ( n -- ? )
+    [ [ dup alpha? ] [ dup punct? ] ] || nip ;
+
+ERROR: bad-character-class obj ;
+
+: parse-posix-class ( -- quot )
+    next
+    CHAR: { expect
+    [ get-char CHAR: } = ] take-until
+    {
+        { "Lower" [ [ letter? ] ] }
+        { "Upper" [ [ LETTER? ] ] }
+        { "ASCII" [ [ ascii? ] ] }
+        { "Alpha" [ [ Letter? ] ] }
+        { "Digit" [ [ digit? ] ] }
+        { "Alnum" [ [ alpha? ] ] }
+        { "Punct" [ [ punct? ] ] }
+        { "Graph" [ [ java-printable? ] ] }
+        { "Print" [ [ java-printable? ] ] }
+        { "Blank" [ [ " \t" member? ] ] }
+        { "Cntrl" [ [ control-char? ] ] }
+        { "XDigit" [ [ hex-digit? ] ] }
+        { "Space" [ [ java-blank? ] ] }
+        ! TODO: unicode-character-class, fallthrough in unicode is bad-char-clss
+        [ bad-character-class ]
+    } case ;
+
+ERROR: bad-octal number ;
+
+: parse-octal ( -- n )
+    next get-char drop
+    3 take oct>
+    dup 255 > [ bad-octal ] when ;
+
+ERROR: bad-hex number ;
+
+: parse-short-hex ( -- n )
+    next 2 take hex>
+    dup number? [ bad-hex ] unless ;
+
+: parse-long-hex ( -- n )
+    next 6 take hex>
+    dup number? [ bad-hex ] unless ;
+
+: parse-control-character ( -- n )
+    next get-char ;
+
+: dot-construction ( regexp -- )
+    [ CHAR: \n = not ] push-single-nfa ;
+
+: front-anchor-construction ( regexp -- )
+    drop ;
+
+: back-anchor-construction  ( regexp -- )
+    drop ;
+
+: parse-brace ( -- from/f to/f comma? )
+    next
+    [ get-char CHAR: } = ] take-until
+    "," split1 [ [ string>number ] bi@ ] keep >boolean ;
+
+TUPLE: character-class members ;
+TUPLE: character-class-range from to ;
+TUPLE: negated-character-class < character-class ;
+TUPLE: negated-character-class-range < character-class-range ;
+TUPLE: intersection-class < character-class ;
+TUPLE: negated-intersection-class < intersection-class ;
+
+GENERIC: character-class-contains? ( obj character-class -- ? )
+
+: parse-escaped-until ( -- seq )
+    [ get-char CHAR: \ = get-next CHAR: E = and ] take-until
+    next ;
+
+: character-class-predicate ( seq -- quot )
+    boa '[ , character-class-contains? ] ;
+
+ERROR: unmatched-escape-sequence ;
+
+: (parse-escaped) ( regexp ? ch -- obj )
+    {
+        { CHAR: \ [ [ CHAR: \ = ] ] }
+        { CHAR: t [ [ CHAR: \t = ] ] }
+        { CHAR: n [ [ CHAR: \n = ] ] }
+        { CHAR: r [ [ CHAR: \r = ] ] }
+        { CHAR: f [ [ HEX: c = ] ] }
+        { CHAR: a [ [ HEX: 7 = ] ] }
+        { CHAR: e [ [ HEX: 1b = ] ] }
+
+        { CHAR: d [ [ digit? ] ] }
+        { CHAR: D [ [ digit? not ] ] }
+        { CHAR: s [ [ java-blank? ] ] }
+        { CHAR: S [ [ java-blank? not ] ] }
+        { CHAR: w [ [ c-identifier-char? ] ] }
+        { CHAR: W [ [ c-identifier-char? not ] ] }
+
+        { CHAR: p [ parse-posix-class ] }
+        { CHAR: P [ parse-posix-class [ not ] compose ] }
+        { CHAR: x [ parse-short-hex ] }
+        { CHAR: u [ parse-long-hex ] }
+        { CHAR: 0 [ parse-octal ] }
+        { CHAR: c [ parse-control-character ] }
+
+        ! { CHAR: Q [ next parse-escaped-until ] }
+        ! { CHAR: E [ unmatched-escape-sequence ] }
+
+        ! { CHAR: b [ ] } ! a word boundary
+        ! { CHAR: B [ ] } ! a non-word boundary
+        ! { CHAR: A [ ] } ! beginning of input
+        ! { CHAR: G [ ] } ! end of previous match
+        ! { CHAR: Z [ ] } ! end of input but for the final terminator, if any
+        ! { CHAR: z [ ] } ! end of the input
+        [ ]
+    } case ;
+
+: parse-escaped ( regexp -- )
+    next get-char (parse-escaped) push-single-nfa ;
+
+: handle-dash ( vector -- vector )
+    [ dup dash eq? [ drop CHAR: - ] when ] map ;
+
+M: object character-class-contains? ( obj1 obj2 -- ? )
+    = ;
+
+M: callable character-class-contains? ( obj1 callable -- ? )
+    call ;
+
+M: character-class character-class-contains? ( obj cc -- ? )
+    members>> [ character-class-contains? ] with find drop >boolean ;
+
+M: negated-character-class character-class-contains? ( obj cc -- ? )
+    call-next-method not ;
+
+M: character-class-range character-class-contains? ( obj cc -- ? )
+    [ from>> ] [ to>> ] bi between?  ;
+
+M: negated-character-class-range character-class-contains? ( obj cc -- ? )
+    call-next-method not ;
+
+M: intersection-class character-class-contains? ( obj cc -- ? )
+    members>> [ character-class-contains? not ] with find drop not ;
+
+M: negated-intersection-class character-class-contains? ( obj cc -- ? )
+    call-next-method not ;
+
+ERROR: unmatched-negated-character-class class ;
+
+: handle-caret ( obj -- seq class )
+    dup [ length 2 >= ] [ first caret eq? ] bi and [ 
+        rest negated-character-class
+    ] [
+        character-class
+    ] if ;
+
+: make-character-class ( regexp -- )
+    left-bracket over stack>> cut-stack
+    pick (>>stack)
+    handle-dash handle-caret
+    character-class-predicate push-single-nfa ;
+
+: apply-dash ( regexp -- )
+    stack>> dup [ pop ] [ pop* ] [ pop ] tri
+    swap character-class-range boa swap push ;
+
+: apply-dash? ( regexp -- ? )
+    stack>> dup length 3 >=
+    [ [ length 2 - ] keep nth dash eq? ] [ drop f ] if ;
+
+DEFER: parse-character-class
+: (parse-character-class) ( regexp -- )
+    [
+        next get-char
+        {
+            { CHAR: [ [
+                [ 1+ ] change-bracket-count dup left-bracket push-stack
+                parse-character-class
+            ] }
+            { CHAR: ] [
+                [ 1- ] change-bracket-count
+                make-character-class
+            ] }
+            { CHAR: - [ dash push-stack ] }
+            ! { CHAR: & [ ampersand push-stack ] }
+            ! { CHAR: : [ semicolon push-stack ] }
+            { CHAR: \ [ next get-char (parse-escaped) push-stack ] }
+            { f [ unbalanced-brackets ] }
+            [ dupd push-stack dup apply-dash? [ apply-dash ] [ drop ] if ]
+        } case
+    ] [
+        dup bracket-count>> 0 >
+        [ (parse-character-class) ] [ drop ] if
+    ] bi ;
+
+: parse-character-class-second ( regexp -- )
+    get-next
+    {
+        { CHAR: [ [ CHAR: [ push-stack next ] }
+        { CHAR: ] [ CHAR: ] push-stack next ] }
+        { CHAR: - [ CHAR: - push-stack next ] }
+        [ 2drop ]
+    } case ;
+
+: parse-character-class-first ( regexp -- )
+    get-next
+    {
+        { CHAR: ^ [ caret dupd push-stack next parse-character-class-second ] }
+        { CHAR: [ [ CHAR: [ push-stack next ] }
+        { CHAR: ] [ CHAR: ] push-stack next ] }
+        { CHAR: - [ CHAR: - push-stack next ] }
+        [ 2drop ]
+    } case ;
+
+: parse-character-class ( regexp -- )
+    [ parse-character-class-first ] [ (parse-character-class) ] bi ;
+
+ERROR: unsupported-token token ;
+: parse-token ( regexp token -- )
+    dup {
+        { CHAR: ^ [ drop front-anchor-construction ] }
+        { CHAR: $ [ drop back-anchor-construction ] }
+        { CHAR: \ [ drop parse-escaped ] }
+        { CHAR: | [ drop dup concatenation-loop alternation push-stack ] }
+        { CHAR: ( [ drop [ 1+ ] change-parentheses-count left-parenthesis push-stack ] }
+        { CHAR: ) [ drop [ 1- ] change-parentheses-count left-parenthesis apply-til-last ] }
+        { CHAR: * [ drop apply-kleene-closure ] }
+        { CHAR: + [ drop apply-plus-closure ] }
+        { CHAR: ? [ drop apply-question-closure ] }
+        { CHAR: { [ drop parse-brace apply-brace-closure ] }
+        { CHAR: [ [
+            drop
+            dup left-bracket push-stack
+            [ 1+ ] change-bracket-count parse-character-class
+        ] }
+        ! { CHAR: } [ drop drop "brace" ] }
+        { CHAR: . [ drop dot-construction ] }
+        { beginning-of-text [ push-stack ] }
+        { end-of-text [
+            drop {
+                [ check-unbalanced-parentheses ]
+                [ concatenation-loop ]
+                [ beginning-of-text apply-til-last ]
+                [ set-start-state ]
+            } cleave
+        ] }
+        [ drop push-single-nfa ]
+    } case ;
+
+: (parse-raw-regexp) ( regexp -- )
+    get-char [ dupd parse-token next (parse-raw-regexp) ] [ drop ] if* ;
+
+: parse-raw-regexp ( regexp -- )
+    [ beginning-of-text parse-token ]
+    [
+        dup raw>> dup empty? [
+            2drop
+        ] [
+            [ (parse-raw-regexp) ] string-parse
+        ] if
+    ]
+    [ end-of-text parse-token ] tri ;
+
+:: find-delta ( states obj table -- keys )
+    obj states [
+        table get-row at
+        [ dup integer? [ 1array ] when unique ] [ H{ } ] if*
+    ] with map H{ } clone [ assoc-union ] reduce keys ;
+
+:: (find-closure) ( states obj assoc table -- keys )
+    [let | size [ assoc assoc-size ] |
+        assoc states unique assoc-union
+        dup assoc-size size > [
+            obj states [
+                table get-row at* [
+                    dup integer? [ 1array ] when
+                    obj rot table (find-closure)
+                ] [
+                    drop
+                ] if
+            ] with each
+        ] when ] ;
+
+: find-closure ( states obj table -- states )
+    >r H{ } r> (find-closure) keys ;
+
+: find-epsilon-closure ( states table -- states )
+    >r eps H{ } r> (find-closure) keys ;
+
+: filter-special-transition ( vec -- vec' )
+    [ drop eps = not ] assoc-filter ;
+
+: initialize-subset-construction ( regexp -- )
+    <vector-table> >>dfa
+    [
+        nfa>> [ start-state>> 1array ] keep
+        find-epsilon-closure 1dlist
+    ] [
+        swap >>new-states drop
+    ] [
+        [ dfa>> ] [ nfa>> ] bi
+        columns>> filter-special-transition >>columns drop
+    ] tri ;
+
+:: (subset-construction) ( regexp -- )
+    [let* | nfa [ regexp nfa>> ]
+           dfa [ regexp dfa>> ]
+           new-states [ regexp new-states>> ]
+           columns [ dfa columns>> keys ] |
+        
+        new-states dlist-empty? [
+            new-states pop-front
+            dup dfa add-row
+            columns [
+                2dup nfa [ find-delta ] [ find-epsilon-closure ] bi
+                dup [ dfa rows>> key? ] [ empty? ] bi or [
+                    dup new-states push-back
+                ] unless
+                dup empty? [ 3drop ] [ <entry> dfa set-entry ] if
+            ] with each
+            regexp (subset-construction)
+        ] unless ] ;
+
+: set-start/final-states ( regexp -- )
+    dup [ nfa>> start-state>> ]
+    [ dfa>> rows>> keys [ member? ] with filter first ] bi
+    >r dup dfa>> r> >>start-state drop
+
+    dup [ nfa>> final-states>> ] [ dfa>> rows>> ] bi
+    [ keys ] bi@
+    [ intersect empty? not ] with filter
+    >r dfa>> r> >>final-states drop ;
+
+: subset-construction ( regexp -- )
+    [ initialize-subset-construction ]
+    [ (subset-construction) ]
+    [ set-start/final-states ] tri ;
+
+: <regexp> ( raw -- obj )
+    regexp new
+        swap >>raw
+        0 >>parentheses-count
+        0 >>bracket-count
+        -1 >>state
+        V{ } clone >>stack 
+        <vector-table> >>nfa
+        dup [ parse-raw-regexp ] [ subset-construction ] bi ;
+
+! Literal syntax for regexps
+: parse-options ( string -- ? )
+    #! Lame
+    {
+        { "" [ f ] }
+        { "i" [ t ] }
+    } case ;
+
+: parse-regexp ( accum end -- accum )
+    lexer get dup skip-blank
+    [ [ index-from dup 1+ swap ] 2keep swapd subseq swap ] change-lexer-column
+    ! lexer get dup still-parsing-line?
+    ! [ (parse-token) parse-options ] [ drop f ] if
+    <regexp> parsed ;
+
+: R! CHAR: ! parse-regexp ; parsing
+: R" CHAR: " parse-regexp ; parsing
+: R# CHAR: # parse-regexp ; parsing
+: R' CHAR: ' parse-regexp ; parsing
+: R( CHAR: ) parse-regexp ; parsing
+: R/ CHAR: / parse-regexp ; parsing
+: R@ CHAR: @ parse-regexp ; parsing
+: R[ CHAR: ] parse-regexp ; parsing
+: R` CHAR: ` parse-regexp ; parsing
+: R{ CHAR: } parse-regexp ; parsing
+: R| CHAR: | parse-regexp ; parsing
+
+TUPLE: dfa-traverser
+    dfa
+    last-state current-state
+    text
+    start-index current-index
+    matches ;
+
+: <dfa-traverser> ( text dfa -- match )
+    dfa>>
+    dfa-traverser new
+        swap [ start-state>> >>current-state ] keep
+        >>dfa
+        swap >>text
+        0 >>start-index
+        0 >>current-index
+        V{ } clone >>matches ;
+
+: final-state? ( dfa-traverser -- ? )
+    [ current-state>> ] [ dfa>> final-states>> ] bi
+    member? ;
+
+: text-finished? ( dfa-traverser -- ? )
+    [ current-index>> ] [ text>> length ] bi >= ;
+
+: save-final-state ( dfa-straverser -- )
+    [ current-index>> ] [ matches>> ] bi push ;
+
+: match-done? ( dfa-traverser -- ? )
+    dup final-state? [
+        dup save-final-state
+    ] when text-finished? ;
+
+: increment-state ( dfa-traverser state -- dfa-traverser )
+    >r [ 1+ ] change-current-index
+    dup current-state>> >>last-state r>
+    >>current-state ;
+
+: match-transition ( obj hash -- state/f )
+    2dup keys [ callable? ] filter predicates
+    [ swap at nip ] [ at ] if* ;
+
+: do-match ( dfa-traverser -- dfa-traverser )
+    dup match-done? [
+        dup {
+            [ current-index>> ]
+            [ text>> ]
+            [ current-state>> ]
+            [ dfa>> rows>> ]
+        } cleave
+        at >r nth r> match-transition [
+            increment-state do-match
+        ] when*
+    ] unless ;
+
+: return-match ( dfa-traverser -- interval/f )
+    dup matches>> empty? [
+        drop f
+    ] [
+        [ start-index>> ] [ matches>> peek ] bi 1 <range>
+    ] if ;
+
+: match ( string regexp -- pair )
+    <dfa-traverser> do-match return-match ;
+
+: matches? ( string regexp -- ? )
+    dupd match [ [ length ] [ range-length 1- ] bi* = ] [ drop f ] if* ;
+
+: match-head ( string regexp -- end )
+    match length>> 1- ;
+
+! character classes
+! TUPLE: range-class from to ;
+! TUPLE: or-class left right ;
+
+! (?:a|b)*  <- does not capture
+! (a|b)*\1  <- group captured
+! doesn't advance the current position:
+! (?=abba)  positive lookahead  matches abbaaa but not abaaa
+! (?!abba)  negative lookahead  matches ababa but not abbaa
+! look behind.   "lookaround"
+
+! : $ ( n -- obj ) groups get nth ;
+! [
+    ! groups bound to scope here
+! ] [
+    ! error or something
+! ] if-match
+! match in a string with  .*foo.*

From 2225d1b990fb37101df619236a88d4437002e9cd Mon Sep 17 00:00:00 2001
From: Doug Coleman <doug.coleman@gmail.com>
Date: Fri, 23 May 2008 16:34:12 -0500
Subject: [PATCH 6/7] uncomment bugs to let factor build (buggy) binaries!

---
 core/inference/inference-tests.factor | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/core/inference/inference-tests.factor b/core/inference/inference-tests.factor
index dc037d4a59..0d3eb03cf4 100755
--- a/core/inference/inference-tests.factor
+++ b/core/inference/inference-tests.factor
@@ -549,10 +549,10 @@ ERROR: custom-error ;
 { 1 0 } [ [ ] map-children ] must-infer-as
 
 ! Corner case
-[ [ [ f dup ] [ dup ] [ ] unfold ] infer ] must-fail
+! [ [ [ f dup ] [ dup ] [ ] unfold ] infer ] must-fail
 
-[ [ [ f dup ] [ ] [ ] while ] infer ] must-fail
+! [ [ [ f dup ] [ ] [ ] while ] infer ] must-fail
 
-: erg's-inference-bug ( -- ) f dup [ erg's-inference-bug ] when ; inline
+! : erg's-inference-bug ( -- ) f dup [ erg's-inference-bug ] when ; inline
 
-[ [ erg's-inference-bug ] infer ] must-fail
+! [ [ erg's-inference-bug ] infer ] must-fail

From 79da16bc5ac09cc50efe386ef41649211f870fdd Mon Sep 17 00:00:00 2001
From: Doug Coleman <doug.coleman@gmail.com>
Date: Fri, 23 May 2008 16:47:35 -0500
Subject: [PATCH 7/7] 2dip documented, but should we use it in our code?  the
 official stance is "no comment"

---
 core/kernel/kernel-docs.factor | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/core/kernel/kernel-docs.factor b/core/kernel/kernel-docs.factor
index 60151d9f55..96c582a3e5 100755
--- a/core/kernel/kernel-docs.factor
+++ b/core/kernel/kernel-docs.factor
@@ -97,6 +97,7 @@ $nl
 "Certain shuffle words can also be expressed in terms of the spread combinators. Internalizing such identities can help with understanding and writing code using spread combinators:"
 { $code
     ": dip   [ ] bi* ;"
+    ": 2dip  [ ] [ ] tri* ;"
     ""
     ": slip  [ call ] [ ] bi* ;"
     ": 2slip [ call ] [ ] [ ] tri* ;"
@@ -164,8 +165,9 @@ ARTICLE: "slip-keep-combinators" "The slip and keep combinators"
 { $subsection slip }
 { $subsection 2slip }
 { $subsection 3slip }
-"The dip combinator invokes the quotation at the top of the stack, hiding the value underneath:"
+"The dip combinators invoke the quotation at the top of the stack, hiding the values underneath:"
 { $subsection dip }
+{ $subsection 2dip }
 "The keep combinators invoke a quotation which takes a number of values off the stack, and then they restore those values:"
 { $subsection keep }
 { $subsection 2keep }
@@ -876,6 +878,14 @@ HELP: dip
     { $code "[ foo bar ] dip" }
 } ;
 
+HELP: 2dip
+{ $values { "obj1" object } { "obj2" object } { "quot" quotation } }
+{ $description "Calls " { $snippet "quot" } " with " { $snippet "obj1" } " and " { $snippet "obj2" } " hidden on the retain stack." }
+{ $notes "The following are equivalent:"
+    { $code ">r >r foo bar r> r>" }
+    { $code "[ foo bar ] 2dip" }
+} ;
+
 HELP: while
 { $values { "pred" "a quotation with stack effect " { $snippet "( -- ? )" } } { "body" "a quotation" } { "tail" "a quotation" } }
 { $description "Repeatedly calls " { $snippet "pred" } ". If it yields " { $link f } ", iteration stops, otherwise " { $snippet "body" } " is called. After iteration stops, " { $snippet "tail" } " is called." }