From 365334fc61d92351ff7de89bc669cc4359b31391 Mon Sep 17 00:00:00 2001
From: Doug Coleman <doug.coleman@gmail.com>
Date: Tue, 26 Aug 2008 20:24:14 -0500
Subject: [PATCH 1/2] move regexp2 to unfinished for binaries

---
 unfinished/regexp2/authors.txt                |   1 +
 unfinished/regexp2/backend/backend.factor     |  25 ++
 unfinished/regexp2/classes/classes.factor     |  55 +++
 unfinished/regexp2/dfa/dfa.factor             |  70 ++++
 unfinished/regexp2/nfa/nfa.factor             | 126 ++++++
 unfinished/regexp2/parser/parser-tests.factor |  33 ++
 unfinished/regexp2/parser/parser.factor       | 391 ++++++++++++++++++
 unfinished/regexp2/regexp2-docs.factor        |  14 +
 unfinished/regexp2/regexp2-tests.factor       | 263 ++++++++++++
 unfinished/regexp2/regexp2.factor             |  59 +++
 unfinished/regexp2/summary.txt                |   1 +
 unfinished/regexp2/tags.txt                   |   2 +
 .../transition-tables.factor                  |  44 ++
 unfinished/regexp2/traversal/traversal.factor |  80 ++++
 unfinished/regexp2/utils/utils.factor         |  69 ++++
 15 files changed, 1233 insertions(+)
 create mode 100644 unfinished/regexp2/authors.txt
 create mode 100644 unfinished/regexp2/backend/backend.factor
 create mode 100644 unfinished/regexp2/classes/classes.factor
 create mode 100644 unfinished/regexp2/dfa/dfa.factor
 create mode 100644 unfinished/regexp2/nfa/nfa.factor
 create mode 100644 unfinished/regexp2/parser/parser-tests.factor
 create mode 100644 unfinished/regexp2/parser/parser.factor
 create mode 100644 unfinished/regexp2/regexp2-docs.factor
 create mode 100644 unfinished/regexp2/regexp2-tests.factor
 create mode 100644 unfinished/regexp2/regexp2.factor
 create mode 100644 unfinished/regexp2/summary.txt
 create mode 100644 unfinished/regexp2/tags.txt
 create mode 100644 unfinished/regexp2/transition-tables/transition-tables.factor
 create mode 100644 unfinished/regexp2/traversal/traversal.factor
 create mode 100644 unfinished/regexp2/utils/utils.factor

diff --git a/unfinished/regexp2/authors.txt b/unfinished/regexp2/authors.txt
new file mode 100644
index 0000000000..7c1b2f2279
--- /dev/null
+++ b/unfinished/regexp2/authors.txt
@@ -0,0 +1 @@
+Doug Coleman
diff --git a/unfinished/regexp2/backend/backend.factor b/unfinished/regexp2/backend/backend.factor
new file mode 100644
index 0000000000..c39d67e7b8
--- /dev/null
+++ b/unfinished/regexp2/backend/backend.factor
@@ -0,0 +1,25 @@
+! Copyright (C) 2008 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: accessors hashtables kernel math state-tables vars vectors ;
+IN: regexp2.backend
+
+TUPLE: regexp
+    raw
+    { stack vector }
+    parse-tree
+    { options hashtable }
+    nfa-table
+    dfa-table
+    minimized-table
+    { state integer }
+    { new-states vector }
+    { visited-states hashtable } ;
+
+: reset-regexp ( regexp -- regexp )
+    0 >>state
+    V{ } clone >>stack
+    V{ } clone >>new-states
+    H{ } clone >>options
+    H{ } clone >>visited-states ;
+
+SYMBOL: current-regexp
diff --git a/unfinished/regexp2/classes/classes.factor b/unfinished/regexp2/classes/classes.factor
new file mode 100644
index 0000000000..7737e02d40
--- /dev/null
+++ b/unfinished/regexp2/classes/classes.factor
@@ -0,0 +1,55 @@
+! Copyright (C) 2008 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: accessors kernel math math.order symbols regexp2.parser
+words regexp2.utils unicode.categories combinators.short-circuit ;
+IN: regexp2.classes
+
+GENERIC: class-member? ( obj class -- ? )
+
+M: word class-member? ( obj class -- ? ) 2drop f ;
+M: integer class-member? ( obj class -- ? ) 2drop f ;
+
+M: character-class-range class-member? ( obj class -- ? )
+    [ from>> ] [ to>> ] bi between? ;
+
+M: any-char class-member? ( obj class -- ? )
+    2drop t ;
+    
+M: letter-class class-member? ( obj class -- ? )
+    drop letter? ;
+            
+M: LETTER-class class-member? ( obj class -- ? )
+    drop LETTER? ;
+
+M: Letter-class class-member? ( obj class -- ? )
+    drop Letter? ;
+
+M: ascii-class class-member? ( obj class -- ? )
+    drop ascii? ;
+
+M: digit-class class-member? ( obj class -- ? )
+    drop digit? ;
+
+M: alpha-class class-member? ( obj class -- ? )
+    drop alpha? ;
+
+M: punctuation-class class-member? ( obj class -- ? )
+    drop punct? ;
+
+M: java-printable-class class-member? ( obj class -- ? )
+    drop java-printable? ;
+
+M: non-newline-blank-class class-member? ( obj class -- ? )
+    drop { [ blank? ] [ CHAR: \n = not ] } 1&& ;
+
+M: control-character-class class-member? ( obj class -- ? )
+    drop control-char? ;
+
+M: hex-digit-class class-member? ( obj class -- ? )
+    drop hex-digit? ;
+
+M: java-blank-class class-member? ( obj class -- ? )
+    drop java-blank? ;
+
+M: unmatchable-class class-member? ( obj class -- ? )
+    2drop f ;
diff --git a/unfinished/regexp2/dfa/dfa.factor b/unfinished/regexp2/dfa/dfa.factor
new file mode 100644
index 0000000000..0dcf6c4ab5
--- /dev/null
+++ b/unfinished/regexp2/dfa/dfa.factor
@@ -0,0 +1,70 @@
+! Copyright (C) 2008 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: accessors arrays assocs combinators fry kernel locals
+math math.order regexp2.nfa regexp2.transition-tables sequences
+sets sorting vectors regexp2.utils sequences.lib ;
+USING: io prettyprint threads ;
+IN: regexp2.dfa
+
+: find-delta ( states transition regexp -- new-states )
+    nfa-table>> transitions>>
+    rot [ swap at at ] with with map sift concat prune ;
+
+: (find-epsilon-closure) ( states regexp -- new-states )
+    eps swap find-delta ;
+
+: find-epsilon-closure ( states regexp -- new-states )
+    '[ dup , (find-epsilon-closure) union ] [ length ] while-changes
+    natural-sort ;
+
+: find-closure ( states transition regexp -- new-states )
+    [ find-delta ] 2keep nip find-epsilon-closure ;
+
+: find-start-state ( regexp -- state )
+    [ nfa-table>> start-state>> 1vector ] keep find-epsilon-closure ;
+
+: find-transitions ( seq1 regexp -- seq2 )
+    nfa-table>> transitions>>
+    [ at keys ] curry map concat eps swap remove ;
+
+: add-todo-state ( state regexp -- )
+    2dup visited-states>> key? [
+        2drop
+    ] [
+        [ visited-states>> conjoin ]
+        [ new-states>> push ] 2bi
+    ] if ;
+
+: new-transitions ( regexp -- )
+    dup new-states>> [
+        drop
+    ] [
+        dupd pop dup pick find-transitions rot
+        [
+            [ [ find-closure ] 2keep nip dupd add-todo-state ] 3keep
+            >r swapd transition boa r> dfa-table>> add-transition 
+        ] curry with each
+        new-transitions
+    ] if-empty ;
+
+: states ( hashtable -- array )
+    [ keys ]
+    [ values [ values concat ] map concat append ] bi ;
+
+: set-final-states ( regexp -- )
+    dup
+    [ nfa-table>> final-states>> keys ]
+    [ dfa-table>> transitions>> states ] bi
+    [ intersect empty? not ] with filter
+
+    swap dfa-table>> final-states>>
+    [ conjoin ] curry each ;
+
+: set-initial-state ( regexp -- )
+    dup
+    [ dfa-table>> ] [ find-start-state ] bi
+    [ >>start-state drop ] keep
+    1vector >>new-states drop ;
+
+: construct-dfa ( regexp -- )
+    [ set-initial-state ] [ new-transitions ] [ set-final-states ] tri ;
diff --git a/unfinished/regexp2/nfa/nfa.factor b/unfinished/regexp2/nfa/nfa.factor
new file mode 100644
index 0000000000..f87a2a7b52
--- /dev/null
+++ b/unfinished/regexp2/nfa/nfa.factor
@@ -0,0 +1,126 @@
+! Copyright (C) 2008 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: accessors arrays assocs grouping kernel regexp2.backend
+locals math namespaces regexp2.parser sequences state-tables fry
+quotations math.order math.ranges vectors unicode.categories
+regexp2.utils regexp2.transition-tables words sequences.lib ;
+IN: regexp2.nfa
+
+SYMBOL: negation-mode
+: negated? ( -- ? ) negation-mode get 0 or odd? ; 
+
+SINGLETON: eps
+
+: next-state ( regexp -- state )
+    [ state>> ] [ [ 1+ ] change-state drop ] bi ;
+
+: set-start-state ( regexp -- )
+    dup stack>> [
+        drop
+    ] [
+        [ nfa-table>> ] [ pop first ] bi* >>start-state drop
+    ] if-empty ;
+
+GENERIC: nfa-node ( node -- )
+
+:: add-simple-entry ( obj class -- )
+    [let* | regexp [ current-regexp get ]
+            s0 [ regexp next-state ]
+            s1 [ regexp next-state ]
+            stack [ regexp stack>> ]
+            table [ regexp nfa-table>> ] |
+        negated? [
+            s0 f obj class boa table add-transition
+            s0 s1 <default-transition> table add-transition
+        ] [
+            s0 s1 obj class boa table add-transition
+        ] if
+        s0 s1 2array stack push
+        t s1 table final-states>> set-at ] ;
+
+:: concatenate-nodes ( -- )
+    [let* | regexp [ current-regexp get ]
+            stack [ regexp stack>> ]
+            table [ regexp nfa-table>> ]
+            s2 [ stack peek first ]
+            s3 [ stack pop second ]
+            s0 [ stack peek first ]
+            s1 [ stack pop second ] |
+        s1 s2 eps <literal-transition> table add-transition
+        s1 table final-states>> delete-at
+        s0 s3 2array stack push ] ;
+
+:: alternate-nodes ( -- )
+    [let* | regexp [ current-regexp get ]
+            stack [ regexp stack>> ]
+            table [ regexp nfa-table>> ]
+            s2 [ stack peek first ]
+            s3 [ stack pop second ]
+            s0 [ stack peek first ]
+            s1 [ stack pop second ]
+            s4 [ regexp next-state ]
+            s5 [ regexp next-state ] |
+        s4 s0 eps <literal-transition> table add-transition
+        s4 s2 eps <literal-transition> table add-transition
+        s1 s5 eps <literal-transition> table add-transition
+        s3 s5 eps <literal-transition> table add-transition
+        s1 table final-states>> delete-at
+        s3 table final-states>> delete-at
+        t s5 table final-states>> set-at
+        s4 s5 2array stack push ] ;
+
+M: kleene-star nfa-node ( node -- )
+    term>> nfa-node
+    [let* | regexp [ current-regexp get ]
+            stack [ regexp stack>> ]
+            s0 [ stack peek first ]
+            s1 [ stack pop second ]
+            s2 [ regexp next-state ]
+            s3 [ regexp next-state ]
+            table [ regexp nfa-table>> ] |
+        s1 table final-states>> delete-at
+        t s3 table final-states>> set-at
+        s1 s0 eps <literal-transition> table add-transition
+        s2 s0 eps <literal-transition> table add-transition
+        s2 s3 eps <literal-transition> table add-transition
+        s1 s3 eps <literal-transition> table add-transition
+        s2 s3 2array stack push ] ;
+
+M: concatenation nfa-node ( node -- )
+    seq>>
+    [ [ nfa-node ] each ]
+    [ length 1- [ concatenate-nodes ] times ] bi ;
+
+M: alternation nfa-node ( node -- )
+    seq>>
+    [ [ nfa-node ] each ]
+    [ length 1- [ alternate-nodes ] times ] bi ;
+
+M: constant nfa-node ( node -- )
+    char>> literal-transition add-simple-entry ;
+
+M: epsilon nfa-node ( node -- )
+    drop eps literal-transition add-simple-entry ;
+
+M: word nfa-node ( node -- )
+    class-transition add-simple-entry ;
+
+M: character-class-range nfa-node ( node -- )
+    class-transition add-simple-entry ;
+
+M: capture-group nfa-node ( node -- )
+    term>> nfa-node ;
+
+M: negation nfa-node ( node -- )
+    negation-mode inc
+    term>> nfa-node 
+    negation-mode dec ;
+
+: construct-nfa ( regexp -- )
+    [
+        reset-regexp
+        negation-mode off
+        [ current-regexp set ]
+        [ parse-tree>> nfa-node ]
+        [ set-start-state ] tri
+    ] with-scope ;
diff --git a/unfinished/regexp2/parser/parser-tests.factor b/unfinished/regexp2/parser/parser-tests.factor
new file mode 100644
index 0000000000..9dc7dc7909
--- /dev/null
+++ b/unfinished/regexp2/parser/parser-tests.factor
@@ -0,0 +1,33 @@
+USING: kernel tools.test regexp2.backend regexp2 ;
+IN: regexp2.parser
+
+: test-regexp ( string -- )
+    default-regexp parse-regexp ;
+
+: test-regexp2 ( string -- regexp )
+    default-regexp dup parse-regexp ;
+
+[ "(" ] [ unmatched-parentheses? ] must-fail-with
+
+[ ] [ "a|b" test-regexp ] unit-test
+[ ] [ "a.b" test-regexp ] unit-test
+[ ] [ "a|b|c" test-regexp ] unit-test
+[ ] [ "abc|b" test-regexp ] unit-test
+[ ] [ "a|bcd" test-regexp ] unit-test
+[ ] [ "a|(b)" test-regexp ] unit-test
+[ ] [ "(a)|b" test-regexp ] unit-test
+[ ] [ "(a|b)" test-regexp ] unit-test
+[ ] [ "((a)|(b))" test-regexp ] unit-test
+
+[ ] [ "(?:a)" test-regexp ] unit-test
+[ ] [ "(?i:a)" test-regexp ] unit-test
+[ ] [ "(?-i:a)" test-regexp ] unit-test
+[ "(?z:a)" test-regexp ] [ bad-option? ] must-fail-with
+[ "(?-z:a)" test-regexp ] [ bad-option? ] must-fail-with
+
+[ ] [ "(?=a)" test-regexp ] unit-test
+
+[ ] [ "[abc]" test-regexp ] unit-test
+[ ] [ "[a-c]" test-regexp ] unit-test
+[ ] [ "[^a-c]" test-regexp ] unit-test
+[ "[^]" test-regexp ] must-fail
diff --git a/unfinished/regexp2/parser/parser.factor b/unfinished/regexp2/parser/parser.factor
new file mode 100644
index 0000000000..6eda3310d0
--- /dev/null
+++ b/unfinished/regexp2/parser/parser.factor
@@ -0,0 +1,391 @@
+! Copyright (C) 2008 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: accessors arrays assocs combinators io io.streams.string
+kernel math math.parser multi-methods namespaces qualified sets
+quotations sequences sequences.lib splitting symbols vectors
+dlists math.order combinators.lib unicode.categories strings
+sequences.lib regexp2.backend regexp2.utils unicode.case ;
+IN: regexp2.parser
+
+FROM: math.ranges => [a,b] ;
+
+MIXIN: node
+TUPLE: concatenation seq ; INSTANCE: concatenation node
+TUPLE: alternation seq ; INSTANCE: alternation node
+TUPLE: kleene-star term ; INSTANCE: kleene-star node
+TUPLE: question term ; INSTANCE: question node
+TUPLE: negation term ; INSTANCE: negation node
+TUPLE: constant char ; INSTANCE: constant node
+TUPLE: range from to ; INSTANCE: range node
+TUPLE: lookahead term ; INSTANCE: lookahead node
+TUPLE: lookbehind term ; INSTANCE: lookbehind node
+TUPLE: capture-group term ; INSTANCE: capture-group node
+TUPLE: non-capture-group term ; INSTANCE: non-capture-group node
+TUPLE: independent-group term ; INSTANCE: independent-group node
+TUPLE: character-class-range from to ; INSTANCE: character-class-range node
+SINGLETON: epsilon INSTANCE: epsilon node
+SINGLETON: any-char INSTANCE: any-char node
+SINGLETON: front-anchor INSTANCE: front-anchor node
+SINGLETON: back-anchor INSTANCE: back-anchor node
+
+TUPLE: option-on option ; INSTANCE: option-on node
+TUPLE: option-off option ; INSTANCE: option-off node
+SINGLETONS: unix-lines dotall multiline comments case-insensitive unicode-case reversed-regexp ;
+
+SINGLETONS: letter-class LETTER-class Letter-class digit-class
+alpha-class non-newline-blank-class
+ascii-class punctuation-class java-printable-class blank-class
+control-character-class hex-digit-class java-blank-class c-identifier-class
+unmatchable-class ;
+
+SINGLETONS: beginning-of-group end-of-group
+beginning-of-character-class end-of-character-class
+left-parenthesis pipe caret dash ;
+
+: get-option ( option -- ? ) current-regexp get options>> at ;
+: get-unix-lines ( -- ? ) unix-lines get-option ;
+: get-dotall ( -- ? ) dotall get-option ;
+: get-multiline ( -- ? ) multiline get-option ;
+: get-comments ( -- ? ) comments get-option ;
+: get-case-insensitive ( -- ? ) case-insensitive get-option ;
+: get-unicode-case ( -- ? ) unicode-case get-option ;
+: get-reversed-regexp ( -- ? ) reversed-regexp get-option ;
+
+: <negation> ( obj -- negation ) negation boa ;
+: <concatenation> ( seq -- concatenation )
+    >vector get-reversed-regexp [ reverse ] when
+    concatenation boa ;
+: <alternation> ( seq -- alternation ) >vector alternation boa ;
+: <capture-group> ( obj -- capture-group ) capture-group boa ;
+: <kleene-star> ( obj -- kleene-star ) kleene-star boa ;
+: <constant> ( obj -- constant )
+    dup Letter? get-case-insensitive and [
+        [ ch>lower constant boa ]
+        [ ch>upper constant boa ] bi 2array <alternation>
+    ] [
+        constant boa
+    ] if ;
+
+: first|concatenation ( seq -- first/concatenation )
+    dup length 1 = [ first ] [ <concatenation> ] if ;
+
+: first|alternation ( seq -- first/alternation )
+    dup length 1 = [ first ] [ <alternation> ] if ;
+
+: <character-class-range> ( from to -- obj )
+    2dup [ Letter? ] bi@ or get-case-insensitive and [
+        [ [ ch>lower ] bi@ character-class-range boa ]
+        [ [ ch>upper ] bi@ character-class-range boa ] 2bi
+        2array [ [ from>> ] [ to>> ] bi < ] filter
+        [ unmatchable-class ] [ first|alternation ] if-empty
+    ] [
+        2dup <
+        [ character-class-range boa ] [ 2drop unmatchable-class ] if
+    ] if ;
+
+ERROR: unmatched-parentheses ;
+
+: make-positive-lookahead ( string -- )
+    lookahead boa push-stack ;
+
+: make-negative-lookahead ( string -- )
+    <negation> lookahead boa push-stack ;
+
+: make-independent-group ( string -- )
+    #! no backtracking
+    independent-group boa push-stack ;
+
+: make-positive-lookbehind ( string -- )
+    lookbehind boa push-stack ;
+
+: make-negative-lookbehind ( string -- )
+    <negation> lookbehind boa push-stack ;
+
+DEFER: nested-parse-regexp
+: make-non-capturing-group ( string -- )
+    non-capture-group boa push-stack ;
+
+ERROR: bad-option ch ;
+
+: option ( ch -- singleton )
+    {
+        { CHAR: i [ case-insensitive ] }
+        { CHAR: d [ unix-lines ] }
+        { CHAR: m [ multiline ] }
+        { CHAR: r [ reversed-regexp ] }
+        { CHAR: s [ dotall ] }
+        { CHAR: u [ unicode-case ] }
+        { CHAR: x [ comments ] }
+        [ bad-option ]
+    } case ;
+
+: option-on ( option -- ) current-regexp get options>> conjoin ;
+: option-off ( option -- ) current-regexp get options>> delete-at ;
+
+: toggle-option ( ch ? -- ) [ option ] dip [ option-on ] [ option-off ] if ;
+: (parse-options) ( string ? -- ) [ toggle-option ] curry each ;
+
+: parse-options ( string -- )
+    "-" split1 [ t (parse-options) ] [ f (parse-options) ] bi* ;
+
+DEFER: (parse-regexp)
+: parse-special-group ( -- )
+    beginning-of-group push-stack
+    (parse-regexp) pop-stack make-non-capturing-group ;
+
+ERROR: bad-special-group string ;
+
+: (parse-special-group) ( -- )
+    read1 {
+        { [ dup CHAR: : = ]
+            [ drop nested-parse-regexp pop-stack make-non-capturing-group ] }
+        { [ dup CHAR: = = ]
+            [ drop nested-parse-regexp pop-stack make-positive-lookahead ] }
+        { [ dup CHAR: = = ]
+            [ drop nested-parse-regexp pop-stack make-negative-lookahead ] }
+        { [ dup CHAR: > = ]
+            [ drop nested-parse-regexp pop-stack make-independent-group ] }
+        { [ dup CHAR: < = peek1 CHAR: = = and ]
+            [ drop read1 drop nested-parse-regexp pop-stack make-positive-lookbehind ] }
+        { [ dup CHAR: < = peek1 CHAR: ! = and ]
+            [ drop read1 drop nested-parse-regexp pop-stack make-negative-lookbehind ] }
+        [
+            ":)" read-until
+            [ swap prefix ] dip
+            {
+                { CHAR: : [ parse-options parse-special-group ] }
+                { CHAR: ) [ parse-options ] }
+                [ drop bad-special-group ]
+            } case
+        ]
+    } cond ;
+
+: handle-left-parenthesis ( -- )
+    peek1 CHAR: ? =
+    [ read1 drop (parse-special-group) ]
+    [ nested-parse-regexp ] if ;
+
+: handle-dot ( -- ) any-char push-stack ;
+: handle-pipe ( -- ) pipe push-stack ;
+: handle-star ( -- ) stack pop <kleene-star> push-stack ;
+: handle-question ( -- )
+    stack pop epsilon 2array <alternation> push-stack ;
+: handle-plus ( -- )
+    stack pop dup <kleene-star> 2array <concatenation> push-stack ;
+
+ERROR: unmatched-brace ;
+: parse-repetition ( -- start finish ? )
+    "}" read-until [ unmatched-brace ] unless
+    [ "," split1 [ string>number ] bi@ ]
+    [ CHAR: , swap index >boolean ] bi ;
+
+: replicate/concatenate ( n obj -- obj' )
+    over zero? [ 2drop epsilon ]
+    [ <repetition> first|concatenation ] if ;
+
+: exactly-n ( n -- )
+    stack pop replicate/concatenate push-stack ;
+
+: at-least-n ( n -- )
+    stack pop
+    [ replicate/concatenate ] keep
+    <kleene-star> 2array <concatenation> push-stack ;
+
+: at-most-n ( n -- )
+    1+
+    stack pop
+    [ replicate/concatenate ] curry map <alternation> push-stack ;
+
+: from-m-to-n ( m n -- )
+    [a,b]
+    stack pop
+    [ replicate/concatenate ] curry map
+    <alternation> push-stack ;
+
+ERROR: invalid-range a b ;
+
+: handle-left-brace ( -- )
+    parse-repetition
+    >r 2dup [ [ 0 < [ invalid-range ] when ] when* ] bi@ r>
+    [
+        2dup and [ from-m-to-n ]
+        [ [ nip at-most-n ] [ at-least-n ] if* ] if
+    ] [ drop 0 max exactly-n ] if ;
+
+: handle-front-anchor ( -- ) front-anchor push-stack ;
+: handle-back-anchor ( -- ) back-anchor push-stack ;
+
+ERROR: bad-character-class obj ;
+ERROR: expected-posix-class ;
+
+: parse-posix-class ( -- obj )
+    read1 CHAR: { = [ expected-posix-class ] unless
+    "}" read-until [ bad-character-class ] unless
+    {
+        { "Lower" [ get-case-insensitive Letter-class letter-class ? ] }
+        { "Upper" [ get-case-insensitive Letter-class LETTER-class ? ] }
+        { "Alpha" [ Letter-class ] }
+        { "ASCII" [ ascii-class ] }
+        { "Digit" [ digit-class ] }
+        { "Alnum" [ alpha-class ] }
+        { "Punct" [ punctuation-class ] }
+        { "Graph" [ java-printable-class ] }
+        { "Print" [ java-printable-class ] }
+        { "Blank" [ non-newline-blank-class ] }
+        { "Cntrl" [ control-character-class ] }
+        { "XDigit" [ hex-digit-class ] }
+        { "Space" [ java-blank-class ] }
+        ! TODO: unicode-character-class, fallthrough in unicode is bad-char-clss
+        [ bad-character-class ]
+    } case ;
+
+: parse-octal ( -- n ) 3 read oct> check-octal ;
+: parse-short-hex ( -- n ) 2 read hex> check-hex ;
+: parse-long-hex ( -- n ) 6 read hex> check-hex ;
+: parse-control-character ( -- n ) read1 ;
+
+ERROR: bad-escaped-literals seq ;
+: parse-escaped-literals ( -- obj )
+    "\\E" read-until [ bad-escaped-literals ] unless
+    read1 drop
+    [ epsilon ] [
+        [ <constant> ] V{ } map-as
+        first|concatenation
+    ] if-empty ;
+
+: parse-escaped ( -- obj )
+    read1
+    {
+        { CHAR: \ [ CHAR: \ <constant> ] }
+        { CHAR: . [ CHAR: . <constant> ] }
+        { CHAR: t [ CHAR: \t <constant> ] }
+        { CHAR: n [ CHAR: \n <constant> ] }
+        { CHAR: r [ CHAR: \r <constant> ] }
+        { CHAR: f [ HEX: c <constant> ] }
+        { CHAR: a [ HEX: 7 <constant> ] }
+        { CHAR: e [ HEX: 1b <constant> ] }
+
+        { CHAR: d [ digit-class ] }
+        { CHAR: D [ digit-class <negation> ] }
+        { CHAR: s [ java-blank-class ] }
+        { CHAR: S [ java-blank-class <negation> ] }
+        { CHAR: w [ c-identifier-class ] }
+        { CHAR: W [ c-identifier-class <negation> ] }
+
+        { CHAR: p [ parse-posix-class ] }
+        { CHAR: P [ parse-posix-class <negation> ] }
+        { CHAR: x [ parse-short-hex <constant> ] }
+        { CHAR: u [ parse-long-hex <constant> ] }
+        { CHAR: 0 [ parse-octal <constant> ] }
+        { CHAR: c [ parse-control-character ] }
+
+        ! { CHAR: b [ handle-word-boundary ] }
+        ! { CHAR: B [ handle-word-boundary <negation> ] }
+        ! { CHAR: A [ handle-beginning-of-input ] }
+        ! { CHAR: G [ end of previous match ] }
+        ! { CHAR: Z [ handle-end-of-input ] }
+        ! { CHAR: z [ handle-end-of-input ] } ! except for terminator
+
+        { CHAR: Q [ parse-escaped-literals ] }
+    } case ;
+
+: handle-escape ( -- ) parse-escaped push-stack ;
+
+: handle-dash ( vector -- vector' )
+    H{ { dash CHAR: - } } substitute ;
+
+: character-class>alternation ( seq -- alternation )
+    [ dup number? [ <constant> ] when ] map first|alternation ;
+
+: handle-caret ( vector -- vector' )
+    dup [ length 2 >= ] [ first caret eq? ] bi and [
+        rest-slice character-class>alternation <negation>
+    ] [
+        character-class>alternation
+    ] if ;
+
+: make-character-class ( -- character-class )
+    [ beginning-of-character-class swap cut-stack ] change-whole-stack
+    handle-dash handle-caret ;
+
+: apply-dash ( -- )
+    stack [ pop3 nip <character-class-range> ] keep push ;
+
+: apply-dash? ( -- ? )
+    stack dup length 3 >=
+    [ [ length 2 - ] keep nth dash eq? ] [ drop f ] if ;
+
+ERROR: empty-negated-character-class ;
+DEFER: handle-left-bracket
+: (parse-character-class) ( -- )
+    read1 [ empty-negated-character-class ] unless* {
+        { CHAR: [ [ handle-left-bracket t ] }
+        { CHAR: ] [ make-character-class push-stack f ] }
+        { CHAR: - [ dash push-stack t ] }
+        { CHAR: \ [ parse-escaped push-stack t ] }
+        [ push-stack apply-dash? [ apply-dash ] when t ]
+    } case
+    [ (parse-character-class) ] when ;
+
+: parse-character-class-second ( -- )
+    read1 {
+        { CHAR: [ [ CHAR: [ <constant> push-stack ] }
+        { CHAR: ] [ CHAR: ] <constant> push-stack ] }
+        { CHAR: - [ CHAR: - <constant> push-stack ] }
+        [ push1 ]
+    } case ;
+
+: parse-character-class-first ( -- )
+    read1 {
+        { CHAR: ^ [ caret push-stack parse-character-class-second ] }
+        { CHAR: [ [ CHAR: [ <constant> push-stack ] }
+        { CHAR: ] [ CHAR: ] <constant> push-stack ] }
+        { CHAR: - [ CHAR: - <constant> push-stack ] }
+        [ push1 ]
+    } case ;
+
+: handle-left-bracket ( -- )
+    beginning-of-character-class push-stack
+    parse-character-class-first (parse-character-class) ;
+
+: finish-regexp-parse ( stack -- obj )
+    { pipe } split
+    [ first|concatenation ] map first|alternation ;
+
+: handle-right-parenthesis ( -- )
+    stack beginning-of-group over last-index cut rest
+    [ current-regexp get swap >>stack drop ]
+    [ finish-regexp-parse <capture-group> push-stack ] bi* ;
+
+: nested-parse-regexp ( -- )
+    beginning-of-group push-stack (parse-regexp) ;
+
+: ((parse-regexp)) ( token -- )
+    {
+        { CHAR: . [ handle-dot ] }
+        { CHAR: ( [ handle-left-parenthesis ] }
+        { CHAR: ) [ handle-right-parenthesis ] }
+        { CHAR: | [ handle-pipe ] }
+        { CHAR: ? [ handle-question ] }
+        { CHAR: * [ handle-star ] }
+        { CHAR: + [ handle-plus ] }
+        { CHAR: { [ handle-left-brace ] }
+        { CHAR: [ [ handle-left-bracket ] }
+        { CHAR: ^ [ handle-front-anchor ] }
+        { CHAR: $ [ handle-back-anchor ] }
+        { CHAR: \ [ handle-escape ] }
+        [ <constant> push-stack ]
+    } case ;
+
+: (parse-regexp) ( -- )
+    read1 [ ((parse-regexp)) (parse-regexp) ] when* ;
+
+: parse-regexp ( regexp -- )
+    dup current-regexp [
+        raw>> [
+            <string-reader> [ (parse-regexp) ] with-input-stream
+        ] unless-empty
+        current-regexp get
+        stack finish-regexp-parse
+            >>parse-tree drop
+    ] with-variable ;
diff --git a/unfinished/regexp2/regexp2-docs.factor b/unfinished/regexp2/regexp2-docs.factor
new file mode 100644
index 0000000000..f903c14bc4
--- /dev/null
+++ b/unfinished/regexp2/regexp2-docs.factor
@@ -0,0 +1,14 @@
+! Copyright (C) 2008 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: kernel strings help.markup help.syntax regexp2.backend ;
+IN: regexp2
+
+HELP: <regexp>
+{ $values { "string" string } { "regexp" regexp } }
+{ $description "Compiles a regular expression into a DFA and returns this object.  Regular expressions only have to be compiled once and can then be used multiple times to match input strings." } ;
+
+HELP: <iregexp>
+{ $values { "string" string } { "regexp" regexp } }
+{ $description "Compiles a case-insensitive regular expression into a DFA and returns this object.  Otherwise, exactly the same as " { $link <regexp> } } ;
+
+{ <regexp> <iregexp> } related-words
diff --git a/unfinished/regexp2/regexp2-tests.factor b/unfinished/regexp2/regexp2-tests.factor
new file mode 100644
index 0000000000..54626ea165
--- /dev/null
+++ b/unfinished/regexp2/regexp2-tests.factor
@@ -0,0 +1,263 @@
+USING: regexp2 tools.test kernel sequences regexp2.parser
+regexp2.traversal ;
+IN: regexp2-tests
+
+[ f ] [ "b" "a*" <regexp> matches? ] unit-test
+[ t ] [ "" "a*" <regexp> matches? ] unit-test
+[ t ] [ "a" "a*" <regexp> matches? ] unit-test
+[ t ] [ "aaaaaaa" "a*"  <regexp> matches? ] unit-test
+[ f ] [ "ab" "a*" <regexp> matches? ] unit-test
+
+[ t ] [ "abc" "abc" <regexp> matches? ] unit-test
+[ t ] [ "a" "a|b|c" <regexp> matches? ] unit-test
+[ t ] [ "b" "a|b|c" <regexp> matches? ] unit-test
+[ t ] [ "c" "a|b|c" <regexp> matches? ] unit-test
+[ f ] [ "c" "d|e|f" <regexp> matches? ] unit-test
+
+[ f ] [ "aa" "a|b|c" <regexp> matches? ] unit-test
+[ f ] [ "bb" "a|b|c" <regexp> matches? ] unit-test
+[ f ] [ "cc" "a|b|c" <regexp> matches? ] unit-test
+[ f ] [ "cc" "d|e|f" <regexp> matches? ] unit-test
+
+[ f ] [ "" "a+" <regexp> matches? ] unit-test
+[ t ] [ "a" "a+" <regexp> matches? ] unit-test
+[ t ] [ "aa" "a+" <regexp> matches? ] unit-test
+
+[ t ] [ "" "a?" <regexp> matches? ] unit-test
+[ t ] [ "a" "a?" <regexp> matches? ] unit-test
+[ f ] [ "aa" "a?" <regexp> matches? ] unit-test
+
+[ f ] [ "" "." <regexp> matches? ] unit-test
+[ t ] [ "a" "." <regexp> matches? ] unit-test
+[ t ] [ "." "." <regexp> matches? ] unit-test
+! [ f ] [ "\n" "." <regexp> matches? ] unit-test
+
+[ f ] [ "" ".+" <regexp> matches? ] unit-test
+[ t ] [ "a" ".+" <regexp> matches? ] unit-test
+[ t ] [ "ab" ".+" <regexp> matches? ] unit-test
+
+
+[ t ] [ "" "a|b*|c+|d?" <regexp> matches? ] unit-test
+[ t ] [ "a" "a|b*|c+|d?" <regexp> matches? ] unit-test
+[ t ] [ "c" "a|b*|c+|d?" <regexp> matches? ] unit-test
+[ t ] [ "cc" "a|b*|c+|d?" <regexp> matches? ] unit-test
+[ f ] [ "ccd" "a|b*|c+|d?" <regexp> matches? ] unit-test
+[ t ] [ "d" "a|b*|c+|d?" <regexp> matches? ] unit-test
+
+[ t ] [ "foo" "foo|bar" <regexp> matches? ] unit-test
+[ t ] [ "bar" "foo|bar" <regexp> matches? ] unit-test
+[ f ] [ "foobar" "foo|bar" <regexp> matches? ] unit-test
+
+[ f ] [ "" "(a)" <regexp> matches? ] unit-test
+[ t ] [ "a" "(a)" <regexp> matches? ] unit-test
+[ f ] [ "aa" "(a)" <regexp> matches? ] unit-test
+[ t ] [ "aa" "(a*)" <regexp> matches? ] unit-test
+
+[ f ] [ "aababaaabbac" "(a|b)+" <regexp> matches? ] unit-test
+[ t ] [ "ababaaabba" "(a|b)+" <regexp> matches? ] unit-test
+
+[ f ] [ "" "a{1}" <regexp> matches? ] unit-test
+[ t ] [ "a" "a{1}" <regexp> matches? ] unit-test
+[ f ] [ "aa" "a{1}" <regexp> matches? ] unit-test
+
+[ f ] [ "a" "a{2,}" <regexp> matches? ] unit-test
+[ t ] [ "aaa" "a{2,}" <regexp> matches? ] unit-test
+[ t ] [ "aaaa" "a{2,}" <regexp> matches? ] unit-test
+[ t ] [ "aaaaa" "a{2,}" <regexp> matches? ] unit-test
+
+[ t ] [ "" "a{,2}" <regexp> matches? ] unit-test
+[ t ] [ "a" "a{,2}" <regexp> matches? ] unit-test
+[ t ] [ "aa" "a{,2}" <regexp> matches? ] unit-test
+[ f ] [ "aaa" "a{,2}" <regexp> matches? ] unit-test
+[ f ] [ "aaaa" "a{,2}" <regexp> matches? ] unit-test
+[ f ] [ "aaaaa" "a{,2}" <regexp> matches? ] unit-test
+
+[ f ] [ "" "a{1,3}" <regexp> matches? ] unit-test
+[ t ] [ "a" "a{1,3}" <regexp> matches? ] unit-test
+[ t ] [ "aa" "a{1,3}" <regexp> matches? ] unit-test
+[ t ] [ "aaa" "a{1,3}" <regexp> matches? ] unit-test
+[ f ] [ "aaaa" "a{1,3}" <regexp> matches? ] unit-test
+
+[ f ] [ "" "[a]" <regexp> matches? ] unit-test
+[ t ] [ "a" "[a]" <regexp> matches? ] unit-test
+[ t ] [ "a" "[abc]" <regexp> matches? ] unit-test
+[ f ] [ "b" "[a]" <regexp> matches? ] unit-test
+[ f ] [ "d" "[abc]" <regexp> matches? ] unit-test
+[ t ] [ "ab" "[abc]{1,2}" <regexp> matches? ] unit-test
+[ f ] [ "abc" "[abc]{1,2}" <regexp> matches? ] unit-test
+
+[ f ] [ "" "[^a]" <regexp> matches? ] unit-test
+[ f ] [ "a" "[^a]" <regexp> matches? ] unit-test
+[ f ] [ "a" "[^abc]" <regexp> matches? ] unit-test
+[ t ] [ "b" "[^a]" <regexp> matches? ] unit-test
+[ t ] [ "d" "[^abc]" <regexp> matches? ] unit-test
+[ f ] [ "ab" "[^abc]{1,2}" <regexp> matches? ] unit-test
+[ f ] [ "abc" "[^abc]{1,2}" <regexp> matches? ] unit-test
+
+[ t ] [ "]" "[]]" <regexp> matches? ] unit-test
+[ f ] [ "]" "[^]]" <regexp> matches? ] unit-test
+[ t ] [ "a" "[^]]" <regexp> matches? ] unit-test
+
+[ "^" "[^]" <regexp> matches? ] must-fail
+[ t ] [ "^" "[]^]" <regexp> matches? ] unit-test
+[ t ] [ "]" "[]^]" <regexp> matches? ] unit-test
+
+[ t ] [ "[" "[[]" <regexp> matches? ] unit-test
+[ f ] [ "^" "[^^]" <regexp> matches? ] unit-test
+[ t ] [ "a" "[^^]" <regexp> matches? ] unit-test
+
+[ t ] [ "-" "[-]" <regexp> matches? ] unit-test
+[ f ] [ "a" "[-]" <regexp> matches? ] unit-test
+[ f ] [ "-" "[^-]" <regexp> matches? ] unit-test
+[ t ] [ "a" "[^-]" <regexp> matches? ] unit-test
+
+[ t ] [ "-" "[-a]" <regexp> matches? ] unit-test
+[ t ] [ "a" "[-a]" <regexp> matches? ] unit-test
+[ t ] [ "-" "[a-]" <regexp> matches? ] unit-test
+[ t ] [ "a" "[a-]" <regexp> matches? ] unit-test
+[ f ] [ "b" "[a-]" <regexp> matches? ] unit-test
+[ f ] [ "-" "[^-]" <regexp> matches? ] unit-test
+[ t ] [ "a" "[^-]" <regexp> matches? ] unit-test
+
+[ f ] [ "-" "[a-c]" <regexp> matches? ] unit-test
+[ t ] [ "-" "[^a-c]" <regexp> matches? ] unit-test
+[ t ] [ "b" "[a-c]" <regexp> matches? ] unit-test
+[ f ] [ "b" "[^a-c]" <regexp> matches? ] unit-test
+
+[ t ] [ "-" "[a-c-]" <regexp> matches? ] unit-test
+[ f ] [ "-" "[^a-c-]" <regexp> matches? ] unit-test
+
+[ t ] [ "\\" "[\\\\]" <regexp> matches? ] unit-test
+[ f ] [ "a" "[\\\\]" <regexp> matches? ] unit-test
+[ f ] [ "\\" "[^\\\\]" <regexp> matches? ] unit-test
+[ t ] [ "a" "[^\\\\]" <regexp> matches? ] unit-test
+
+[ t ] [ "0" "[\\d]" <regexp> matches? ] unit-test
+[ f ] [ "a" "[\\d]" <regexp> matches? ] unit-test
+[ f ] [ "0" "[^\\d]" <regexp> matches? ] unit-test
+[ t ] [ "a" "[^\\d]" <regexp> matches? ] unit-test
+
+[ t ] [ "a" "[a-z]{1,}|[A-Z]{2,4}|b*|c|(f|g)*" <regexp> matches? ] unit-test
+[ t ] [ "a" "[a-z]{1,2}|[A-Z]{3,3}|b*|c|(f|g)*" <regexp> matches? ] unit-test
+[ t ] [ "a" "[a-z]{1,2}|[A-Z]{3,3}" <regexp> matches? ] unit-test
+
+[ t ] [ "1000" "\\d{4,6}" <regexp> matches? ] unit-test
+[ t ] [ "1000" "[0-9]{4,6}" <regexp> matches? ] unit-test
+
+[ t ] [ "abc" "\\p{Lower}{3}" <regexp> matches? ] unit-test
+[ f ] [ "ABC" "\\p{Lower}{3}" <regexp> matches? ] unit-test
+[ t ] [ "ABC" "\\p{Upper}{3}" <regexp> matches? ] unit-test
+[ f ] [ "abc" "\\p{Upper}{3}" <regexp> matches? ] unit-test
+! 
+[ f ] [ "abc" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
+[ t ] [ "ABC" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
+
+[ f ] [ "" "\\Q\\E" <regexp> matches? ] unit-test
+[ f ] [ "a" "\\Q\\E" <regexp> matches? ] unit-test
+[ t ] [ "|*+" "\\Q|*+\\E" <regexp> matches? ] unit-test
+[ f ] [ "abc" "\\Q|*+\\E" <regexp> matches? ] unit-test
+[ t ] [ "s" "\\Qs\\E" <regexp> matches? ] unit-test
+
+[ t ] [ "S" "\\0123" <regexp> matches? ] unit-test
+[ t ] [ "SXY" "\\0123XY" <regexp> matches? ] unit-test
+[ t ] [ "x" "\\x78" <regexp> matches? ] unit-test
+[ f ] [ "y" "\\x78" <regexp> matches? ] unit-test
+[ t ] [ "x" "\\u000078" <regexp> matches? ] unit-test
+[ f ] [ "y" "\\u000078" <regexp> matches? ] unit-test
+
+[ t ] [ "ab" "a+b" <regexp> matches? ] unit-test
+[ f ] [ "b" "a+b" <regexp> matches? ] unit-test
+[ t ] [ "aab" "a+b" <regexp> matches? ] unit-test
+[ f ] [ "abb" "a+b" <regexp> matches? ] unit-test
+
+[ t ] [ "abbbb" "ab*" <regexp> matches? ] unit-test
+[ t ] [ "a" "ab*" <regexp> matches? ] unit-test
+[ f ] [ "abab" "ab*" <regexp> matches? ] unit-test
+
+[ f ] [ "x" "\\." <regexp> matches? ] unit-test
+[ t ] [ "." "\\." <regexp> matches? ] unit-test
+
+[ t ] [ "aaaab" "a+ab" <regexp> matches? ] unit-test
+[ f ] [ "aaaxb" "a+ab" <regexp> matches? ] unit-test
+[ t ] [ "aaacb" "a+cb" <regexp> matches? ] unit-test
+[ f ] [ "aaaab" "a++ab" <regexp> matches? ] unit-test
+[ t ] [ "aaacb" "a++cb" <regexp> matches? ] unit-test
+
+[ 3 ] [ "aaacb" "a*" <regexp> match-head ] unit-test
+[ 1 ] [ "aaacb" "a+?" <regexp> match-head ] unit-test
+[ 2 ] [ "aaacb" "aa?" <regexp> match-head ] unit-test
+[ 1 ] [ "aaacb" "aa??" <regexp> match-head ] unit-test
+[ 3 ] [ "aacb" "aa?c" <regexp> match-head ] unit-test
+[ 3 ] [ "aacb" "aa??c" <regexp> match-head ] unit-test
+
+! [ t ] [ "aaa" "AAA" t <regexp> matches? ] unit-test
+! [ f ] [ "aax" "AAA" t <regexp> matches? ] unit-test
+! [ t ] [ "aaa" "A*" t <regexp> matches? ] unit-test
+! [ f ] [ "aaba" "A*" t <regexp> matches? ] unit-test
+! [ t ] [ "b" "[AB]" t <regexp> matches? ] unit-test
+! [ f ] [ "c" "[AB]" t <regexp> matches? ] unit-test
+! [ t ] [ "c" "[A-Z]" t <regexp> matches? ] unit-test
+! [ f ] [ "3" "[A-Z]" t <regexp> matches? ] unit-test
+
+[ ] [
+    "(0[lL]?|[1-9]\\d{0,9}(\\d{0,9}[lL])?|0[xX]\\p{XDigit}{1,8}(\\p{XDigit}{0,8}[lL])?|0[0-7]{1,11}([0-7]{0,11}[lL])?|([0-9]+\\.[0-9]*|\\.[0-9]+)([eE][+-]?[0-9]+)?[fFdD]?|[0-9]+([eE][+-]?[0-9]+[fFdD]?|([eE][+-]?[0-9]+)?[fFdD]))"
+    <regexp> drop
+] unit-test
+
+[ "{Lower}" <regexp> ] [ invalid-range? ] must-fail-with
+
+[ t ] [ "fxxbar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
+[ f ] [ "foobar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
+
+! [ 3 ] [ "foobar" "foo(?=bar)" <regexp> match-head ] unit-test
+! [ f ] [ "foobxr" "foo(?=bar)" <regexp> match-head ] unit-test
+
+! [ f ] [ "foobxr" "foo\\z" <regexp> match-head ] unit-test
+! [ 3 ] [ "foo" "foo\\z" <regexp> match-head ] unit-test
+
+! [ 3 ] [ "foo bar" "foo\\b" <regexp> match-head ] unit-test
+! [ f ] [ "fooxbar" "foo\\b" <regexp> matches? ] unit-test
+! [ t ] [ "foo" "foo\\b" <regexp> matches? ] unit-test
+! [ t ] [ "foo bar" "foo\\b bar" <regexp> matches? ] unit-test
+! [ f ] [ "fooxbar" "foo\\bxbar" <regexp> matches? ] unit-test
+! [ f ] [ "foo" "foo\\bbar" <regexp> matches? ] unit-test
+
+! [ f ] [ "foo bar" "foo\\B" <regexp> matches? ] unit-test
+! [ 3 ] [ "fooxbar" "foo\\B" <regexp> match-head ] unit-test
+! [ t ] [ "foo" "foo\\B" <regexp> matches? ] unit-test
+! [ f ] [ "foo bar" "foo\\B bar" <regexp> matches? ] unit-test
+! [ t ] [ "fooxbar" "foo\\Bxbar" <regexp> matches? ] unit-test
+! [ f ] [ "foo" "foo\\Bbar" <regexp> matches? ] unit-test
+
+[ t ] [ "s@f" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
+[ f ] [ "a" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
+[ t ] [ ".o" "\\.[a-z]" <regexp> matches? ] unit-test
+
+[ t ] [ "a" "(?i)a" <regexp> matches? ] unit-test
+[ t ] [ "a" "(?i)a" <regexp> matches? ] unit-test
+[ t ] [ "A" "(?i)a" <regexp> matches? ] unit-test
+[ t ] [ "A" "(?i)a" <regexp> matches? ] unit-test
+
+[ t ] [ "a" "(?-i)a" <iregexp> matches? ] unit-test
+[ t ] [ "a" "(?-i)a" <iregexp> matches? ] unit-test
+[ f ] [ "A" "(?-i)a" <iregexp> matches? ] unit-test
+[ f ] [ "A" "(?-i)a" <iregexp> matches? ] unit-test
+
+[ f ] [ "A" "[a-z]" <regexp> matches? ] unit-test
+[ t ] [ "A" "[a-z]" <iregexp> matches? ] unit-test
+
+[ f ] [ "A" "\\p{Lower}" <regexp> matches? ] unit-test
+[ t ] [ "A" "\\p{Lower}" <iregexp> matches? ] unit-test
+
+[ t ] [ "abc" <reversed> "abc" <rregexp> matches? ] unit-test
+[ t ] [ "abc" <reversed> "a[bB][cC]" <rregexp> matches? ] unit-test
+[ t ] [ "adcbe" "a(?r)(bcd)(?-r)e" <rregexp> matches? ] unit-test
+
+! Bug in parsing word
+! [ t ] [ "a" R' a' matches?  ] unit-test
+
+! ((A)(B(C)))
+! 1.  ((A)(B(C)))
+! 2. (A)
+! 3. (B(C))
+! 4. (C) 
diff --git a/unfinished/regexp2/regexp2.factor b/unfinished/regexp2/regexp2.factor
new file mode 100644
index 0000000000..0b8994ca2b
--- /dev/null
+++ b/unfinished/regexp2/regexp2.factor
@@ -0,0 +1,59 @@
+! Copyright (C) 2008 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: accessors combinators kernel math math.ranges
+sequences regexp2.backend regexp2.utils memoize sets
+regexp2.parser regexp2.nfa regexp2.dfa regexp2.traversal
+regexp2.transition-tables ;
+IN: regexp2
+
+: default-regexp ( string -- regexp )
+    regexp new
+        swap >>raw
+        <transition-table> >>nfa-table
+        <transition-table> >>dfa-table
+        <transition-table> >>minimized-table
+        reset-regexp ;
+
+: construct-regexp ( regexp -- regexp' )
+    {
+        [ parse-regexp ]
+        [ construct-nfa ]
+        [ construct-dfa ]
+        [ ]
+    } cleave ;
+
+: match ( string regexp -- pair )
+    <dfa-traverser> do-match return-match ;
+
+: matches? ( string regexp -- ? )
+    dupd match [ [ length ] [ range-length 1- ] bi* = ] [ drop f ] if* ;
+
+: match-head ( string regexp -- end ) match length>> 1- ;
+
+: initial-option ( regexp option -- regexp' )
+    over options>> conjoin ;
+
+: <regexp> ( string -- regexp )
+    default-regexp construct-regexp ;
+
+: <iregexp> ( string -- regexp )
+    default-regexp
+    case-insensitive initial-option
+    construct-regexp ;
+
+: <rregexp> ( string -- regexp )
+    default-regexp
+    reversed-regexp initial-option
+    construct-regexp ;
+
+: R! CHAR: ! <regexp> ; parsing
+: R" CHAR: " <regexp> ; parsing
+: R# CHAR: # <regexp> ; parsing
+: R' CHAR: ' <regexp> ; parsing
+: R( CHAR: ) <regexp> ; parsing
+: R/ CHAR: / <regexp> ; parsing
+: R@ CHAR: @ <regexp> ; parsing
+: R[ CHAR: ] <regexp> ; parsing
+: R` CHAR: ` <regexp> ; parsing
+: R{ CHAR: } <regexp> ; parsing
+: R| CHAR: | <regexp> ; parsing
diff --git a/unfinished/regexp2/summary.txt b/unfinished/regexp2/summary.txt
new file mode 100644
index 0000000000..aa1e1c27a9
--- /dev/null
+++ b/unfinished/regexp2/summary.txt
@@ -0,0 +1 @@
+Regular expressions
diff --git a/unfinished/regexp2/tags.txt b/unfinished/regexp2/tags.txt
new file mode 100644
index 0000000000..65bc471f6b
--- /dev/null
+++ b/unfinished/regexp2/tags.txt
@@ -0,0 +1,2 @@
+parsing
+text
diff --git a/unfinished/regexp2/transition-tables/transition-tables.factor b/unfinished/regexp2/transition-tables/transition-tables.factor
new file mode 100644
index 0000000000..0547846655
--- /dev/null
+++ b/unfinished/regexp2/transition-tables/transition-tables.factor
@@ -0,0 +1,44 @@
+! Copyright (C) 2008 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: accessors arrays assocs fry hashtables kernel sequences
+vectors ;
+IN: regexp2.transition-tables
+
+: insert-at ( value key hash -- )
+    2dup at* [
+        2nip push
+    ] [
+        drop >r >r dup vector? [ 1vector ] unless r> r> set-at
+    ] if ;
+
+: ?insert-at ( value key hash/f -- hash )
+    [ H{ } clone ] unless* [ insert-at ] keep ;
+
+TUPLE: transition from to obj ;
+TUPLE: literal-transition < transition ;
+TUPLE: class-transition < transition ;
+TUPLE: default-transition < transition ;
+
+TUPLE: literal obj ;
+TUPLE: class obj ;
+TUPLE: default ;
+: <literal-transition> ( from to obj -- transition ) literal-transition boa ;
+: <class-transition> ( from to obj -- transition ) class-transition boa ;
+: <default-transition> ( from to -- transition ) t default-transition boa ;
+
+TUPLE: transition-table transitions
+    literals classes defaults
+    start-state final-states ;
+
+: <transition-table> ( -- transition-table )
+    transition-table new
+        H{ } clone >>transitions
+        H{ } clone >>final-states ;
+
+: set-transition ( transition hash -- )
+    >r [ to>> ] [ obj>> ] [ from>> ] tri r>
+    2dup at* [ 2nip insert-at ]
+    [ drop >r >r H{ } clone [ insert-at ] keep r> r> set-at ] if ;
+
+: add-transition ( transition transition-table -- )
+    transitions>> set-transition ;
diff --git a/unfinished/regexp2/traversal/traversal.factor b/unfinished/regexp2/traversal/traversal.factor
new file mode 100644
index 0000000000..94e96bb935
--- /dev/null
+++ b/unfinished/regexp2/traversal/traversal.factor
@@ -0,0 +1,80 @@
+! Copyright (C) 2008 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: accessors assocs combinators combinators.lib kernel
+math math.ranges quotations sequences regexp2.parser
+regexp2.classes combinators.short-circuit assocs.lib
+sequences.lib ;
+IN: regexp2.traversal
+
+TUPLE: dfa-traverser
+    dfa-table
+    last-state current-state
+    text
+    start-index current-index
+    matches ;
+
+: <dfa-traverser> ( text regexp -- match )
+    dfa-table>>
+    dfa-traverser new
+        swap [ start-state>> >>current-state ] keep
+        >>dfa-table
+        swap >>text
+        0 >>start-index
+        0 >>current-index
+        V{ } clone >>matches ;
+
+: final-state? ( dfa-traverser -- ? )
+    [ current-state>> ] [ dfa-table>> final-states>> ] bi
+    key? ;
+
+: text-finished? ( dfa-traverser -- ? )
+    [ current-index>> ] [ text>> length ] bi >= ;
+
+: save-final-state ( dfa-straverser -- )
+    [ current-index>> ] [ matches>> ] bi push ;
+
+: match-done? ( dfa-traverser -- ? )
+    dup final-state? [
+        dup save-final-state
+    ] when text-finished? ;
+
+: increment-state ( dfa-traverser state -- dfa-traverser )
+    >r [ 1+ ] change-current-index
+    dup current-state>> >>last-state r>
+    first >>current-state ;
+
+: match-failed ( dfa-traverser -- dfa-traverser )
+    V{ } clone >>matches ;
+
+: match-literal ( transition from-state table -- to-state/f )
+    transitions>> [ at ] [ 2drop f ] if-at ;
+
+: assoc-with ( param assoc quot -- assoc curry )
+    swapd [ [ -rot ] dip call ] 2curry ; inline
+
+: match-class ( transition from-state table -- to-state/f )
+    transitions>> at* [
+        [ drop class-member? ] assoc-with assoc-find [ nip ] [ drop ] if
+    ] [ drop ] if ;
+
+: match-default ( transition from-state table -- to-state/f )
+    [ nip ] dip transitions>>
+    [ t swap [ drop f ] unless-at ] [ drop f ] if-at ;
+
+: match-transition ( obj from-state dfa -- to-state/f )
+    { [ match-literal ] [ match-class ] [ match-default ] } 3|| ;
+
+: setup-match ( match -- obj state dfa-table )
+    { current-index>> text>> current-state>> dfa-table>> } get-slots
+    [ nth ] 2dip ;
+
+: do-match ( dfa-traverser -- dfa-traverser )
+    dup match-done? [
+        dup setup-match match-transition
+        [ increment-state do-match ] when*
+    ] unless ;
+
+: return-match ( dfa-traverser -- interval/f )
+    dup matches>>
+    [ drop f ]
+    [ [ start-index>> ] [ peek ] bi* 1 <range> ] if-empty ;
diff --git a/unfinished/regexp2/utils/utils.factor b/unfinished/regexp2/utils/utils.factor
new file mode 100644
index 0000000000..0167e73005
--- /dev/null
+++ b/unfinished/regexp2/utils/utils.factor
@@ -0,0 +1,69 @@
+! Copyright (C) 2008 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: accessors arrays assocs combinators.lib io kernel
+math math.order namespaces regexp2.backend sequences
+sequences.lib unicode.categories math.ranges fry
+combinators.short-circuit ;
+IN: regexp2.utils
+
+: (while-changes) ( obj quot pred pred-ret -- obj )
+    ! quot: ( obj -- obj' )
+    ! pred: ( obj -- <=> )
+    >r >r dup slip r> pick over call r> dupd =
+    [ 3drop ] [ (while-changes) ] if ; inline
+
+: while-changes ( obj quot pred -- obj' )
+    pick over call (while-changes) ; inline
+
+: last-state ( regexp -- range ) stack>> peek first2 [a,b] ;
+: push1 ( obj -- ) input-stream get stream>> push ;
+: peek1 ( -- obj ) input-stream get stream>> [ f ] [ peek ] if-empty ;
+: pop3 ( seq -- obj1 obj2 obj3 ) [ pop ] [ pop ] [ pop ] tri spin ;
+
+: stack ( -- obj ) current-regexp get stack>> ;
+: change-whole-stack ( quot -- )
+    current-regexp get
+    [ stack>> swap call ] keep (>>stack) ; inline
+: push-stack ( obj -- ) stack push ;
+: pop-stack ( -- obj ) stack pop ;
+: cut-out ( vector n -- vector' vector ) cut rest ;
+ERROR: cut-stack-error ;
+: cut-stack ( obj vector -- vector' vector )
+    tuck last-index [ cut-stack-error ] unless* cut-out swap ;
+
+ERROR: bad-octal number ;
+ERROR: bad-hex number ;
+: check-octal ( octal -- octal ) dup 255 > [ bad-octal ] when ;
+: check-hex ( hex -- hex ) dup number? [ bad-hex ] unless ;
+
+: ascii? ( n -- ? ) 0 HEX: 7f between? ;
+: octal-digit? ( n -- ? ) CHAR: 0 CHAR: 7 between? ;
+: decimal-digit? ( n -- ? ) CHAR: 0 CHAR: 9 between? ;
+
+: hex-digit? ( n -- ? )
+    [
+        [ decimal-digit? ]
+        [ CHAR: a CHAR: f between? ]
+        [ CHAR: A CHAR: F between? ]
+    ] 1|| ;
+
+: control-char? ( n -- ? )
+    [
+        [ 0 HEX: 1f between? ]
+        [ HEX: 7f = ]
+    ] 1|| ;
+
+: punct? ( n -- ? )
+    "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" member? ;
+
+: c-identifier-char? ( ch -- ? )
+    [ [ alpha? ] [ CHAR: _ = ] ] 1|| ;
+
+: java-blank? ( n -- ? )
+    {
+        CHAR: \s CHAR: \t CHAR: \n
+        HEX: b HEX: 7 CHAR: \r
+    } member? ;
+
+: java-printable? ( n -- ? )
+    [ [ alpha? ] [ punct? ] ] 1|| ;

From c9ea133b16a8f33a28946b2212a0736487ed8a51 Mon Sep 17 00:00:00 2001
From: Slava Pestov <slava@slava-pestovs-macbook-pro.local>
Date: Wed, 27 Aug 2008 00:02:53 -0500
Subject: [PATCH 2/2] Remove

---
 extra/regexp2/authors.txt                     |   1 -
 extra/regexp2/backend/backend.factor          |  25 --
 extra/regexp2/classes/classes.factor          |  55 ---
 extra/regexp2/dfa/dfa.factor                  |  70 ----
 extra/regexp2/nfa/nfa.factor                  | 126 ------
 extra/regexp2/parser/parser-tests.factor      |  33 --
 extra/regexp2/parser/parser.factor            | 391 ------------------
 extra/regexp2/regexp2-docs.factor             |  14 -
 extra/regexp2/regexp2-tests.factor            | 263 ------------
 extra/regexp2/regexp2.factor                  |  59 ---
 extra/regexp2/summary.txt                     |   1 -
 extra/regexp2/tags.txt                        |   2 -
 .../transition-tables.factor                  |  44 --
 extra/regexp2/traversal/traversal.factor      |  80 ----
 extra/regexp2/utils/utils.factor              |  69 ----
 15 files changed, 1233 deletions(-)
 delete mode 100644 extra/regexp2/authors.txt
 delete mode 100644 extra/regexp2/backend/backend.factor
 delete mode 100644 extra/regexp2/classes/classes.factor
 delete mode 100644 extra/regexp2/dfa/dfa.factor
 delete mode 100644 extra/regexp2/nfa/nfa.factor
 delete mode 100644 extra/regexp2/parser/parser-tests.factor
 delete mode 100644 extra/regexp2/parser/parser.factor
 delete mode 100644 extra/regexp2/regexp2-docs.factor
 delete mode 100644 extra/regexp2/regexp2-tests.factor
 delete mode 100644 extra/regexp2/regexp2.factor
 delete mode 100644 extra/regexp2/summary.txt
 delete mode 100644 extra/regexp2/tags.txt
 delete mode 100644 extra/regexp2/transition-tables/transition-tables.factor
 delete mode 100644 extra/regexp2/traversal/traversal.factor
 delete mode 100644 extra/regexp2/utils/utils.factor

diff --git a/extra/regexp2/authors.txt b/extra/regexp2/authors.txt
deleted file mode 100644
index 7c1b2f2279..0000000000
--- a/extra/regexp2/authors.txt
+++ /dev/null
@@ -1 +0,0 @@
-Doug Coleman
diff --git a/extra/regexp2/backend/backend.factor b/extra/regexp2/backend/backend.factor
deleted file mode 100644
index c39d67e7b8..0000000000
--- a/extra/regexp2/backend/backend.factor
+++ /dev/null
@@ -1,25 +0,0 @@
-! Copyright (C) 2008 Doug Coleman.
-! See http://factorcode.org/license.txt for BSD license.
-USING: accessors hashtables kernel math state-tables vars vectors ;
-IN: regexp2.backend
-
-TUPLE: regexp
-    raw
-    { stack vector }
-    parse-tree
-    { options hashtable }
-    nfa-table
-    dfa-table
-    minimized-table
-    { state integer }
-    { new-states vector }
-    { visited-states hashtable } ;
-
-: reset-regexp ( regexp -- regexp )
-    0 >>state
-    V{ } clone >>stack
-    V{ } clone >>new-states
-    H{ } clone >>options
-    H{ } clone >>visited-states ;
-
-SYMBOL: current-regexp
diff --git a/extra/regexp2/classes/classes.factor b/extra/regexp2/classes/classes.factor
deleted file mode 100644
index 7737e02d40..0000000000
--- a/extra/regexp2/classes/classes.factor
+++ /dev/null
@@ -1,55 +0,0 @@
-! Copyright (C) 2008 Doug Coleman.
-! See http://factorcode.org/license.txt for BSD license.
-USING: accessors kernel math math.order symbols regexp2.parser
-words regexp2.utils unicode.categories combinators.short-circuit ;
-IN: regexp2.classes
-
-GENERIC: class-member? ( obj class -- ? )
-
-M: word class-member? ( obj class -- ? ) 2drop f ;
-M: integer class-member? ( obj class -- ? ) 2drop f ;
-
-M: character-class-range class-member? ( obj class -- ? )
-    [ from>> ] [ to>> ] bi between? ;
-
-M: any-char class-member? ( obj class -- ? )
-    2drop t ;
-    
-M: letter-class class-member? ( obj class -- ? )
-    drop letter? ;
-            
-M: LETTER-class class-member? ( obj class -- ? )
-    drop LETTER? ;
-
-M: Letter-class class-member? ( obj class -- ? )
-    drop Letter? ;
-
-M: ascii-class class-member? ( obj class -- ? )
-    drop ascii? ;
-
-M: digit-class class-member? ( obj class -- ? )
-    drop digit? ;
-
-M: alpha-class class-member? ( obj class -- ? )
-    drop alpha? ;
-
-M: punctuation-class class-member? ( obj class -- ? )
-    drop punct? ;
-
-M: java-printable-class class-member? ( obj class -- ? )
-    drop java-printable? ;
-
-M: non-newline-blank-class class-member? ( obj class -- ? )
-    drop { [ blank? ] [ CHAR: \n = not ] } 1&& ;
-
-M: control-character-class class-member? ( obj class -- ? )
-    drop control-char? ;
-
-M: hex-digit-class class-member? ( obj class -- ? )
-    drop hex-digit? ;
-
-M: java-blank-class class-member? ( obj class -- ? )
-    drop java-blank? ;
-
-M: unmatchable-class class-member? ( obj class -- ? )
-    2drop f ;
diff --git a/extra/regexp2/dfa/dfa.factor b/extra/regexp2/dfa/dfa.factor
deleted file mode 100644
index 0dcf6c4ab5..0000000000
--- a/extra/regexp2/dfa/dfa.factor
+++ /dev/null
@@ -1,70 +0,0 @@
-! Copyright (C) 2008 Doug Coleman.
-! See http://factorcode.org/license.txt for BSD license.
-USING: accessors arrays assocs combinators fry kernel locals
-math math.order regexp2.nfa regexp2.transition-tables sequences
-sets sorting vectors regexp2.utils sequences.lib ;
-USING: io prettyprint threads ;
-IN: regexp2.dfa
-
-: find-delta ( states transition regexp -- new-states )
-    nfa-table>> transitions>>
-    rot [ swap at at ] with with map sift concat prune ;
-
-: (find-epsilon-closure) ( states regexp -- new-states )
-    eps swap find-delta ;
-
-: find-epsilon-closure ( states regexp -- new-states )
-    '[ dup , (find-epsilon-closure) union ] [ length ] while-changes
-    natural-sort ;
-
-: find-closure ( states transition regexp -- new-states )
-    [ find-delta ] 2keep nip find-epsilon-closure ;
-
-: find-start-state ( regexp -- state )
-    [ nfa-table>> start-state>> 1vector ] keep find-epsilon-closure ;
-
-: find-transitions ( seq1 regexp -- seq2 )
-    nfa-table>> transitions>>
-    [ at keys ] curry map concat eps swap remove ;
-
-: add-todo-state ( state regexp -- )
-    2dup visited-states>> key? [
-        2drop
-    ] [
-        [ visited-states>> conjoin ]
-        [ new-states>> push ] 2bi
-    ] if ;
-
-: new-transitions ( regexp -- )
-    dup new-states>> [
-        drop
-    ] [
-        dupd pop dup pick find-transitions rot
-        [
-            [ [ find-closure ] 2keep nip dupd add-todo-state ] 3keep
-            >r swapd transition boa r> dfa-table>> add-transition 
-        ] curry with each
-        new-transitions
-    ] if-empty ;
-
-: states ( hashtable -- array )
-    [ keys ]
-    [ values [ values concat ] map concat append ] bi ;
-
-: set-final-states ( regexp -- )
-    dup
-    [ nfa-table>> final-states>> keys ]
-    [ dfa-table>> transitions>> states ] bi
-    [ intersect empty? not ] with filter
-
-    swap dfa-table>> final-states>>
-    [ conjoin ] curry each ;
-
-: set-initial-state ( regexp -- )
-    dup
-    [ dfa-table>> ] [ find-start-state ] bi
-    [ >>start-state drop ] keep
-    1vector >>new-states drop ;
-
-: construct-dfa ( regexp -- )
-    [ set-initial-state ] [ new-transitions ] [ set-final-states ] tri ;
diff --git a/extra/regexp2/nfa/nfa.factor b/extra/regexp2/nfa/nfa.factor
deleted file mode 100644
index f87a2a7b52..0000000000
--- a/extra/regexp2/nfa/nfa.factor
+++ /dev/null
@@ -1,126 +0,0 @@
-! Copyright (C) 2008 Doug Coleman.
-! See http://factorcode.org/license.txt for BSD license.
-USING: accessors arrays assocs grouping kernel regexp2.backend
-locals math namespaces regexp2.parser sequences state-tables fry
-quotations math.order math.ranges vectors unicode.categories
-regexp2.utils regexp2.transition-tables words sequences.lib ;
-IN: regexp2.nfa
-
-SYMBOL: negation-mode
-: negated? ( -- ? ) negation-mode get 0 or odd? ; 
-
-SINGLETON: eps
-
-: next-state ( regexp -- state )
-    [ state>> ] [ [ 1+ ] change-state drop ] bi ;
-
-: set-start-state ( regexp -- )
-    dup stack>> [
-        drop
-    ] [
-        [ nfa-table>> ] [ pop first ] bi* >>start-state drop
-    ] if-empty ;
-
-GENERIC: nfa-node ( node -- )
-
-:: add-simple-entry ( obj class -- )
-    [let* | regexp [ current-regexp get ]
-            s0 [ regexp next-state ]
-            s1 [ regexp next-state ]
-            stack [ regexp stack>> ]
-            table [ regexp nfa-table>> ] |
-        negated? [
-            s0 f obj class boa table add-transition
-            s0 s1 <default-transition> table add-transition
-        ] [
-            s0 s1 obj class boa table add-transition
-        ] if
-        s0 s1 2array stack push
-        t s1 table final-states>> set-at ] ;
-
-:: concatenate-nodes ( -- )
-    [let* | regexp [ current-regexp get ]
-            stack [ regexp stack>> ]
-            table [ regexp nfa-table>> ]
-            s2 [ stack peek first ]
-            s3 [ stack pop second ]
-            s0 [ stack peek first ]
-            s1 [ stack pop second ] |
-        s1 s2 eps <literal-transition> table add-transition
-        s1 table final-states>> delete-at
-        s0 s3 2array stack push ] ;
-
-:: alternate-nodes ( -- )
-    [let* | regexp [ current-regexp get ]
-            stack [ regexp stack>> ]
-            table [ regexp nfa-table>> ]
-            s2 [ stack peek first ]
-            s3 [ stack pop second ]
-            s0 [ stack peek first ]
-            s1 [ stack pop second ]
-            s4 [ regexp next-state ]
-            s5 [ regexp next-state ] |
-        s4 s0 eps <literal-transition> table add-transition
-        s4 s2 eps <literal-transition> table add-transition
-        s1 s5 eps <literal-transition> table add-transition
-        s3 s5 eps <literal-transition> table add-transition
-        s1 table final-states>> delete-at
-        s3 table final-states>> delete-at
-        t s5 table final-states>> set-at
-        s4 s5 2array stack push ] ;
-
-M: kleene-star nfa-node ( node -- )
-    term>> nfa-node
-    [let* | regexp [ current-regexp get ]
-            stack [ regexp stack>> ]
-            s0 [ stack peek first ]
-            s1 [ stack pop second ]
-            s2 [ regexp next-state ]
-            s3 [ regexp next-state ]
-            table [ regexp nfa-table>> ] |
-        s1 table final-states>> delete-at
-        t s3 table final-states>> set-at
-        s1 s0 eps <literal-transition> table add-transition
-        s2 s0 eps <literal-transition> table add-transition
-        s2 s3 eps <literal-transition> table add-transition
-        s1 s3 eps <literal-transition> table add-transition
-        s2 s3 2array stack push ] ;
-
-M: concatenation nfa-node ( node -- )
-    seq>>
-    [ [ nfa-node ] each ]
-    [ length 1- [ concatenate-nodes ] times ] bi ;
-
-M: alternation nfa-node ( node -- )
-    seq>>
-    [ [ nfa-node ] each ]
-    [ length 1- [ alternate-nodes ] times ] bi ;
-
-M: constant nfa-node ( node -- )
-    char>> literal-transition add-simple-entry ;
-
-M: epsilon nfa-node ( node -- )
-    drop eps literal-transition add-simple-entry ;
-
-M: word nfa-node ( node -- )
-    class-transition add-simple-entry ;
-
-M: character-class-range nfa-node ( node -- )
-    class-transition add-simple-entry ;
-
-M: capture-group nfa-node ( node -- )
-    term>> nfa-node ;
-
-M: negation nfa-node ( node -- )
-    negation-mode inc
-    term>> nfa-node 
-    negation-mode dec ;
-
-: construct-nfa ( regexp -- )
-    [
-        reset-regexp
-        negation-mode off
-        [ current-regexp set ]
-        [ parse-tree>> nfa-node ]
-        [ set-start-state ] tri
-    ] with-scope ;
diff --git a/extra/regexp2/parser/parser-tests.factor b/extra/regexp2/parser/parser-tests.factor
deleted file mode 100644
index 9dc7dc7909..0000000000
--- a/extra/regexp2/parser/parser-tests.factor
+++ /dev/null
@@ -1,33 +0,0 @@
-USING: kernel tools.test regexp2.backend regexp2 ;
-IN: regexp2.parser
-
-: test-regexp ( string -- )
-    default-regexp parse-regexp ;
-
-: test-regexp2 ( string -- regexp )
-    default-regexp dup parse-regexp ;
-
-[ "(" ] [ unmatched-parentheses? ] must-fail-with
-
-[ ] [ "a|b" test-regexp ] unit-test
-[ ] [ "a.b" test-regexp ] unit-test
-[ ] [ "a|b|c" test-regexp ] unit-test
-[ ] [ "abc|b" test-regexp ] unit-test
-[ ] [ "a|bcd" test-regexp ] unit-test
-[ ] [ "a|(b)" test-regexp ] unit-test
-[ ] [ "(a)|b" test-regexp ] unit-test
-[ ] [ "(a|b)" test-regexp ] unit-test
-[ ] [ "((a)|(b))" test-regexp ] unit-test
-
-[ ] [ "(?:a)" test-regexp ] unit-test
-[ ] [ "(?i:a)" test-regexp ] unit-test
-[ ] [ "(?-i:a)" test-regexp ] unit-test
-[ "(?z:a)" test-regexp ] [ bad-option? ] must-fail-with
-[ "(?-z:a)" test-regexp ] [ bad-option? ] must-fail-with
-
-[ ] [ "(?=a)" test-regexp ] unit-test
-
-[ ] [ "[abc]" test-regexp ] unit-test
-[ ] [ "[a-c]" test-regexp ] unit-test
-[ ] [ "[^a-c]" test-regexp ] unit-test
-[ "[^]" test-regexp ] must-fail
diff --git a/extra/regexp2/parser/parser.factor b/extra/regexp2/parser/parser.factor
deleted file mode 100644
index 6eda3310d0..0000000000
--- a/extra/regexp2/parser/parser.factor
+++ /dev/null
@@ -1,391 +0,0 @@
-! Copyright (C) 2008 Doug Coleman.
-! See http://factorcode.org/license.txt for BSD license.
-USING: accessors arrays assocs combinators io io.streams.string
-kernel math math.parser multi-methods namespaces qualified sets
-quotations sequences sequences.lib splitting symbols vectors
-dlists math.order combinators.lib unicode.categories strings
-sequences.lib regexp2.backend regexp2.utils unicode.case ;
-IN: regexp2.parser
-
-FROM: math.ranges => [a,b] ;
-
-MIXIN: node
-TUPLE: concatenation seq ; INSTANCE: concatenation node
-TUPLE: alternation seq ; INSTANCE: alternation node
-TUPLE: kleene-star term ; INSTANCE: kleene-star node
-TUPLE: question term ; INSTANCE: question node
-TUPLE: negation term ; INSTANCE: negation node
-TUPLE: constant char ; INSTANCE: constant node
-TUPLE: range from to ; INSTANCE: range node
-TUPLE: lookahead term ; INSTANCE: lookahead node
-TUPLE: lookbehind term ; INSTANCE: lookbehind node
-TUPLE: capture-group term ; INSTANCE: capture-group node
-TUPLE: non-capture-group term ; INSTANCE: non-capture-group node
-TUPLE: independent-group term ; INSTANCE: independent-group node
-TUPLE: character-class-range from to ; INSTANCE: character-class-range node
-SINGLETON: epsilon INSTANCE: epsilon node
-SINGLETON: any-char INSTANCE: any-char node
-SINGLETON: front-anchor INSTANCE: front-anchor node
-SINGLETON: back-anchor INSTANCE: back-anchor node
-
-TUPLE: option-on option ; INSTANCE: option-on node
-TUPLE: option-off option ; INSTANCE: option-off node
-SINGLETONS: unix-lines dotall multiline comments case-insensitive unicode-case reversed-regexp ;
-
-SINGLETONS: letter-class LETTER-class Letter-class digit-class
-alpha-class non-newline-blank-class
-ascii-class punctuation-class java-printable-class blank-class
-control-character-class hex-digit-class java-blank-class c-identifier-class
-unmatchable-class ;
-
-SINGLETONS: beginning-of-group end-of-group
-beginning-of-character-class end-of-character-class
-left-parenthesis pipe caret dash ;
-
-: get-option ( option -- ? ) current-regexp get options>> at ;
-: get-unix-lines ( -- ? ) unix-lines get-option ;
-: get-dotall ( -- ? ) dotall get-option ;
-: get-multiline ( -- ? ) multiline get-option ;
-: get-comments ( -- ? ) comments get-option ;
-: get-case-insensitive ( -- ? ) case-insensitive get-option ;
-: get-unicode-case ( -- ? ) unicode-case get-option ;
-: get-reversed-regexp ( -- ? ) reversed-regexp get-option ;
-
-: <negation> ( obj -- negation ) negation boa ;
-: <concatenation> ( seq -- concatenation )
-    >vector get-reversed-regexp [ reverse ] when
-    concatenation boa ;
-: <alternation> ( seq -- alternation ) >vector alternation boa ;
-: <capture-group> ( obj -- capture-group ) capture-group boa ;
-: <kleene-star> ( obj -- kleene-star ) kleene-star boa ;
-: <constant> ( obj -- constant )
-    dup Letter? get-case-insensitive and [
-        [ ch>lower constant boa ]
-        [ ch>upper constant boa ] bi 2array <alternation>
-    ] [
-        constant boa
-    ] if ;
-
-: first|concatenation ( seq -- first/concatenation )
-    dup length 1 = [ first ] [ <concatenation> ] if ;
-
-: first|alternation ( seq -- first/alternation )
-    dup length 1 = [ first ] [ <alternation> ] if ;
-
-: <character-class-range> ( from to -- obj )
-    2dup [ Letter? ] bi@ or get-case-insensitive and [
-        [ [ ch>lower ] bi@ character-class-range boa ]
-        [ [ ch>upper ] bi@ character-class-range boa ] 2bi
-        2array [ [ from>> ] [ to>> ] bi < ] filter
-        [ unmatchable-class ] [ first|alternation ] if-empty
-    ] [
-        2dup <
-        [ character-class-range boa ] [ 2drop unmatchable-class ] if
-    ] if ;
-
-ERROR: unmatched-parentheses ;
-
-: make-positive-lookahead ( string -- )
-    lookahead boa push-stack ;
-
-: make-negative-lookahead ( string -- )
-    <negation> lookahead boa push-stack ;
-
-: make-independent-group ( string -- )
-    #! no backtracking
-    independent-group boa push-stack ;
-
-: make-positive-lookbehind ( string -- )
-    lookbehind boa push-stack ;
-
-: make-negative-lookbehind ( string -- )
-    <negation> lookbehind boa push-stack ;
-
-DEFER: nested-parse-regexp
-: make-non-capturing-group ( string -- )
-    non-capture-group boa push-stack ;
-
-ERROR: bad-option ch ;
-
-: option ( ch -- singleton )
-    {
-        { CHAR: i [ case-insensitive ] }
-        { CHAR: d [ unix-lines ] }
-        { CHAR: m [ multiline ] }
-        { CHAR: r [ reversed-regexp ] }
-        { CHAR: s [ dotall ] }
-        { CHAR: u [ unicode-case ] }
-        { CHAR: x [ comments ] }
-        [ bad-option ]
-    } case ;
-
-: option-on ( option -- ) current-regexp get options>> conjoin ;
-: option-off ( option -- ) current-regexp get options>> delete-at ;
-
-: toggle-option ( ch ? -- ) [ option ] dip [ option-on ] [ option-off ] if ;
-: (parse-options) ( string ? -- ) [ toggle-option ] curry each ;
-
-: parse-options ( string -- )
-    "-" split1 [ t (parse-options) ] [ f (parse-options) ] bi* ;
-
-DEFER: (parse-regexp)
-: parse-special-group ( -- )
-    beginning-of-group push-stack
-    (parse-regexp) pop-stack make-non-capturing-group ;
-
-ERROR: bad-special-group string ;
-
-: (parse-special-group) ( -- )
-    read1 {
-        { [ dup CHAR: : = ]
-            [ drop nested-parse-regexp pop-stack make-non-capturing-group ] }
-        { [ dup CHAR: = = ]
-            [ drop nested-parse-regexp pop-stack make-positive-lookahead ] }
-        { [ dup CHAR: = = ]
-            [ drop nested-parse-regexp pop-stack make-negative-lookahead ] }
-        { [ dup CHAR: > = ]
-            [ drop nested-parse-regexp pop-stack make-independent-group ] }
-        { [ dup CHAR: < = peek1 CHAR: = = and ]
-            [ drop read1 drop nested-parse-regexp pop-stack make-positive-lookbehind ] }
-        { [ dup CHAR: < = peek1 CHAR: ! = and ]
-            [ drop read1 drop nested-parse-regexp pop-stack make-negative-lookbehind ] }
-        [
-            ":)" read-until
-            [ swap prefix ] dip
-            {
-                { CHAR: : [ parse-options parse-special-group ] }
-                { CHAR: ) [ parse-options ] }
-                [ drop bad-special-group ]
-            } case
-        ]
-    } cond ;
-
-: handle-left-parenthesis ( -- )
-    peek1 CHAR: ? =
-    [ read1 drop (parse-special-group) ]
-    [ nested-parse-regexp ] if ;
-
-: handle-dot ( -- ) any-char push-stack ;
-: handle-pipe ( -- ) pipe push-stack ;
-: handle-star ( -- ) stack pop <kleene-star> push-stack ;
-: handle-question ( -- )
-    stack pop epsilon 2array <alternation> push-stack ;
-: handle-plus ( -- )
-    stack pop dup <kleene-star> 2array <concatenation> push-stack ;
-
-ERROR: unmatched-brace ;
-: parse-repetition ( -- start finish ? )
-    "}" read-until [ unmatched-brace ] unless
-    [ "," split1 [ string>number ] bi@ ]
-    [ CHAR: , swap index >boolean ] bi ;
-
-: replicate/concatenate ( n obj -- obj' )
-    over zero? [ 2drop epsilon ]
-    [ <repetition> first|concatenation ] if ;
-
-: exactly-n ( n -- )
-    stack pop replicate/concatenate push-stack ;
-
-: at-least-n ( n -- )
-    stack pop
-    [ replicate/concatenate ] keep
-    <kleene-star> 2array <concatenation> push-stack ;
-
-: at-most-n ( n -- )
-    1+
-    stack pop
-    [ replicate/concatenate ] curry map <alternation> push-stack ;
-
-: from-m-to-n ( m n -- )
-    [a,b]
-    stack pop
-    [ replicate/concatenate ] curry map
-    <alternation> push-stack ;
-
-ERROR: invalid-range a b ;
-
-: handle-left-brace ( -- )
-    parse-repetition
-    >r 2dup [ [ 0 < [ invalid-range ] when ] when* ] bi@ r>
-    [
-        2dup and [ from-m-to-n ]
-        [ [ nip at-most-n ] [ at-least-n ] if* ] if
-    ] [ drop 0 max exactly-n ] if ;
-
-: handle-front-anchor ( -- ) front-anchor push-stack ;
-: handle-back-anchor ( -- ) back-anchor push-stack ;
-
-ERROR: bad-character-class obj ;
-ERROR: expected-posix-class ;
-
-: parse-posix-class ( -- obj )
-    read1 CHAR: { = [ expected-posix-class ] unless
-    "}" read-until [ bad-character-class ] unless
-    {
-        { "Lower" [ get-case-insensitive Letter-class letter-class ? ] }
-        { "Upper" [ get-case-insensitive Letter-class LETTER-class ? ] }
-        { "Alpha" [ Letter-class ] }
-        { "ASCII" [ ascii-class ] }
-        { "Digit" [ digit-class ] }
-        { "Alnum" [ alpha-class ] }
-        { "Punct" [ punctuation-class ] }
-        { "Graph" [ java-printable-class ] }
-        { "Print" [ java-printable-class ] }
-        { "Blank" [ non-newline-blank-class ] }
-        { "Cntrl" [ control-character-class ] }
-        { "XDigit" [ hex-digit-class ] }
-        { "Space" [ java-blank-class ] }
-        ! TODO: unicode-character-class, fallthrough in unicode is bad-char-clss
-        [ bad-character-class ]
-    } case ;
-
-: parse-octal ( -- n ) 3 read oct> check-octal ;
-: parse-short-hex ( -- n ) 2 read hex> check-hex ;
-: parse-long-hex ( -- n ) 6 read hex> check-hex ;
-: parse-control-character ( -- n ) read1 ;
-
-ERROR: bad-escaped-literals seq ;
-: parse-escaped-literals ( -- obj )
-    "\\E" read-until [ bad-escaped-literals ] unless
-    read1 drop
-    [ epsilon ] [
-        [ <constant> ] V{ } map-as
-        first|concatenation
-    ] if-empty ;
-
-: parse-escaped ( -- obj )
-    read1
-    {
-        { CHAR: \ [ CHAR: \ <constant> ] }
-        { CHAR: . [ CHAR: . <constant> ] }
-        { CHAR: t [ CHAR: \t <constant> ] }
-        { CHAR: n [ CHAR: \n <constant> ] }
-        { CHAR: r [ CHAR: \r <constant> ] }
-        { CHAR: f [ HEX: c <constant> ] }
-        { CHAR: a [ HEX: 7 <constant> ] }
-        { CHAR: e [ HEX: 1b <constant> ] }
-
-        { CHAR: d [ digit-class ] }
-        { CHAR: D [ digit-class <negation> ] }
-        { CHAR: s [ java-blank-class ] }
-        { CHAR: S [ java-blank-class <negation> ] }
-        { CHAR: w [ c-identifier-class ] }
-        { CHAR: W [ c-identifier-class <negation> ] }
-
-        { CHAR: p [ parse-posix-class ] }
-        { CHAR: P [ parse-posix-class <negation> ] }
-        { CHAR: x [ parse-short-hex <constant> ] }
-        { CHAR: u [ parse-long-hex <constant> ] }
-        { CHAR: 0 [ parse-octal <constant> ] }
-        { CHAR: c [ parse-control-character ] }
-
-        ! { CHAR: b [ handle-word-boundary ] }
-        ! { CHAR: B [ handle-word-boundary <negation> ] }
-        ! { CHAR: A [ handle-beginning-of-input ] }
-        ! { CHAR: G [ end of previous match ] }
-        ! { CHAR: Z [ handle-end-of-input ] }
-        ! { CHAR: z [ handle-end-of-input ] } ! except for terminator
-
-        { CHAR: Q [ parse-escaped-literals ] }
-    } case ;
-
-: handle-escape ( -- ) parse-escaped push-stack ;
-
-: handle-dash ( vector -- vector' )
-    H{ { dash CHAR: - } } substitute ;
-
-: character-class>alternation ( seq -- alternation )
-    [ dup number? [ <constant> ] when ] map first|alternation ;
-
-: handle-caret ( vector -- vector' )
-    dup [ length 2 >= ] [ first caret eq? ] bi and [
-        rest-slice character-class>alternation <negation>
-    ] [
-        character-class>alternation
-    ] if ;
-
-: make-character-class ( -- character-class )
-    [ beginning-of-character-class swap cut-stack ] change-whole-stack
-    handle-dash handle-caret ;
-
-: apply-dash ( -- )
-    stack [ pop3 nip <character-class-range> ] keep push ;
-
-: apply-dash? ( -- ? )
-    stack dup length 3 >=
-    [ [ length 2 - ] keep nth dash eq? ] [ drop f ] if ;
-
-ERROR: empty-negated-character-class ;
-DEFER: handle-left-bracket
-: (parse-character-class) ( -- )
-    read1 [ empty-negated-character-class ] unless* {
-        { CHAR: [ [ handle-left-bracket t ] }
-        { CHAR: ] [ make-character-class push-stack f ] }
-        { CHAR: - [ dash push-stack t ] }
-        { CHAR: \ [ parse-escaped push-stack t ] }
-        [ push-stack apply-dash? [ apply-dash ] when t ]
-    } case
-    [ (parse-character-class) ] when ;
-
-: parse-character-class-second ( -- )
-    read1 {
-        { CHAR: [ [ CHAR: [ <constant> push-stack ] }
-        { CHAR: ] [ CHAR: ] <constant> push-stack ] }
-        { CHAR: - [ CHAR: - <constant> push-stack ] }
-        [ push1 ]
-    } case ;
-
-: parse-character-class-first ( -- )
-    read1 {
-        { CHAR: ^ [ caret push-stack parse-character-class-second ] }
-        { CHAR: [ [ CHAR: [ <constant> push-stack ] }
-        { CHAR: ] [ CHAR: ] <constant> push-stack ] }
-        { CHAR: - [ CHAR: - <constant> push-stack ] }
-        [ push1 ]
-    } case ;
-
-: handle-left-bracket ( -- )
-    beginning-of-character-class push-stack
-    parse-character-class-first (parse-character-class) ;
-
-: finish-regexp-parse ( stack -- obj )
-    { pipe } split
-    [ first|concatenation ] map first|alternation ;
-
-: handle-right-parenthesis ( -- )
-    stack beginning-of-group over last-index cut rest
-    [ current-regexp get swap >>stack drop ]
-    [ finish-regexp-parse <capture-group> push-stack ] bi* ;
-
-: nested-parse-regexp ( -- )
-    beginning-of-group push-stack (parse-regexp) ;
-
-: ((parse-regexp)) ( token -- )
-    {
-        { CHAR: . [ handle-dot ] }
-        { CHAR: ( [ handle-left-parenthesis ] }
-        { CHAR: ) [ handle-right-parenthesis ] }
-        { CHAR: | [ handle-pipe ] }
-        { CHAR: ? [ handle-question ] }
-        { CHAR: * [ handle-star ] }
-        { CHAR: + [ handle-plus ] }
-        { CHAR: { [ handle-left-brace ] }
-        { CHAR: [ [ handle-left-bracket ] }
-        { CHAR: ^ [ handle-front-anchor ] }
-        { CHAR: $ [ handle-back-anchor ] }
-        { CHAR: \ [ handle-escape ] }
-        [ <constant> push-stack ]
-    } case ;
-
-: (parse-regexp) ( -- )
-    read1 [ ((parse-regexp)) (parse-regexp) ] when* ;
-
-: parse-regexp ( regexp -- )
-    dup current-regexp [
-        raw>> [
-            <string-reader> [ (parse-regexp) ] with-input-stream
-        ] unless-empty
-        current-regexp get
-        stack finish-regexp-parse
-            >>parse-tree drop
-    ] with-variable ;
diff --git a/extra/regexp2/regexp2-docs.factor b/extra/regexp2/regexp2-docs.factor
deleted file mode 100644
index f903c14bc4..0000000000
--- a/extra/regexp2/regexp2-docs.factor
+++ /dev/null
@@ -1,14 +0,0 @@
-! Copyright (C) 2008 Doug Coleman.
-! See http://factorcode.org/license.txt for BSD license.
-USING: kernel strings help.markup help.syntax regexp2.backend ;
-IN: regexp2
-
-HELP: <regexp>
-{ $values { "string" string } { "regexp" regexp } }
-{ $description "Compiles a regular expression into a DFA and returns this object.  Regular expressions only have to be compiled once and can then be used multiple times to match input strings." } ;
-
-HELP: <iregexp>
-{ $values { "string" string } { "regexp" regexp } }
-{ $description "Compiles a case-insensitive regular expression into a DFA and returns this object.  Otherwise, exactly the same as " { $link <regexp> } } ;
-
-{ <regexp> <iregexp> } related-words
diff --git a/extra/regexp2/regexp2-tests.factor b/extra/regexp2/regexp2-tests.factor
deleted file mode 100644
index 54626ea165..0000000000
--- a/extra/regexp2/regexp2-tests.factor
+++ /dev/null
@@ -1,263 +0,0 @@
-USING: regexp2 tools.test kernel sequences regexp2.parser
-regexp2.traversal ;
-IN: regexp2-tests
-
-[ f ] [ "b" "a*" <regexp> matches? ] unit-test
-[ t ] [ "" "a*" <regexp> matches? ] unit-test
-[ t ] [ "a" "a*" <regexp> matches? ] unit-test
-[ t ] [ "aaaaaaa" "a*"  <regexp> matches? ] unit-test
-[ f ] [ "ab" "a*" <regexp> matches? ] unit-test
-
-[ t ] [ "abc" "abc" <regexp> matches? ] unit-test
-[ t ] [ "a" "a|b|c" <regexp> matches? ] unit-test
-[ t ] [ "b" "a|b|c" <regexp> matches? ] unit-test
-[ t ] [ "c" "a|b|c" <regexp> matches? ] unit-test
-[ f ] [ "c" "d|e|f" <regexp> matches? ] unit-test
-
-[ f ] [ "aa" "a|b|c" <regexp> matches? ] unit-test
-[ f ] [ "bb" "a|b|c" <regexp> matches? ] unit-test
-[ f ] [ "cc" "a|b|c" <regexp> matches? ] unit-test
-[ f ] [ "cc" "d|e|f" <regexp> matches? ] unit-test
-
-[ f ] [ "" "a+" <regexp> matches? ] unit-test
-[ t ] [ "a" "a+" <regexp> matches? ] unit-test
-[ t ] [ "aa" "a+" <regexp> matches? ] unit-test
-
-[ t ] [ "" "a?" <regexp> matches? ] unit-test
-[ t ] [ "a" "a?" <regexp> matches? ] unit-test
-[ f ] [ "aa" "a?" <regexp> matches? ] unit-test
-
-[ f ] [ "" "." <regexp> matches? ] unit-test
-[ t ] [ "a" "." <regexp> matches? ] unit-test
-[ t ] [ "." "." <regexp> matches? ] unit-test
-! [ f ] [ "\n" "." <regexp> matches? ] unit-test
-
-[ f ] [ "" ".+" <regexp> matches? ] unit-test
-[ t ] [ "a" ".+" <regexp> matches? ] unit-test
-[ t ] [ "ab" ".+" <regexp> matches? ] unit-test
-
-
-[ t ] [ "" "a|b*|c+|d?" <regexp> matches? ] unit-test
-[ t ] [ "a" "a|b*|c+|d?" <regexp> matches? ] unit-test
-[ t ] [ "c" "a|b*|c+|d?" <regexp> matches? ] unit-test
-[ t ] [ "cc" "a|b*|c+|d?" <regexp> matches? ] unit-test
-[ f ] [ "ccd" "a|b*|c+|d?" <regexp> matches? ] unit-test
-[ t ] [ "d" "a|b*|c+|d?" <regexp> matches? ] unit-test
-
-[ t ] [ "foo" "foo|bar" <regexp> matches? ] unit-test
-[ t ] [ "bar" "foo|bar" <regexp> matches? ] unit-test
-[ f ] [ "foobar" "foo|bar" <regexp> matches? ] unit-test
-
-[ f ] [ "" "(a)" <regexp> matches? ] unit-test
-[ t ] [ "a" "(a)" <regexp> matches? ] unit-test
-[ f ] [ "aa" "(a)" <regexp> matches? ] unit-test
-[ t ] [ "aa" "(a*)" <regexp> matches? ] unit-test
-
-[ f ] [ "aababaaabbac" "(a|b)+" <regexp> matches? ] unit-test
-[ t ] [ "ababaaabba" "(a|b)+" <regexp> matches? ] unit-test
-
-[ f ] [ "" "a{1}" <regexp> matches? ] unit-test
-[ t ] [ "a" "a{1}" <regexp> matches? ] unit-test
-[ f ] [ "aa" "a{1}" <regexp> matches? ] unit-test
-
-[ f ] [ "a" "a{2,}" <regexp> matches? ] unit-test
-[ t ] [ "aaa" "a{2,}" <regexp> matches? ] unit-test
-[ t ] [ "aaaa" "a{2,}" <regexp> matches? ] unit-test
-[ t ] [ "aaaaa" "a{2,}" <regexp> matches? ] unit-test
-
-[ t ] [ "" "a{,2}" <regexp> matches? ] unit-test
-[ t ] [ "a" "a{,2}" <regexp> matches? ] unit-test
-[ t ] [ "aa" "a{,2}" <regexp> matches? ] unit-test
-[ f ] [ "aaa" "a{,2}" <regexp> matches? ] unit-test
-[ f ] [ "aaaa" "a{,2}" <regexp> matches? ] unit-test
-[ f ] [ "aaaaa" "a{,2}" <regexp> matches? ] unit-test
-
-[ f ] [ "" "a{1,3}" <regexp> matches? ] unit-test
-[ t ] [ "a" "a{1,3}" <regexp> matches? ] unit-test
-[ t ] [ "aa" "a{1,3}" <regexp> matches? ] unit-test
-[ t ] [ "aaa" "a{1,3}" <regexp> matches? ] unit-test
-[ f ] [ "aaaa" "a{1,3}" <regexp> matches? ] unit-test
-
-[ f ] [ "" "[a]" <regexp> matches? ] unit-test
-[ t ] [ "a" "[a]" <regexp> matches? ] unit-test
-[ t ] [ "a" "[abc]" <regexp> matches? ] unit-test
-[ f ] [ "b" "[a]" <regexp> matches? ] unit-test
-[ f ] [ "d" "[abc]" <regexp> matches? ] unit-test
-[ t ] [ "ab" "[abc]{1,2}" <regexp> matches? ] unit-test
-[ f ] [ "abc" "[abc]{1,2}" <regexp> matches? ] unit-test
-
-[ f ] [ "" "[^a]" <regexp> matches? ] unit-test
-[ f ] [ "a" "[^a]" <regexp> matches? ] unit-test
-[ f ] [ "a" "[^abc]" <regexp> matches? ] unit-test
-[ t ] [ "b" "[^a]" <regexp> matches? ] unit-test
-[ t ] [ "d" "[^abc]" <regexp> matches? ] unit-test
-[ f ] [ "ab" "[^abc]{1,2}" <regexp> matches? ] unit-test
-[ f ] [ "abc" "[^abc]{1,2}" <regexp> matches? ] unit-test
-
-[ t ] [ "]" "[]]" <regexp> matches? ] unit-test
-[ f ] [ "]" "[^]]" <regexp> matches? ] unit-test
-[ t ] [ "a" "[^]]" <regexp> matches? ] unit-test
-
-[ "^" "[^]" <regexp> matches? ] must-fail
-[ t ] [ "^" "[]^]" <regexp> matches? ] unit-test
-[ t ] [ "]" "[]^]" <regexp> matches? ] unit-test
-
-[ t ] [ "[" "[[]" <regexp> matches? ] unit-test
-[ f ] [ "^" "[^^]" <regexp> matches? ] unit-test
-[ t ] [ "a" "[^^]" <regexp> matches? ] unit-test
-
-[ t ] [ "-" "[-]" <regexp> matches? ] unit-test
-[ f ] [ "a" "[-]" <regexp> matches? ] unit-test
-[ f ] [ "-" "[^-]" <regexp> matches? ] unit-test
-[ t ] [ "a" "[^-]" <regexp> matches? ] unit-test
-
-[ t ] [ "-" "[-a]" <regexp> matches? ] unit-test
-[ t ] [ "a" "[-a]" <regexp> matches? ] unit-test
-[ t ] [ "-" "[a-]" <regexp> matches? ] unit-test
-[ t ] [ "a" "[a-]" <regexp> matches? ] unit-test
-[ f ] [ "b" "[a-]" <regexp> matches? ] unit-test
-[ f ] [ "-" "[^-]" <regexp> matches? ] unit-test
-[ t ] [ "a" "[^-]" <regexp> matches? ] unit-test
-
-[ f ] [ "-" "[a-c]" <regexp> matches? ] unit-test
-[ t ] [ "-" "[^a-c]" <regexp> matches? ] unit-test
-[ t ] [ "b" "[a-c]" <regexp> matches? ] unit-test
-[ f ] [ "b" "[^a-c]" <regexp> matches? ] unit-test
-
-[ t ] [ "-" "[a-c-]" <regexp> matches? ] unit-test
-[ f ] [ "-" "[^a-c-]" <regexp> matches? ] unit-test
-
-[ t ] [ "\\" "[\\\\]" <regexp> matches? ] unit-test
-[ f ] [ "a" "[\\\\]" <regexp> matches? ] unit-test
-[ f ] [ "\\" "[^\\\\]" <regexp> matches? ] unit-test
-[ t ] [ "a" "[^\\\\]" <regexp> matches? ] unit-test
-
-[ t ] [ "0" "[\\d]" <regexp> matches? ] unit-test
-[ f ] [ "a" "[\\d]" <regexp> matches? ] unit-test
-[ f ] [ "0" "[^\\d]" <regexp> matches? ] unit-test
-[ t ] [ "a" "[^\\d]" <regexp> matches? ] unit-test
-
-[ t ] [ "a" "[a-z]{1,}|[A-Z]{2,4}|b*|c|(f|g)*" <regexp> matches? ] unit-test
-[ t ] [ "a" "[a-z]{1,2}|[A-Z]{3,3}|b*|c|(f|g)*" <regexp> matches? ] unit-test
-[ t ] [ "a" "[a-z]{1,2}|[A-Z]{3,3}" <regexp> matches? ] unit-test
-
-[ t ] [ "1000" "\\d{4,6}" <regexp> matches? ] unit-test
-[ t ] [ "1000" "[0-9]{4,6}" <regexp> matches? ] unit-test
-
-[ t ] [ "abc" "\\p{Lower}{3}" <regexp> matches? ] unit-test
-[ f ] [ "ABC" "\\p{Lower}{3}" <regexp> matches? ] unit-test
-[ t ] [ "ABC" "\\p{Upper}{3}" <regexp> matches? ] unit-test
-[ f ] [ "abc" "\\p{Upper}{3}" <regexp> matches? ] unit-test
-! 
-[ f ] [ "abc" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
-[ t ] [ "ABC" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
-
-[ f ] [ "" "\\Q\\E" <regexp> matches? ] unit-test
-[ f ] [ "a" "\\Q\\E" <regexp> matches? ] unit-test
-[ t ] [ "|*+" "\\Q|*+\\E" <regexp> matches? ] unit-test
-[ f ] [ "abc" "\\Q|*+\\E" <regexp> matches? ] unit-test
-[ t ] [ "s" "\\Qs\\E" <regexp> matches? ] unit-test
-
-[ t ] [ "S" "\\0123" <regexp> matches? ] unit-test
-[ t ] [ "SXY" "\\0123XY" <regexp> matches? ] unit-test
-[ t ] [ "x" "\\x78" <regexp> matches? ] unit-test
-[ f ] [ "y" "\\x78" <regexp> matches? ] unit-test
-[ t ] [ "x" "\\u000078" <regexp> matches? ] unit-test
-[ f ] [ "y" "\\u000078" <regexp> matches? ] unit-test
-
-[ t ] [ "ab" "a+b" <regexp> matches? ] unit-test
-[ f ] [ "b" "a+b" <regexp> matches? ] unit-test
-[ t ] [ "aab" "a+b" <regexp> matches? ] unit-test
-[ f ] [ "abb" "a+b" <regexp> matches? ] unit-test
-
-[ t ] [ "abbbb" "ab*" <regexp> matches? ] unit-test
-[ t ] [ "a" "ab*" <regexp> matches? ] unit-test
-[ f ] [ "abab" "ab*" <regexp> matches? ] unit-test
-
-[ f ] [ "x" "\\." <regexp> matches? ] unit-test
-[ t ] [ "." "\\." <regexp> matches? ] unit-test
-
-[ t ] [ "aaaab" "a+ab" <regexp> matches? ] unit-test
-[ f ] [ "aaaxb" "a+ab" <regexp> matches? ] unit-test
-[ t ] [ "aaacb" "a+cb" <regexp> matches? ] unit-test
-[ f ] [ "aaaab" "a++ab" <regexp> matches? ] unit-test
-[ t ] [ "aaacb" "a++cb" <regexp> matches? ] unit-test
-
-[ 3 ] [ "aaacb" "a*" <regexp> match-head ] unit-test
-[ 1 ] [ "aaacb" "a+?" <regexp> match-head ] unit-test
-[ 2 ] [ "aaacb" "aa?" <regexp> match-head ] unit-test
-[ 1 ] [ "aaacb" "aa??" <regexp> match-head ] unit-test
-[ 3 ] [ "aacb" "aa?c" <regexp> match-head ] unit-test
-[ 3 ] [ "aacb" "aa??c" <regexp> match-head ] unit-test
-
-! [ t ] [ "aaa" "AAA" t <regexp> matches? ] unit-test
-! [ f ] [ "aax" "AAA" t <regexp> matches? ] unit-test
-! [ t ] [ "aaa" "A*" t <regexp> matches? ] unit-test
-! [ f ] [ "aaba" "A*" t <regexp> matches? ] unit-test
-! [ t ] [ "b" "[AB]" t <regexp> matches? ] unit-test
-! [ f ] [ "c" "[AB]" t <regexp> matches? ] unit-test
-! [ t ] [ "c" "[A-Z]" t <regexp> matches? ] unit-test
-! [ f ] [ "3" "[A-Z]" t <regexp> matches? ] unit-test
-
-[ ] [
-    "(0[lL]?|[1-9]\\d{0,9}(\\d{0,9}[lL])?|0[xX]\\p{XDigit}{1,8}(\\p{XDigit}{0,8}[lL])?|0[0-7]{1,11}([0-7]{0,11}[lL])?|([0-9]+\\.[0-9]*|\\.[0-9]+)([eE][+-]?[0-9]+)?[fFdD]?|[0-9]+([eE][+-]?[0-9]+[fFdD]?|([eE][+-]?[0-9]+)?[fFdD]))"
-    <regexp> drop
-] unit-test
-
-[ "{Lower}" <regexp> ] [ invalid-range? ] must-fail-with
-
-[ t ] [ "fxxbar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
-[ f ] [ "foobar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
-
-! [ 3 ] [ "foobar" "foo(?=bar)" <regexp> match-head ] unit-test
-! [ f ] [ "foobxr" "foo(?=bar)" <regexp> match-head ] unit-test
-
-! [ f ] [ "foobxr" "foo\\z" <regexp> match-head ] unit-test
-! [ 3 ] [ "foo" "foo\\z" <regexp> match-head ] unit-test
-
-! [ 3 ] [ "foo bar" "foo\\b" <regexp> match-head ] unit-test
-! [ f ] [ "fooxbar" "foo\\b" <regexp> matches? ] unit-test
-! [ t ] [ "foo" "foo\\b" <regexp> matches? ] unit-test
-! [ t ] [ "foo bar" "foo\\b bar" <regexp> matches? ] unit-test
-! [ f ] [ "fooxbar" "foo\\bxbar" <regexp> matches? ] unit-test
-! [ f ] [ "foo" "foo\\bbar" <regexp> matches? ] unit-test
-
-! [ f ] [ "foo bar" "foo\\B" <regexp> matches? ] unit-test
-! [ 3 ] [ "fooxbar" "foo\\B" <regexp> match-head ] unit-test
-! [ t ] [ "foo" "foo\\B" <regexp> matches? ] unit-test
-! [ f ] [ "foo bar" "foo\\B bar" <regexp> matches? ] unit-test
-! [ t ] [ "fooxbar" "foo\\Bxbar" <regexp> matches? ] unit-test
-! [ f ] [ "foo" "foo\\Bbar" <regexp> matches? ] unit-test
-
-[ t ] [ "s@f" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
-[ f ] [ "a" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
-[ t ] [ ".o" "\\.[a-z]" <regexp> matches? ] unit-test
-
-[ t ] [ "a" "(?i)a" <regexp> matches? ] unit-test
-[ t ] [ "a" "(?i)a" <regexp> matches? ] unit-test
-[ t ] [ "A" "(?i)a" <regexp> matches? ] unit-test
-[ t ] [ "A" "(?i)a" <regexp> matches? ] unit-test
-
-[ t ] [ "a" "(?-i)a" <iregexp> matches? ] unit-test
-[ t ] [ "a" "(?-i)a" <iregexp> matches? ] unit-test
-[ f ] [ "A" "(?-i)a" <iregexp> matches? ] unit-test
-[ f ] [ "A" "(?-i)a" <iregexp> matches? ] unit-test
-
-[ f ] [ "A" "[a-z]" <regexp> matches? ] unit-test
-[ t ] [ "A" "[a-z]" <iregexp> matches? ] unit-test
-
-[ f ] [ "A" "\\p{Lower}" <regexp> matches? ] unit-test
-[ t ] [ "A" "\\p{Lower}" <iregexp> matches? ] unit-test
-
-[ t ] [ "abc" <reversed> "abc" <rregexp> matches? ] unit-test
-[ t ] [ "abc" <reversed> "a[bB][cC]" <rregexp> matches? ] unit-test
-[ t ] [ "adcbe" "a(?r)(bcd)(?-r)e" <rregexp> matches? ] unit-test
-
-! Bug in parsing word
-! [ t ] [ "a" R' a' matches?  ] unit-test
-
-! ((A)(B(C)))
-! 1.  ((A)(B(C)))
-! 2. (A)
-! 3. (B(C))
-! 4. (C) 
diff --git a/extra/regexp2/regexp2.factor b/extra/regexp2/regexp2.factor
deleted file mode 100644
index 0b8994ca2b..0000000000
--- a/extra/regexp2/regexp2.factor
+++ /dev/null
@@ -1,59 +0,0 @@
-! Copyright (C) 2008 Doug Coleman.
-! See http://factorcode.org/license.txt for BSD license.
-USING: accessors combinators kernel math math.ranges
-sequences regexp2.backend regexp2.utils memoize sets
-regexp2.parser regexp2.nfa regexp2.dfa regexp2.traversal
-regexp2.transition-tables ;
-IN: regexp2
-
-: default-regexp ( string -- regexp )
-    regexp new
-        swap >>raw
-        <transition-table> >>nfa-table
-        <transition-table> >>dfa-table
-        <transition-table> >>minimized-table
-        reset-regexp ;
-
-: construct-regexp ( regexp -- regexp' )
-    {
-        [ parse-regexp ]
-        [ construct-nfa ]
-        [ construct-dfa ]
-        [ ]
-    } cleave ;
-
-: match ( string regexp -- pair )
-    <dfa-traverser> do-match return-match ;
-
-: matches? ( string regexp -- ? )
-    dupd match [ [ length ] [ range-length 1- ] bi* = ] [ drop f ] if* ;
-
-: match-head ( string regexp -- end ) match length>> 1- ;
-
-: initial-option ( regexp option -- regexp' )
-    over options>> conjoin ;
-
-: <regexp> ( string -- regexp )
-    default-regexp construct-regexp ;
-
-: <iregexp> ( string -- regexp )
-    default-regexp
-    case-insensitive initial-option
-    construct-regexp ;
-
-: <rregexp> ( string -- regexp )
-    default-regexp
-    reversed-regexp initial-option
-    construct-regexp ;
-
-: R! CHAR: ! <regexp> ; parsing
-: R" CHAR: " <regexp> ; parsing
-: R# CHAR: # <regexp> ; parsing
-: R' CHAR: ' <regexp> ; parsing
-: R( CHAR: ) <regexp> ; parsing
-: R/ CHAR: / <regexp> ; parsing
-: R@ CHAR: @ <regexp> ; parsing
-: R[ CHAR: ] <regexp> ; parsing
-: R` CHAR: ` <regexp> ; parsing
-: R{ CHAR: } <regexp> ; parsing
-: R| CHAR: | <regexp> ; parsing
diff --git a/extra/regexp2/summary.txt b/extra/regexp2/summary.txt
deleted file mode 100644
index aa1e1c27a9..0000000000
--- a/extra/regexp2/summary.txt
+++ /dev/null
@@ -1 +0,0 @@
-Regular expressions
diff --git a/extra/regexp2/tags.txt b/extra/regexp2/tags.txt
deleted file mode 100644
index 65bc471f6b..0000000000
--- a/extra/regexp2/tags.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-parsing
-text
diff --git a/extra/regexp2/transition-tables/transition-tables.factor b/extra/regexp2/transition-tables/transition-tables.factor
deleted file mode 100644
index 0547846655..0000000000
--- a/extra/regexp2/transition-tables/transition-tables.factor
+++ /dev/null
@@ -1,44 +0,0 @@
-! Copyright (C) 2008 Doug Coleman.
-! See http://factorcode.org/license.txt for BSD license.
-USING: accessors arrays assocs fry hashtables kernel sequences
-vectors ;
-IN: regexp2.transition-tables
-
-: insert-at ( value key hash -- )
-    2dup at* [
-        2nip push
-    ] [
-        drop >r >r dup vector? [ 1vector ] unless r> r> set-at
-    ] if ;
-
-: ?insert-at ( value key hash/f -- hash )
-    [ H{ } clone ] unless* [ insert-at ] keep ;
-
-TUPLE: transition from to obj ;
-TUPLE: literal-transition < transition ;
-TUPLE: class-transition < transition ;
-TUPLE: default-transition < transition ;
-
-TUPLE: literal obj ;
-TUPLE: class obj ;
-TUPLE: default ;
-: <literal-transition> ( from to obj -- transition ) literal-transition boa ;
-: <class-transition> ( from to obj -- transition ) class-transition boa ;
-: <default-transition> ( from to -- transition ) t default-transition boa ;
-
-TUPLE: transition-table transitions
-    literals classes defaults
-    start-state final-states ;
-
-: <transition-table> ( -- transition-table )
-    transition-table new
-        H{ } clone >>transitions
-        H{ } clone >>final-states ;
-
-: set-transition ( transition hash -- )
-    >r [ to>> ] [ obj>> ] [ from>> ] tri r>
-    2dup at* [ 2nip insert-at ]
-    [ drop >r >r H{ } clone [ insert-at ] keep r> r> set-at ] if ;
-
-: add-transition ( transition transition-table -- )
-    transitions>> set-transition ;
diff --git a/extra/regexp2/traversal/traversal.factor b/extra/regexp2/traversal/traversal.factor
deleted file mode 100644
index 94e96bb935..0000000000
--- a/extra/regexp2/traversal/traversal.factor
+++ /dev/null
@@ -1,80 +0,0 @@
-! Copyright (C) 2008 Doug Coleman.
-! See http://factorcode.org/license.txt for BSD license.
-USING: accessors assocs combinators combinators.lib kernel
-math math.ranges quotations sequences regexp2.parser
-regexp2.classes combinators.short-circuit assocs.lib
-sequences.lib ;
-IN: regexp2.traversal
-
-TUPLE: dfa-traverser
-    dfa-table
-    last-state current-state
-    text
-    start-index current-index
-    matches ;
-
-: <dfa-traverser> ( text regexp -- match )
-    dfa-table>>
-    dfa-traverser new
-        swap [ start-state>> >>current-state ] keep
-        >>dfa-table
-        swap >>text
-        0 >>start-index
-        0 >>current-index
-        V{ } clone >>matches ;
-
-: final-state? ( dfa-traverser -- ? )
-    [ current-state>> ] [ dfa-table>> final-states>> ] bi
-    key? ;
-
-: text-finished? ( dfa-traverser -- ? )
-    [ current-index>> ] [ text>> length ] bi >= ;
-
-: save-final-state ( dfa-straverser -- )
-    [ current-index>> ] [ matches>> ] bi push ;
-
-: match-done? ( dfa-traverser -- ? )
-    dup final-state? [
-        dup save-final-state
-    ] when text-finished? ;
-
-: increment-state ( dfa-traverser state -- dfa-traverser )
-    >r [ 1+ ] change-current-index
-    dup current-state>> >>last-state r>
-    first >>current-state ;
-
-: match-failed ( dfa-traverser -- dfa-traverser )
-    V{ } clone >>matches ;
-
-: match-literal ( transition from-state table -- to-state/f )
-    transitions>> [ at ] [ 2drop f ] if-at ;
-
-: assoc-with ( param assoc quot -- assoc curry )
-    swapd [ [ -rot ] dip call ] 2curry ; inline
-
-: match-class ( transition from-state table -- to-state/f )
-    transitions>> at* [
-        [ drop class-member? ] assoc-with assoc-find [ nip ] [ drop ] if
-    ] [ drop ] if ;
-
-: match-default ( transition from-state table -- to-state/f )
-    [ nip ] dip transitions>>
-    [ t swap [ drop f ] unless-at ] [ drop f ] if-at ;
-
-: match-transition ( obj from-state dfa -- to-state/f )
-    { [ match-literal ] [ match-class ] [ match-default ] } 3|| ;
-
-: setup-match ( match -- obj state dfa-table )
-    { current-index>> text>> current-state>> dfa-table>> } get-slots
-    [ nth ] 2dip ;
-
-: do-match ( dfa-traverser -- dfa-traverser )
-    dup match-done? [
-        dup setup-match match-transition
-        [ increment-state do-match ] when*
-    ] unless ;
-
-: return-match ( dfa-traverser -- interval/f )
-    dup matches>>
-    [ drop f ]
-    [ [ start-index>> ] [ peek ] bi* 1 <range> ] if-empty ;
diff --git a/extra/regexp2/utils/utils.factor b/extra/regexp2/utils/utils.factor
deleted file mode 100644
index 0167e73005..0000000000
--- a/extra/regexp2/utils/utils.factor
+++ /dev/null
@@ -1,69 +0,0 @@
-! Copyright (C) 2008 Doug Coleman.
-! See http://factorcode.org/license.txt for BSD license.
-USING: accessors arrays assocs combinators.lib io kernel
-math math.order namespaces regexp2.backend sequences
-sequences.lib unicode.categories math.ranges fry
-combinators.short-circuit ;
-IN: regexp2.utils
-
-: (while-changes) ( obj quot pred pred-ret -- obj )
-    ! quot: ( obj -- obj' )
-    ! pred: ( obj -- <=> )
-    >r >r dup slip r> pick over call r> dupd =
-    [ 3drop ] [ (while-changes) ] if ; inline
-
-: while-changes ( obj quot pred -- obj' )
-    pick over call (while-changes) ; inline
-
-: last-state ( regexp -- range ) stack>> peek first2 [a,b] ;
-: push1 ( obj -- ) input-stream get stream>> push ;
-: peek1 ( -- obj ) input-stream get stream>> [ f ] [ peek ] if-empty ;
-: pop3 ( seq -- obj1 obj2 obj3 ) [ pop ] [ pop ] [ pop ] tri spin ;
-
-: stack ( -- obj ) current-regexp get stack>> ;
-: change-whole-stack ( quot -- )
-    current-regexp get
-    [ stack>> swap call ] keep (>>stack) ; inline
-: push-stack ( obj -- ) stack push ;
-: pop-stack ( -- obj ) stack pop ;
-: cut-out ( vector n -- vector' vector ) cut rest ;
-ERROR: cut-stack-error ;
-: cut-stack ( obj vector -- vector' vector )
-    tuck last-index [ cut-stack-error ] unless* cut-out swap ;
-
-ERROR: bad-octal number ;
-ERROR: bad-hex number ;
-: check-octal ( octal -- octal ) dup 255 > [ bad-octal ] when ;
-: check-hex ( hex -- hex ) dup number? [ bad-hex ] unless ;
-
-: ascii? ( n -- ? ) 0 HEX: 7f between? ;
-: octal-digit? ( n -- ? ) CHAR: 0 CHAR: 7 between? ;
-: decimal-digit? ( n -- ? ) CHAR: 0 CHAR: 9 between? ;
-
-: hex-digit? ( n -- ? )
-    [
-        [ decimal-digit? ]
-        [ CHAR: a CHAR: f between? ]
-        [ CHAR: A CHAR: F between? ]
-    ] 1|| ;
-
-: control-char? ( n -- ? )
-    [
-        [ 0 HEX: 1f between? ]
-        [ HEX: 7f = ]
-    ] 1|| ;
-
-: punct? ( n -- ? )
-    "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" member? ;
-
-: c-identifier-char? ( ch -- ? )
-    [ [ alpha? ] [ CHAR: _ = ] ] 1|| ;
-
-: java-blank? ( n -- ? )
-    {
-        CHAR: \s CHAR: \t CHAR: \n
-        HEX: b HEX: 7 CHAR: \r
-    } member? ;
-
-: java-printable? ( n -- ? )
-    [ [ alpha? ] [ punct? ] ] 1|| ;