Merge branch 'master' of git://factorcode.org/git/factor

2008-08-21 12:46:29 -05:00 · 2008-08-21 12:46:29 -05:00 · e0091012e6
parent e846d0d98e 95657e5742
commit e0091012e6
37 changed files with 1457 additions and 294 deletions
--- a/basis/io/ports/ports.factor
+++ b/basis/io/ports/ports.factor
@ -4,7 +4,7 @@ USING: math kernel io sequences io.buffers io.timeouts generic
 byte-vectors system io.encodings math.order io.backend
 continuations debugger classes byte-arrays namespaces splitting
 grouping dlists assocs io.encodings.binary summary accessors
-destructors ;
+destructors combinators ;
 IN: io.ports
 SYMBOL: default-buffer-size
@ -133,10 +133,12 @@ M: output-port stream-flush ( port -- )
 M: output-port dispose*
    [
        {
            [ handle>> &dispose drop ]
            [ buffer>> &dispose drop ]
            [ port-flush ]
            [ handle>> shutdown ]
-        tri
+        } cleave
    ] with-destructors ;
 M: buffered-port dispose*
--- a/basis/smtp/smtp-tests.factor
+++ b/basis/smtp/smtp-tests.factor
@ -3,12 +3,6 @@ smtp.server kernel sequences namespaces logging accessors
 assocs sorting ;
 IN: smtp.tests
 [ t ] [
    <email>
    dup clone "a" "b" set-header drop
    headers>> assoc-empty?
 ] unit-test
 { 0 0 } [ [ ] with-smtp-connection ] must-infer-as
 [ "hello\nworld" validate-address ] must-fail
@ -60,12 +54,13 @@ IN: smtp.tests
            "Ed <dharmatech@factorcode.org>"
        } >>to
        "Doug <erg@factorcode.org>" >>from
-    prepare
+    [
-    dup headers>> >alist sort-keys [
+        email>headers sort-keys [
            drop { "Date" "Message-Id" } member? not
        ] assoc-filter
-    over to>>
+    ]
-    rot from>>
+    [ to>> [ extract-email ] map ]
    [ from>> extract-email ] tri
 ] unit-test
 [ ] [ [ 4321 mock-smtp-server ] "SMTP server" spawn drop ] unit-test
--- a/basis/smtp/smtp.factor
+++ b/basis/smtp/smtp.factor
@ -1,10 +1,10 @@
 ! Copyright (C) 2007, 2008 Elie CHAFTARI, Dirk Vleugels,
 ! Slava Pestov.
 ! See http://factorcode.org/license.txt for BSD license.
-USING: namespaces io io.timeouts kernel logging io.sockets
+USING: arrays namespaces io io.timeouts kernel logging io.sockets
 sequences combinators sequences.lib splitting assocs strings
-math.parser random system calendar io.encodings.ascii
+math.parser random system calendar io.encodings.ascii summary
-calendar.format accessors sets ;
+calendar.format accessors sets hashtables ;
 IN: smtp
 SYMBOL: smtp-domain
@ -23,6 +23,16 @@ LOG: log-smtp-connection NOTICE ( addrspec -- )
        call
    ] with-client ; inline
 TUPLE: email
    { from string }
    { to array }
    { cc array }
    { bcc array }
    { subject string }
    { body string } ;
 : <email> ( -- email ) email new ;
 : crlf ( -- ) "\r\n" write ;
 : command ( string -- ) write crlf flush ;
@ -30,10 +40,12 @@ LOG: log-smtp-connection NOTICE ( addrspec -- )
 : helo ( -- )
    esmtp get "EHLO " "HELO " ? host-name append command ;
 ERROR: bad-email-address email ;
 : validate-address ( string -- string' )
    #! Make sure we send funky stuff to the server by accident.
    dup "\r\n>" intersect empty?
-    [ "Bad e-mail address: " prepend throw ] unless ;
+    [ bad-email-address ] unless ;
 : mail-from ( fromaddr -- )
    "MAIL FROM:<" swap validate-address ">" 3append command ;
@ -44,8 +56,15 @@ LOG: log-smtp-connection NOTICE ( addrspec -- )
 : data ( -- )
    "DATA" command ;
 ERROR: message-contains-dot message ;
 M: message-contains-dot summary ( obj -- string )
    drop
    "Message cannot contain . on a line by itself" ;
 : validate-message ( msg -- msg' )
-    "." over member? [ "Message cannot contain . on a line by itself" throw ] when ;
+    "." over member?
    [ message-contains-dot ] when ;
 : send-body ( body -- )
    string-lines
@ -58,19 +77,37 @@ LOG: log-smtp-connection NOTICE ( addrspec -- )
 LOG: smtp-response DEBUG
 ERROR: smtp-error message ;
 ERROR: smtp-server-busy < smtp-error ;
 ERROR: smtp-syntax-error < smtp-error ;
 ERROR: smtp-command-not-implemented < smtp-error ;
 ERROR: smtp-bad-authentication < smtp-error ;
 ERROR: smtp-mailbox-unavailable < smtp-error ;
 ERROR: smtp-user-not-local < smtp-error ;
 ERROR: smtp-exceeded-storage-allocation < smtp-error ;
 ERROR: smtp-bad-mailbox-name < smtp-error ;
 ERROR: smtp-transaction-failed < smtp-error ;
 : check-response ( response -- )
    dup smtp-response
    {
-        { [ dup "220" head? ] [ smtp-response ] }
+        { [ dup "bye" head? ] [ drop ] }
-        { [ dup "235" swap subseq? ] [ smtp-response ] }
+        { [ dup "220" head? ] [ drop ] }
-        { [ dup "250" head? ] [ smtp-response ] }
+        { [ dup "235" swap subseq? ] [ drop ] }
-        { [ dup "221" head? ] [ smtp-response ] }
+        { [ dup "250" head? ] [ drop ] }
-        { [ dup "bye" head? ] [ smtp-response ] }
+        { [ dup "221" head? ] [ drop ] }
-        { [ dup "4" head? ] [ "server busy" throw ] }
+        { [ dup "354" head? ] [ drop ] }
-        { [ dup "354" head? ] [ smtp-response ] }
+        { [ dup "4" head? ] [ smtp-server-busy ] }
-        { [ dup "50" head? ] [ smtp-response "syntax error" throw ] }
+        { [ dup "500" head? ] [ smtp-syntax-error ] }
-        { [ dup "53" head? ] [ smtp-response "invalid authentication data" throw ] }
+        { [ dup "501" head? ] [ smtp-command-not-implemented ] }
-        { [ dup "55" head? ] [ smtp-response "fatal error" throw ] }
+        { [ dup "50" head? ] [ smtp-syntax-error ] }
-        [ "unknown error" throw ]
+        { [ dup "53" head? ] [ smtp-bad-authentication ] }
        { [ dup "550" head? ] [ smtp-mailbox-unavailable ] }
        { [ dup "551" head? ] [ smtp-user-not-local ] }
        { [ dup "552" head? ] [ smtp-exceeded-storage-allocation ] }
        { [ dup "553" head? ] [ smtp-bad-mailbox-name ] }
        { [ dup "554" head? ] [ smtp-transaction-failed ] }
        [ smtp-error ]
    } cond ;
 : multiline? ( response -- boolean )
@ -89,41 +126,19 @@ LOG: smtp-response DEBUG
 : get-ok ( -- ) receive-response check-response ;
 ERROR: invalid-header-string string ;
 : validate-header ( string -- string' )
    dup "\r\n" intersect empty?
-    [ "Invalid header string: " prepend throw ] unless ;
+    [ invalid-header-string ] unless ;
 : write-header ( key value -- )
-    swap
+    [ validate-header write ]
-    validate-header write
+    [ ": " write validate-header write ] bi* crlf ;
    ": " write
    validate-header write
    crlf ;
 : write-headers ( assoc -- )
    [ write-header ] assoc-each ;
 TUPLE: email from to subject headers body ;
 M: email clone
    call-next-method [ clone ] change-headers ;
 : (send) ( email -- )
    [
        helo get-ok
        dup from>> mail-from get-ok
        dup to>> [ rcpt-to get-ok ] each
        data get-ok
        dup headers>> write-headers
        crlf
        body>> send-body get-ok
        quit get-ok
    ] with-smtp-connection ;
 : extract-email ( recepient -- email )
    #! This could be much smarter.
    " " last-split1 swap or "<" ?head drop ">" ?tail drop ;
 : message-id ( -- string )
    [
        "<" %
@ -135,25 +150,38 @@ M: email clone
        ">" %
    ] "" make ;
-: set-header ( email value key -- email )
+: extract-email ( recepient -- email )
-    pick headers>> set-at ;
+    #! This could be much smarter.
    " " last-split1 swap or "<" ?head drop ">" ?tail drop ;
-: prepare ( email -- email )
+: email>headers ( email -- hashtable )
-    clone
+    [
-    dup from>> "From" set-header
+        {
-    [ extract-email ] change-from
+            [ from>> "From" set ]
-    dup to>> ", " join "To" set-header
+            [ to>> ", " join "To" set ]
-    [ [ extract-email ] map ] change-to
+            [ cc>> ", " join [ "Cc" set ] unless-empty ]
-    dup subject>> "Subject" set-header
+            [ subject>> "Subject" set ]
-    now timestamp>rfc822 "Date" set-header
+        } cleave
-    message-id "Message-Id" set-header ;
+        now timestamp>rfc822 "Date" set
        message-id "Message-Id" set
    ] { } make-assoc ;
-: <email> ( -- email )
+: (send-email) ( headers email -- )
-    email new
+    [
-    H{ } clone >>headers ;
+        helo get-ok
        dup from>> extract-email mail-from get-ok
        dup to>> [ extract-email rcpt-to get-ok ] each
        dup cc>> [ extract-email rcpt-to get-ok ] each
        dup bcc>> [ extract-email rcpt-to get-ok ] each
        data get-ok
        swap write-headers
        crlf
        body>> send-body get-ok
        quit get-ok
    ] with-smtp-connection ;
 : send-email ( email -- )
-    prepare (send) ;
+    [ email>headers ] keep (send-email) ;
 ! Dirk's old AUTH CRAM-MD5 code. I don't know anything about
 ! CRAM MD5, and the old code didn't work properly either, so here
--- a/core/classes/tuple/tuple-tests.factor
+++ b/core/classes/tuple/tuple-tests.factor
@ -697,3 +697,7 @@ DEFER: error-y
    <string-reader> "forget-subclass-test" parse-stream
    drop
 ] unit-test
 [ ] [
    "IN: sequences TUPLE: reversed { seq read-only } ;" eval
 ] unit-test
--- a/core/classes/tuple/tuple.factor
+++ b/core/classes/tuple/tuple.factor
@ -104,8 +104,7 @@ ERROR: bad-superclass class ;
    [ tuple-instance? ] 2curry define-predicate ;
 : superclass-size ( class -- n )
-    superclasses but-last-slice
+    superclasses but-last [ "slots" word-prop length ] sigma ;
    [ "slots" word-prop length ] sigma ;
 : (instance-check-quot) ( class -- quot )
    [
@ -203,11 +202,11 @@ ERROR: bad-superclass class ;
 M: tuple-class update-class
    {
        [ define-boa-check ]
        [ define-tuple-layout ]
        [ define-tuple-slots ]
        [ define-tuple-predicate ]
        [ define-tuple-prototype ]
        [ define-boa-check ]
    } cleave ;
 : define-new-tuple-class ( class superclass slots -- )
@ -280,11 +279,8 @@ M: tuple-class reset-class
        ] with each
    ] [
        [ call-next-method ]
-        [
+        [ { "layout" "slots" "boa-check" "prototype" } reset-props ]
-            {
+        bi
                "layout" "slots" "boa-check" "prototype"
            } reset-props
        ] bi
    ] bi ;
 M: tuple-class rank-class drop 0 ;
--- a/core/combinators/combinators.factor
+++ b/core/combinators/combinators.factor
@ -117,10 +117,10 @@ ERROR: no-case ;
    ] [ drop f ] if ;
 : dispatch-case ( value from to default array -- )
-    >r >r 3dup between? [
+    >r >r 3dup between? r> r> rot [
-        drop - >fixnum r> drop r> dispatch
+        >r 2drop - >fixnum r> dispatch
    ] [
-        2drop r> call r> drop
+        drop 2nip call
    ] if ; inline
 : dispatch-case-quot ( default assoc -- quot )
--- a/core/generic/standard/engines/engines.factor
+++ b/core/generic/standard/engines/engines.factor
@ -34,10 +34,10 @@ GENERIC: engine>quot ( engine -- quot )
    [ [ nip class<=     ] curry assoc-filter ] 2bi ;
 : convert-methods ( assoc class word -- assoc' )
-    over >r >r split-methods dup assoc-empty? [
+    over [ split-methods ] 2dip pick assoc-empty? [
-        r> r> 3drop
+        3drop
    ] [
-        r> execute r> pick set-at
+        [ execute ] dip pick set-at
    ] if ; inline
 : (picker) ( n -- quot )
--- a/core/io/encodings/encodings.factor
+++ b/core/io/encodings/encodings.factor
@ -61,8 +61,8 @@ M: decoder stream-read1
 : (read) ( n quot -- n string )
    over 0 <string> [
        [
-            >r call dup
+            slip over
-            [ swap r> set-nth-unsafe f ] [ r> 3drop t ] if
+            [ swapd set-nth-unsafe f ] [ 3drop t ] if
        ] 2curry find-integer
    ] keep ; inline
--- a/core/io/encodings/utf8/utf8.factor
+++ b/core/io/encodings/utf8/utf8.factor
@ -24,7 +24,7 @@ SINGLETON: utf8
 : triple ( stream byte -- stream char )
    BIN: 1111 bitand append-nums append-nums ; inline
-: quad ( stream byte -- stream char )
+: quadruple ( stream byte -- stream char )
    BIN: 111 bitand append-nums append-nums append-nums ; inline
 : begin-utf8 ( stream byte -- stream char )
@ -32,7 +32,7 @@ SINGLETON: utf8
        { [ dup -7 shift zero? ] [ ] }
        { [ dup -5 shift BIN: 110 number= ] [ double ] }
        { [ dup -4 shift BIN: 1110 number= ] [ triple ] }
-        { [ dup -3 shift BIN: 11110 number= ] [ quad ] }
+        { [ dup -3 shift BIN: 11110 number= ] [ quadruple ] }
        [ drop replacement-char ]
    } cond ; inline
--- a/core/math/parser/parser.factor
+++ b/core/math/parser/parser.factor
@ -96,8 +96,8 @@ PRIVATE>
 : integer, ( num radix -- )
    dup 1 <= [ "Invalid radix" throw ] when
-    dup >r /mod >digit , dup 0 >
+    [ /mod >digit , ] keep over 0 >
-    [ r> integer, ] [ r> 2drop ] if ;
+    [ integer, ] [ 2drop ] if ;
 PRIVATE>
--- a/core/sequences/sequences.factor
+++ b/core/sequences/sequences.factor
@ -173,8 +173,6 @@ M: reversed length seq>> length ;
 INSTANCE: reversed virtual-sequence
 : reverse ( seq -- newseq ) [ <reversed> ] [ like ] bi ;
 ! A slice of another sequence.
 TUPLE: slice
 { from read-only }
@ -336,11 +334,10 @@ M: immutable-sequence clone-like like ;
    pick >r >r (each) r> call r> finish-find ; inline
 : (find-from) ( n seq quot quot' -- i elt )
-    >r >r 2dup bounds-check? [
+    [ 2dup bounds-check? ] 2dip
-        r> r> (find)
+    [ (find) ] 2curry
-    ] [
+    [ 2drop f f ]
-        r> r> 2drop 2drop f f
+    if ; inline
    ] if ; inline
 : (monotonic) ( seq quot -- ? )
    [ 2dup nth-unsafe rot 1+ rot nth-unsafe ]
@ -601,6 +598,13 @@ M: slice equal? over slice? [ sequence= ] [ 2drop f ] if ;
        tuck - 1- rot exchange-unsafe
    ] each 2drop ;
 : reverse ( seq -- newseq )
    [
        dup [ length ] keep new-sequence
        [ 0 swap copy ] keep
        [ reverse-here ] keep
    ] keep like ;
 : sum-lengths ( seq -- n )
    0 [ length + ] reduce ;
@ -624,8 +628,10 @@ M: slice equal? over slice? [ sequence= ] [ 2drop f ] if ;
    ] keep like ;
 : padding ( seq n elt quot -- newseq )
-    >r >r over length [-] dup zero?
+    [
-    [ r> r> 3drop ] [ r> <repetition> r> call ] if ; inline
+        [ over length [-] dup zero? [ drop ] ] dip
        [ <repetition> ] curry
    ] dip compose if ; inline
 : pad-left ( seq n elt -- padded )
    [ swap dup (append) ] padding ;
@ -730,9 +736,11 @@ PRIVATE>
    [ left-trim ] [ right-trim ] bi ; inline
 : sum ( seq -- n ) 0 [ + ] binary-reduce ;
 : product ( seq -- n ) 1 [ * ] binary-reduce ;
 : infimum ( seq -- n ) dup first [ min ] reduce ;
 : supremum ( seq -- n ) dup first [ max ] reduce ;
 : flip ( matrix -- newmatrix )
@ -744,4 +752,3 @@ PRIVATE>
 : sigma ( seq quot -- n ) [ + ] compose 0 swap reduce ; inline
 : count ( seq quot -- n ) [ 1 0 ? ] compose sigma ; inline
--- a/core/sorting/sorting.factor
+++ b/core/sorting/sorting.factor
@ -25,19 +25,19 @@ TUPLE: merge
 : dump ( from to seq accum -- )
    #! Optimize common case where to - from = 1, 2, or 3.
-    >r >r 2dup swap - dup 1 =
+    >r >r 2dup swap - r> r> pick 1 = 
-    [ 2drop r> nth-unsafe r> push ] [
+    [ >r >r 2drop r> nth-unsafe r> push ] [
-        dup 2 = [
+        pick 2 = [
-            2drop dup 1+
+            >r >r 2drop dup 1+
            r> [ nth-unsafe ] curry bi@
            r> [ push ] curry bi@
        ] [
-            dup 3 = [
+            pick 3 = [
-                2drop dup 1+ dup 1+
+                >r >r 2drop dup 1+ dup 1+
                r> [ nth-unsafe ] curry tri@
                r> [ push ] curry tri@
            ] [
-                drop r> subseq r> push-all
+                >r nip subseq r> push-all
            ] if
        ] if
    ] if ; inline
@ -120,11 +120,13 @@ TUPLE: merge
    [ [ 1 shift dup 1+ ] dip ] prepose curry each-integer ; inline
 : (sort-pairs) ( i1 i2 seq quot accum -- )
-    >r >r 2dup length = [
+    [ 2dup length = ] 2dip rot [
-        nip nth r> drop r> push
+        [ drop nip nth ] dip push
    ] [
-        tuck [ nth-unsafe ] 2bi@ 2dup r> call +gt+ eq?
+        [
-        [ swap ] when r> tuck [ push ] 2bi@
+            [ tuck [ nth-unsafe ] 2bi@ 2dup ] dip call +gt+ eq?
            [ swap ] when
        ] dip tuck [ push ] 2bi@
    ] if ; inline
 : sort-pairs ( merge quot -- )
--- a/extra/assocs/lib/lib-tests.factor
+++ b/extra/assocs/lib/lib-tests.factor
@ -1,4 +1,17 @@
 USING: kernel tools.test sequences vectors assocs.lib ;
 IN: assocs.lib.tests
 USING: assocs.lib tools.test vectors ;
 { 1 1 } [ [ ?push ] histogram ] must-infer-as
 ! substitute
 [ { 2 } ] [ { 1 } H{ { 1 2 } } [ ?at drop ] curry map ] unit-test
 [ { 3 } ] [ { 3 } H{ { 1 2 } } [ ?at drop ] curry map ] unit-test
 [ 2 ] [ 1 H{ { 1 2 } } [ ] [ ] if-at ] unit-test
 [ 3 ] [ 3 H{ { 1 2 } } [ ] [ ] if-at ] unit-test
 [ "hi" ] [ 1 H{ { 1 2 } } [ drop "hi" ] when-at ] unit-test
 [ 3 ] [ 3 H{ { 1 2 } } [ drop "hi" ] when-at ] unit-test
 [ 2 ] [ 1 H{ { 1 2 } } [ drop "hi" ] unless-at ] unit-test
 [ "hi" ] [ 3 H{ { 1 2 } } [ drop "hi" ] unless-at ] unit-test
--- a/extra/assocs/lib/lib.factor
+++ b/extra/assocs/lib/lib.factor
@ -37,3 +37,13 @@ IN: assocs.lib
    H{ } clone [
        swap [ change-at ] 2curry assoc-each
    ] keep ; inline
 : ?at ( obj assoc -- value/obj ? )
    dupd at* [ [ nip ] [ drop ] if ] keep ;
 : if-at ( obj assoc quot1 quot2 -- )
    [ ?at ] 2dip if ; inline
 : when-at ( obj assoc quot -- ) [ ] if-at ; inline
 : unless-at ( obj assoc quot -- ) [ ] swap if-at ; inline
--- a/extra/combinators/lib/lib-docs.factor
+++ b/extra/combinators/lib/lib-docs.factor
@ -11,3 +11,12 @@ HELP: generate
    "[ 20 random-prime ] [ 4 mod 3 = ] generate ."
    "526367"
 } ;
 HELP: %chance
 { $values { "quot" quotation } { "n" integer } }
 { $description "Calls the quotation " { $snippet "n" } " percent of the time." }
 { $unchecked-example
    "USING: io ;"
    "[ \"hello, world!  maybe.\" print ] 50 %chance"
    ""
 } ;
--- a/extra/combinators/lib/lib.factor
+++ b/extra/combinators/lib/lib.factor
@ -4,7 +4,7 @@
 ! See http://factorcode.org/license.txt for BSD license.
 USING: kernel combinators fry namespaces quotations hashtables
 sequences assocs arrays inference effects math math.ranges
-generalizations macros continuations locals ;
+generalizations macros continuations random locals ;
 IN: combinators.lib
@ -31,6 +31,8 @@ IN: combinators.lib
 ! Generalized versions of core combinators
 ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 : quad ( x p q r s -- ) >r >r >r keep r> keep r> keep r> call ; inline
 : 4slip ( quot a b c d -- a b c d ) 4 nslip ; inline
 : 4keep ( w x y z quot -- w x y z ) 4 nkeep ; inline 
@ -137,7 +139,7 @@ MACRO: multikeep ( word out-indexes -- ... )
    [ drop ] rot compose attempt-all ; inline
 : do-while ( pred body tail -- )
-    >r tuck 2slip r> while ;
+    >r tuck 2slip r> while ; inline
 : generate ( generator predicate -- obj )
    [ dup ] swap [ dup [ nip ] unless not ] 3compose
@ -147,3 +149,5 @@ MACRO: predicates ( seq -- quot/f )
    dup [ 1quotation [ drop ] prepend ] map
    >r [ [ dup ] prepend ] map r> zip [ drop f ] suffix
    [ cond ] curry ;
 : %chance ( quot integer -- ) 100 random > swap when ; inline
--- a/extra/html/parser/analyzer/analyzer.factor
+++ b/extra/html/parser/analyzer/analyzer.factor
@ -56,8 +56,7 @@ TUPLE: link attributes clickable ;
 : trim-text ( vector -- vector' )
    [
        dup name>> text = [
-            [ text>> [ blank? ] trim ] keep
+            [ [ blank? ] trim ] change-text
            [ set-tag-text ] keep
        ] when
    ] map ;
@ -173,8 +172,7 @@ TUPLE: link attributes clickable ;
    [
        {
            { [ dup name>> "form" = ]
-                [ "form action: " write attributes>> "action" swap at print
+                [ "form action: " write attributes>> "action" swap at print ] }
            ] }
            { [ dup name>> "input" = ] [ input. ] }
            [ drop ]
        } cond
--- a/extra/html/parser/parser-tests.factor
+++ b/extra/html/parser/parser-tests.factor
@ -2,19 +2,19 @@ USING: html.parser kernel tools.test ;
 IN: html.parser.tests
 [
-    V{ T{ tag f "html" H{ } f f f } }
+    V{ T{ tag f "html" H{ } f f } }
 ] [ "<html>" parse-html ] unit-test
 [
-    V{ T{ tag f "html" H{ } f f t } }
+    V{ T{ tag f "html" H{ } f t } }
 ] [ "</html>" parse-html ] unit-test
 [
-    V{ T{ tag f "a" H{ { "href" "http://factorcode.org/" } } f f f } }
+    V{ T{ tag f "a" H{ { "href" "http://factorcode.org/" } } f f } }
 ] [ "<a href=\"http://factorcode.org/\">" parse-html ] unit-test
 [
-    V{ T{ tag f "a" H{ { "href" "http://factorcode.org/" } } f f f } }
+    V{ T{ tag f "a" H{ { "href" "http://factorcode.org/" } } f f } }
 ] [ "<a   href  =  \"http://factorcode.org/\"   >" parse-html ] unit-test
 [
@ -26,7 +26,6 @@ V{
        H{ { "baz" "\"quux\"" } { "foo" "bar's" } }
        f
        f
        f
    }
 }
 ] [ "<a   foo=\"bar's\" baz='\"quux\"'  >" parse-html ] unit-test
@ -39,25 +38,25 @@ V{
            { "foo" "bar" }
            { "href" "http://factorcode.org/" }
            { "baz" "quux" }
-        } f f f }
+        } f f }
 }
 ] [ "<a   href  =    \"http://factorcode.org/\"    foo   =  bar baz='quux'a=pirsqd  >" parse-html ] unit-test
 [
 V{
-    T{ tag f "html" H{ } f f f }
+    T{ tag f "html" H{ } f f }
-    T{ tag f "head" H{ } f f f }
+    T{ tag f "head" H{ } f f }
-    T{ tag f "head" H{ } f f t }
+    T{ tag f "head" H{ } f t }
-    T{ tag f "html" H{ } f f t }
+    T{ tag f "html" H{ } f t }
 }
 ] [ "<html<head</head</html" parse-html ] unit-test
 [
 V{
-    T{ tag f "head" H{ } f f f }
+    T{ tag f "head" H{ } f f }
-    T{ tag f "title" H{ } f f f }
+    T{ tag f "title" H{ } f f }
-    T{ tag f text f "Spagna" f f }
+    T{ tag f text f "Spagna" f }
-    T{ tag f "title" H{ } f f t }
+    T{ tag f "title" H{ } f t }
-    T{ tag f "head" H{ } f f t }
+    T{ tag f "head" H{ } f t }
 }
 ] [ "<head<title>Spagna</title></head" parse-html ] unit-test
--- a/extra/html/parser/parser.factor
+++ b/extra/html/parser/parser.factor
@ -1,26 +1,22 @@
 USING: accessors arrays html.parser.utils hashtables io kernel
 namespaces prettyprint quotations
-sequences splitting state-parser strings unicode.categories unicode.case ;
+sequences splitting state-parser strings unicode.categories unicode.case
 sequences.lib ;
 IN: html.parser
-TUPLE: tag name attributes text matched? closing? ;
+TUPLE: tag name attributes text closing? ;
-SYMBOL: text
+SINGLETON: text
-SYMBOL: dtd
+SINGLETON: dtd
-SYMBOL: comment
+SINGLETON: comment
 SYMBOL: javascript
 SYMBOL: tagstack
 : push-tag ( tag -- )
    tagstack get push ;
 : closing-tag? ( string -- ? )
-    dup empty? [
+    [ f ]
-        drop f
+    [ [ first ] [ peek ] bi [ CHAR: / = ] bi@ or ] if-empty ;
    ] [
        dup first CHAR: / =
        swap peek CHAR: / = or
    ] if ;
 : <tag> ( name attributes closing? -- tag )
    tag new
@ -28,56 +24,55 @@ SYMBOL: tagstack
        swap >>attributes
        swap >>name ;
-: make-tag ( str attribs -- tag )
+: make-tag ( string attribs -- tag )
    >r [ closing-tag? ] keep "/" trim1 r> rot <tag> ;
-: make-text-tag ( str -- tag )
+: make-text-tag ( string -- tag )
-    T{ tag f text } clone [ set-tag-text ] keep ;
+    tag new
        text >>name
        swap >>text ;
-: make-comment-tag ( str -- tag )
+: make-comment-tag ( string -- tag )
-    T{ tag f comment } clone [ set-tag-text ] keep ;
+    tag new
        comment >>name
        swap >>text ;
-: make-dtd-tag ( str -- tag )
+: make-dtd-tag ( string -- tag )
-    T{ tag f dtd } clone [ set-tag-text ] keep ;
+    tag new
        dtd >>name
        swap >>text ;
-: read-whitespace ( -- str )
+: read-whitespace ( -- string )
    [ get-char blank? not ] take-until ;
-: read-whitespace* ( -- )
+: read-whitespace* ( -- ) read-whitespace drop ;
    read-whitespace drop ;
-: read-token ( -- str )
+: read-token ( -- string )
    read-whitespace*
    [ get-char blank? ] take-until ;
-: read-single-quote ( -- str )
+: read-single-quote ( -- string )
    [ get-char CHAR: ' = ] take-until ;
-: read-double-quote ( -- str )
+: read-double-quote ( -- string )
    [ get-char CHAR: " = ] take-until ;
-: read-quote ( -- str )
+: read-quote ( -- string )
-    get-char next* CHAR: ' = [
+    get-char next* CHAR: ' =
-        read-single-quote
+    [ read-single-quote ] [ read-double-quote ] if next* ;
    ] [
        read-double-quote
    ] if next* ;
-: read-key ( -- str )
+: read-key ( -- string )
    read-whitespace*
-    [ get-char CHAR: = = get-char blank? or ] take-until ;
+    [ get-char [ CHAR: = = ] [ blank? ] bi or ] take-until ;
 : read-= ( -- )
    read-whitespace*
    [ get-char CHAR: = = ] take-until drop next* ;
-: read-value ( -- str )
+: read-value ( -- string )
    read-whitespace*
-    get-char quote? [
+    get-char quote? [ read-quote ] [ read-token ] if
-        read-quote
+    [ blank? ] trim ;
    ] [
        read-token
    ] if [ blank? ] trim ;
 : read-comment ( -- )
    "-->" take-string* make-comment-tag push-tag ;
@ -97,14 +92,14 @@ SYMBOL: tagstack
    [ get-char CHAR: > = get-char CHAR: < = or ] take-until
    get-char CHAR: < = [ next* ] unless ;
-: read-< ( -- str )
+: read-< ( -- string )
    next* get-char CHAR: ! = [
        read-bang f
    ] [
        read-tag
    ] if ;
-: read-until-< ( -- str )
+: read-until-< ( -- string )
    [ get-char CHAR: < = ] take-until ;
 : parse-text ( -- )
@ -131,11 +126,9 @@ SYMBOL: tagstack
    ] string-parse ;
 : parse-tag ( -- )
-    read-< dup empty? [
+    read-< [
        drop
    ] [
        (parse-tag) make-tag push-tag
-    ] if ;
+    ] unless-empty ;
 : (parse-html) ( -- )
    get-next [
@ -145,13 +138,7 @@ SYMBOL: tagstack
    ] when ;
 : tag-parse ( quot -- vector )
-    [
+    V{ } clone tagstack [ string-parse ] with-variable ;
        V{ } clone tagstack set
        string-parse
    ] with-scope ;
 : parse-html ( string -- vector )
-    [
+    [ (parse-html) tagstack get ] tag-parse ;
        (parse-html)
        tagstack get
    ] tag-parse ;
--- a/extra/html/parser/printer/printer.factor
+++ b/extra/html/parser/printer/printer.factor
@ -1,123 +1,89 @@
-USING: assocs html.parser html.parser.utils combinators
+USING: accessors assocs html.parser html.parser.utils combinators
 continuations hashtables
 hashtables.private io kernel math
 namespaces prettyprint quotations sequences splitting
 strings ;
 IN: html.parser.printer
-SYMBOL: no-section
+SYMBOL: printer
 SYMBOL: html
 SYMBOL: head
 SYMBOL: body
 TUPLE: state section ;
-! TUPLE: text bold? underline? strikethrough? ;
+TUPLE: html-printer ;
 TUPLE: text-printer < html-printer ;
 TUPLE: src-printer < html-printer ;
 TUPLE: html-prettyprinter < html-printer ;
-TUPLE: text-printer ;
+HOOK: print-text-tag html-printer ( tag -- )
-TUPLE: ui-printer ;
+HOOK: print-comment-tag html-printer ( tag -- )
-TUPLE: src-printer ;
+HOOK: print-dtd-tag html-printer ( tag -- )
-TUPLE: html-prettyprinter ;
+HOOK: print-opening-tag html-printer ( tag -- )
-UNION: printer text-printer ui-printer src-printer html-prettyprinter ;
+HOOK: print-closing-tag html-printer ( tag -- )
 HOOK: print-tag printer ( tag -- )
 HOOK: print-text-tag printer ( tag -- )
 HOOK: print-comment-tag printer ( tag -- )
 HOOK: print-dtd-tag printer ( tag -- )
 HOOK: print-opening-named-tag printer ( tag -- )
 HOOK: print-closing-named-tag printer ( tag -- )
-: print-tags ( vector -- )
+ERROR: unknown-tag-error tag ;
-    [ print-tag ] each ;
+
 : print-tag ( tag -- )
    {
        { [ dup name>> text = ] [ print-text-tag ] }
        { [ dup name>> comment = ] [ print-comment-tag ] }
        { [ dup name>> dtd = ] [ print-dtd-tag ] }
        { [ dup [ name>> string? ] [ closing?>> ] bi and ]
            [ print-closing-tag ] }
        { [ dup name>> string? ]
            [ print-opening-tag ] }
        [ unknown-tag-error ]
    } cond ;
 : print-tags ( vector -- ) [ print-tag ] each ;
 : html-text. ( vector -- )
-    [
+    T{ text-printer } html-printer [ print-tags ] with-variable ;
        T{ text-printer } printer set
        print-tags
    ] with-scope ;
 : html-src. ( vector -- )
-    [
+    T{ src-printer } html-printer [ print-tags ] with-variable ;
        T{ src-printer } printer set
        print-tags
    ] with-scope ;
-M: printer print-text-tag ( tag -- )
+M: html-printer print-text-tag ( tag -- ) text>> write ;
    tag-text write ;
-M: printer print-comment-tag ( tag -- )
+M: html-printer print-comment-tag ( tag -- )
-    "<!--" write
+    "<!--" write text>> write "-->" write ;
    tag-text write
    "-->" write ;
-M: printer print-dtd-tag ( tag -- )
+M: html-printer print-dtd-tag ( tag -- )
-    "<!" write
+    "<!" write text>> write ">" write ;
    tag-text write
    ">" write ;
 M: printer print-opening-named-tag ( tag -- )
    dup tag-name {
        { "html" [ drop ] }
        { "head" [ drop ] }
        { "body" [ drop ] }
        { "title" [ "Title: " write tag-text print ] }
    } case ;
 M: printer print-closing-named-tag ( tag -- )
    drop ;
 : print-attributes ( hashtable -- )
-    [
+    [ [ bl write "=" write ] [ ?quote write ] bi* ] assoc-each ;
        swap bl write "=" write ?quote write
    ] assoc-each ;
-M: src-printer print-opening-named-tag ( tag -- )
+M: src-printer print-opening-tag ( tag -- )
    "<" write
-    [ tag-name write ]
+    [ name>> write ]
-    [ tag-attributes dup assoc-empty? [ drop ] [ print-attributes ] if ] bi
+    [ attributes>> dup assoc-empty? [ drop ] [ print-attributes ] if ] bi
    ">" write ;
-M: src-printer print-closing-named-tag ( tag -- )
+M: src-printer print-closing-tag ( tag -- )
    "</" write
-    tag-name write
+    name>> write
    ">" write ;
 SYMBOL: tab-width
 SYMBOL: #indentations
 SYMBOL: tagstack
 : prettyprint-html ( vector -- )
    [
        T{ html-prettyprinter } printer set
        V{ } clone tagstack set
        2 tab-width set
        0 #indentations set
        print-tags
    ] with-scope ;
 : print-tabs ( -- )
    tab-width get #indentations get * CHAR: \s <repetition> write ; 
-M: html-prettyprinter print-opening-named-tag ( tag -- )
+M: html-prettyprinter print-opening-tag ( tag -- )
    print-tabs "<" write
-    tag-name write
+    name>> write
    ">\n" write ;
-M: html-prettyprinter print-closing-named-tag ( tag -- )
+M: html-prettyprinter print-closing-tag ( tag -- )
    "</" write
-    tag-name write
+    name>> write
    ">" write ;
 ERROR: unknown-tag-error tag ;
 M: printer print-tag ( tag -- )
    {
        { [ dup tag-name text = ] [ print-text-tag ] }
        { [ dup tag-name comment = ] [ print-comment-tag ] }
        { [ dup tag-name dtd = ] [ print-dtd-tag ] }
        { [ dup tag-name string? over tag-closing? and ]
            [ print-closing-named-tag ] }
        { [ dup tag-name string? ]
            [ print-opening-named-tag ] }
        [ unknown-tag-error ]
    } cond ;
 ! SYMBOL: tablestack
 ! : with-html-printer ( vector quot -- )
    ! [ V{ } clone tablestack set ] with-scope ;
 ! { { 1 2 } { 3 4 } }
 ! H{ { table-gap { 10 10 } } } [
    ! [ [ [ [ . ] with-cell ] each ] with-row ] each
 ! ] tabular-output
 ! : html-pp ( vector -- )
    ! [ 0 #indentations set 2 tab-width set ] with-scope ; 
--- a/extra/html/parser/utils/utils.factor
+++ b/extra/html/parser/utils/utils.factor
@ -4,8 +4,7 @@ namespaces prettyprint quotations sequences splitting
 state-parser strings sequences.lib ;
 IN: html.parser.utils
-: string-parse-end? ( -- ? )
+: string-parse-end? ( -- ? ) get-next not ;
    get-next not ;
 : take-string* ( match -- string )
    dup length <circular-string>
@ -16,17 +15,18 @@ IN: html.parser.utils
    [ ?head drop ] [ ?tail drop ] bi ;
 : single-quote ( str -- newstr )
-    >r "'" r> "'" 3append ;
+    "'" swap "'" 3append ;
 : double-quote ( str -- newstr )
-    >r "\"" r> "\"" 3append ;
+    "\"" swap "\"" 3append ;
 : quote ( str -- newstr )
    CHAR: ' over member?
    [ double-quote ] [ single-quote ] if ;
 : quoted? ( str -- ? )
-    [ [ first ] [ peek ] bi [ = ] keep "'\"" member? and ] [ f ] if-seq ;
+    [ f ]
    [ [ first ] [ peek ] bi [ = ] keep "'\"" member? and ] if-empty ;
 : ?quote ( str -- newstr )
    dup quoted? [ quote ] unless ;
--- a/extra/regexp2/authors.txt
+++ b/extra/regexp2/authors.txt
@ -0,0 +1 @@
 Doug Coleman
--- a/extra/regexp2/backend/backend.factor
+++ b/extra/regexp2/backend/backend.factor
@ -0,0 +1,23 @@
 ! Copyright (C) 2008 Doug Coleman.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors hashtables kernel math state-tables vars vectors ;
 IN: regexp2.backend
 TUPLE: regexp
    raw
    { stack vector }
    parse-tree
    nfa-table
    dfa-table
    minimized-table
    { state integer }
    { new-states vector }
    { visited-states hashtable } ;
 : reset-regexp ( regexp -- regexp )
    0 >>state
    V{ } clone >>stack
    V{ } clone >>new-states
    H{ } clone >>visited-states ;
 SYMBOL: current-regexp
--- a/extra/regexp2/classes/classes.factor
+++ b/extra/regexp2/classes/classes.factor
@ -0,0 +1,49 @@
 ! Copyright (C) 2008 Doug Coleman.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors kernel math math.order symbols regexp2.parser
 words regexp2.utils unicode.categories combinators.short-circuit ;
 IN: regexp2.classes
 GENERIC: class-member? ( obj class -- ? )
 M: word class-member? ( obj class -- ? ) 2drop f ;
 M: integer class-member? ( obj class -- ? ) 2drop f ;
 M: character-class-range class-member? ( obj class -- ? )
    [ from>> ] [ to>> ] bi between? ;
 M: any-char class-member? ( obj class -- ? )
    2drop t ;
 M: letter-class class-member? ( obj class -- ? )
    drop letter? ;
 M: LETTER-class class-member? ( obj class -- ? )
    drop LETTER? ;
 M: ascii-class class-member? ( obj class -- ? )
    drop ascii? ;
 M: digit-class class-member? ( obj class -- ? )
    drop digit? ;
 M: alpha-class class-member? ( obj class -- ? )
    drop alpha? ;
 M: punctuation-class class-member? ( obj class -- ? )
    drop punct? ;
 M: java-printable-class class-member? ( obj class -- ? )
    drop java-printable? ;
 M: non-newline-blank-class class-member? ( obj class -- ? )
    drop { [ blank? ] [ CHAR: \n = not ] } 1&& ;
 M: control-character-class class-member? ( obj class -- ? )
    drop control-char? ;
 M: hex-digit-class class-member? ( obj class -- ? )
    drop hex-digit? ;
 M: java-blank-class class-member? ( obj class -- ? )
    drop java-blank? ;
--- a/extra/regexp2/dfa/dfa.factor
+++ b/extra/regexp2/dfa/dfa.factor
@ -0,0 +1,70 @@
 ! Copyright (C) 2008 Doug Coleman.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors arrays assocs combinators fry kernel locals
 math math.order regexp2.nfa regexp2.transition-tables sequences
 sets sorting vectors regexp2.utils sequences.lib ;
 USING: io prettyprint threads ;
 IN: regexp2.dfa
 : find-delta ( states transition regexp -- new-states )
    nfa-table>> transitions>>
    rot [ swap at at ] with with map sift concat prune ;
 : (find-epsilon-closure) ( states regexp -- new-states )
    eps swap find-delta ;
 : find-epsilon-closure ( states regexp -- new-states )
    '[ dup , (find-epsilon-closure) union ] [ length ] while-changes
    natural-sort ;
 : find-closure ( states transition regexp -- new-states )
    [ find-delta ] 2keep nip find-epsilon-closure ;
 : find-start-state ( regexp -- state )
    [ nfa-table>> start-state>> 1vector ] keep find-epsilon-closure ;
 : find-transitions ( seq1 regexp -- seq2 )
    nfa-table>> transitions>>
    [ at keys ] curry map concat eps swap remove ;
 : add-todo-state ( state regexp -- )
    2dup visited-states>> key? [
        2drop
    ] [
        [ visited-states>> conjoin ]
        [ new-states>> push ] 2bi
    ] if ;
 : new-transitions ( regexp -- )
    dup new-states>> [
        drop
    ] [
        dupd pop dup pick find-transitions rot
        [
            [ [ find-closure ] 2keep nip dupd add-todo-state ] 3keep
            >r swapd transition boa r> dfa-table>> add-transition 
        ] curry with each
        new-transitions
    ] if-empty ;
 : states ( hashtable -- array )
    [ keys ]
    [ values [ values concat ] map concat append ] bi ;
 : set-final-states ( regexp -- )
    dup
    [ nfa-table>> final-states>> keys ]
    [ dfa-table>> transitions>> states ] bi
    [ intersect empty? not ] with filter
    swap dfa-table>> final-states>>
    [ conjoin ] curry each ;
 : set-initial-state ( regexp -- )
    dup
    [ dfa-table>> ] [ find-start-state ] bi
    [ >>start-state drop ] keep
    1vector >>new-states drop ;
 : construct-dfa ( regexp -- )
    [ set-initial-state ] [ new-transitions ] [ set-final-states ] tri ;
--- a/extra/regexp2/nfa/nfa.factor
+++ b/extra/regexp2/nfa/nfa.factor
@ -0,0 +1,126 @@
 ! Copyright (C) 2008 Doug Coleman.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors arrays assocs grouping kernel regexp2.backend
 locals math namespaces regexp2.parser sequences state-tables fry
 quotations math.order math.ranges vectors unicode.categories
 regexp2.utils regexp2.transition-tables words sequences.lib ;
 IN: regexp2.nfa
 SYMBOL: negation-mode
 : negated? ( -- ? ) negation-mode get 0 or odd? ; 
 SINGLETON: eps
 : next-state ( regexp -- state )
    [ state>> ] [ [ 1+ ] change-state drop ] bi ;
 : set-start-state ( regexp -- )
    dup stack>> [
        drop
    ] [
        [ nfa-table>> ] [ pop first ] bi* >>start-state drop
    ] if-empty ;
 GENERIC: nfa-node ( node -- )
 :: add-simple-entry ( obj class -- )
    [let* | regexp [ current-regexp get ]
            s0 [ regexp next-state ]
            s1 [ regexp next-state ]
            stack [ regexp stack>> ]
            table [ regexp nfa-table>> ] |
        negated? [
            s0 f obj class boa table add-transition
            s0 s1 <default-transition> table add-transition
        ] [
            s0 s1 obj class boa table add-transition
        ] if
        s0 s1 2array stack push
        t s1 table final-states>> set-at ] ;
 :: concatenate-nodes ( -- )
    [let* | regexp [ current-regexp get ]
            stack [ regexp stack>> ]
            table [ regexp nfa-table>> ]
            s2 [ stack peek first ]
            s3 [ stack pop second ]
            s0 [ stack peek first ]
            s1 [ stack pop second ] |
        s1 s2 eps <literal-transition> table add-transition
        s1 table final-states>> delete-at
        s0 s3 2array stack push ] ;
 :: alternate-nodes ( -- )
    [let* | regexp [ current-regexp get ]
            stack [ regexp stack>> ]
            table [ regexp nfa-table>> ]
            s2 [ stack peek first ]
            s3 [ stack pop second ]
            s0 [ stack peek first ]
            s1 [ stack pop second ]
            s4 [ regexp next-state ]
            s5 [ regexp next-state ] |
        s4 s0 eps <literal-transition> table add-transition
        s4 s2 eps <literal-transition> table add-transition
        s1 s5 eps <literal-transition> table add-transition
        s3 s5 eps <literal-transition> table add-transition
        s1 table final-states>> delete-at
        s3 table final-states>> delete-at
        t s5 table final-states>> set-at
        s4 s5 2array stack push ] ;
 M: kleene-star nfa-node ( node -- )
    term>> nfa-node
    [let* | regexp [ current-regexp get ]
            stack [ regexp stack>> ]
            s0 [ stack peek first ]
            s1 [ stack pop second ]
            s2 [ regexp next-state ]
            s3 [ regexp next-state ]
            table [ regexp nfa-table>> ] |
        s1 table final-states>> delete-at
        t s3 table final-states>> set-at
        s1 s0 eps <literal-transition> table add-transition
        s2 s0 eps <literal-transition> table add-transition
        s2 s3 eps <literal-transition> table add-transition
        s1 s3 eps <literal-transition> table add-transition
        s2 s3 2array stack push ] ;
 M: concatenation nfa-node ( node -- )
    seq>>
    [ [ nfa-node ] each ]
    [ length 1- [ concatenate-nodes ] times ] bi ;
 M: alternation nfa-node ( node -- )
    seq>>
    [ [ nfa-node ] each ]
    [ length 1- [ alternate-nodes ] times ] bi ;
 M: constant nfa-node ( node -- )
    char>> literal-transition add-simple-entry ;
 M: epsilon nfa-node ( node -- )
    drop eps literal-transition add-simple-entry ;
 M: word nfa-node ( node -- )
    class-transition add-simple-entry ;
 M: character-class-range nfa-node ( node -- )
    class-transition add-simple-entry ;
 M: capture-group nfa-node ( node -- )
    term>> nfa-node ;
 M: negation nfa-node ( node -- )
    negation-mode inc
    term>> nfa-node 
    negation-mode dec ;
 : construct-nfa ( regexp -- )
    [
        reset-regexp
        negation-mode off
        [ current-regexp set ]
        [ parse-tree>> nfa-node ]
        [ set-start-state ] tri
    ] with-scope ;
--- a/extra/regexp2/parser/parser-tests.factor
+++ b/extra/regexp2/parser/parser-tests.factor
@ -0,0 +1,33 @@
 USING: kernel tools.test regexp2.backend regexp2 ;
 IN: regexp2.parser
 : test-regexp ( string -- )
    default-regexp parse-regexp ;
 : test-regexp2 ( string -- regexp )
    default-regexp dup parse-regexp ;
 [ "(" ] [ unmatched-parentheses? ] must-fail-with
 [ ] [ "a|b" test-regexp ] unit-test
 [ ] [ "a.b" test-regexp ] unit-test
 [ ] [ "a|b|c" test-regexp ] unit-test
 [ ] [ "abc|b" test-regexp ] unit-test
 [ ] [ "a|bcd" test-regexp ] unit-test
 [ ] [ "a|(b)" test-regexp ] unit-test
 [ ] [ "(a)|b" test-regexp ] unit-test
 [ ] [ "(a|b)" test-regexp ] unit-test
 [ ] [ "((a)|(b))" test-regexp ] unit-test
 [ ] [ "(?:a)" test-regexp ] unit-test
 [ ] [ "(?i:a)" test-regexp ] unit-test
 [ ] [ "(?-i:a)" test-regexp ] unit-test
 [ "(?z:a)" test-regexp ] [ bad-option? ] must-fail-with
 [ "(?-z:a)" test-regexp ] [ bad-option? ] must-fail-with
 [ ] [ "(?=a)" test-regexp ] unit-test
 [ ] [ "[abc]" test-regexp ] unit-test
 [ ] [ "[a-c]" test-regexp ] unit-test
 [ ] [ "[^a-c]" test-regexp ] unit-test
 [ "[^]" test-regexp ] must-fail
--- a/extra/regexp2/parser/parser.factor
+++ b/extra/regexp2/parser/parser.factor
@ -0,0 +1,362 @@
 ! Copyright (C) 2008 Doug Coleman.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors arrays assocs combinators io io.streams.string
 kernel math math.parser multi-methods namespaces qualified
 quotations sequences sequences.lib splitting symbols vectors
 dlists math.order combinators.lib unicode.categories
 sequences.lib regexp2.backend regexp2.utils ;
 IN: regexp2.parser
 FROM: math.ranges => [a,b] ;
 MIXIN: node
 TUPLE: concatenation seq ; INSTANCE: concatenation node
 TUPLE: alternation seq ; INSTANCE: alternation node
 TUPLE: kleene-star term ; INSTANCE: kleene-star node
 TUPLE: question term ; INSTANCE: question node
 TUPLE: negation term ; INSTANCE: negation node
 TUPLE: constant char ; INSTANCE: constant node
 TUPLE: range from to ; INSTANCE: range node
 TUPLE: lookahead term ; INSTANCE: lookahead node
 TUPLE: lookbehind term ; INSTANCE: lookbehind node
 TUPLE: capture-group term ; INSTANCE: capture-group node
 TUPLE: non-capture-group term ; INSTANCE: non-capture-group node
 TUPLE: independent-group term ; INSTANCE: independent-group node
 TUPLE: character-class-range from to ; INSTANCE: character-class-range node
 SINGLETON: epsilon INSTANCE: epsilon node
 SINGLETON: any-char INSTANCE: any-char node
 SINGLETON: front-anchor INSTANCE: front-anchor node
 SINGLETON: back-anchor INSTANCE: back-anchor node
 TUPLE: option-on option ; INSTANCE: option-on node
 TUPLE: option-off option ; INSTANCE: option-off node
 SINGLETONS: unix-lines dotall multiline comments case-insensitive unicode-case ;
 MIXIN: regexp-option
 INSTANCE: unix-lines regexp-option
 INSTANCE: dotall regexp-option
 INSTANCE: multiline regexp-option
 INSTANCE: comments regexp-option
 INSTANCE: case-insensitive regexp-option
 INSTANCE: unicode-case regexp-option
 SINGLETONS: letter-class LETTER-class Letter-class digit-class
 alpha-class non-newline-blank-class
 ascii-class punctuation-class java-printable-class blank-class
 control-character-class hex-digit-class java-blank-class c-identifier-class ;
 SINGLETONS: beginning-of-group end-of-group
 beginning-of-character-class end-of-character-class
 left-parenthesis pipe caret dash ;
 : <constant> ( obj -- constant ) constant boa ;
 : <negation> ( obj -- negation ) negation boa ;
 : <concatenation> ( seq -- concatenation ) >vector concatenation boa ;
 : <alternation> ( seq -- alternation ) >vector alternation boa ;
 : <capture-group> ( obj -- capture-group ) capture-group boa ;
 : <kleene-star> ( obj -- kleene-star ) kleene-star boa ;
 : first|concatenation ( seq -- first/concatenation )
    dup length 1 = [ first ] [ <concatenation> ] if ;
 : first|alternation ( seq -- first/alternation )
    dup length 1 = [ first ] [ <alternation> ] if ;
 ERROR: unmatched-parentheses ;
 : make-positive-lookahead ( string -- )
    lookahead boa push-stack ;
 : make-negative-lookahead ( string -- )
    <negation> lookahead boa push-stack ;
 : make-independent-group ( string -- )
    #! no backtracking
    independent-group boa push-stack ;
 : make-positive-lookbehind ( string -- )
    lookbehind boa push-stack ;
 : make-negative-lookbehind ( string -- )
    <negation> lookbehind boa push-stack ;
 DEFER: nested-parse-regexp
 : make-non-capturing-group ( string -- )
    non-capture-group boa push-stack ;
 ERROR: bad-option ch ;
 : option ( ch -- singleton )
    {
        { CHAR: i [ case-insensitive ] }
        { CHAR: d [ unix-lines ] }
        { CHAR: m [ multiline ] }
        { CHAR: s [ dotall ] }
        { CHAR: u [ unicode-case ] }
        { CHAR: x [ comments ] }
        [ bad-option ]
    } case ;
 : option-on ( ch -- ) option \ option-on boa push-stack ;
 : option-off ( ch -- ) option \ option-off boa push-stack ;
 : toggle-option ( ch ? -- ) [ option-on ] [ option-off ] if ;
 : (parse-options) ( string ? -- ) [ toggle-option ] curry each ;
 : parse-options ( string -- )
    "-" split1 [ t (parse-options) ] [ f (parse-options) ] bi* ;
 DEFER: (parse-regexp)
 : parse-special-group-options ( options -- )
    beginning-of-group push-stack
    parse-options (parse-regexp) pop-stack make-non-capturing-group ;
 ERROR: bad-special-group string ;
 : (parse-special-group) ( -- )
    read1 {
        { [ dup CHAR: : = ]
            [ drop nested-parse-regexp pop-stack make-non-capturing-group ] }
        { [ dup CHAR: = = ]
            [ drop nested-parse-regexp pop-stack make-positive-lookahead ] }
        { [ dup CHAR: = = ]
            [ drop nested-parse-regexp pop-stack make-negative-lookahead ] }
        { [ dup CHAR: > = ]
            [ drop nested-parse-regexp pop-stack make-independent-group ] }
        { [ dup CHAR: < = peek1 CHAR: = = and ]
            [ drop read1 drop nested-parse-regexp pop-stack make-positive-lookbehind ] }
        { [ dup CHAR: < = peek1 CHAR: ! = and ]
            [ drop read1 drop nested-parse-regexp pop-stack make-negative-lookbehind ] }
        [
            ":" read-until [ bad-special-group ] unless
            swap prefix parse-special-group-options
        ]
    } cond ;
 : handle-left-parenthesis ( -- )
    peek1 CHAR: ? =
    [ read1 drop (parse-special-group) ]
    [ nested-parse-regexp ] if ;
 : handle-dot ( -- ) any-char push-stack ;
 : handle-pipe ( -- ) pipe push-stack ;
 : handle-star ( -- ) stack pop <kleene-star> push-stack ;
 : handle-question ( -- )
    stack pop epsilon 2array <alternation> push-stack ;
 : handle-plus ( -- )
    stack pop dup <kleene-star> 2array <concatenation> push-stack ;
 ERROR: unmatched-brace ;
 : parse-repetition ( -- start finish ? )
    "}" read-until [ unmatched-brace ] unless
    [ "," split1 [ string>number ] bi@ ]
    [ CHAR: , swap index >boolean ] bi ;
 : replicate/concatenate ( n obj -- obj' )
    over zero? [ 2drop epsilon ]
    [ <repetition> first|concatenation ] if ;
 : exactly-n ( n -- )
    stack pop replicate/concatenate push-stack ;
 : at-least-n ( n -- )
    stack pop
    [ replicate/concatenate ] keep
    <kleene-star> 2array <concatenation> push-stack ;
 : at-most-n ( n -- )
    1+
    stack pop
    [ replicate/concatenate ] curry map <alternation> push-stack ;
 : from-m-to-n ( m n -- )
    [a,b]
    stack pop
    [ replicate/concatenate ] curry map
    <alternation> push-stack ;
 ERROR: invalid-range a b ;
 : handle-left-brace ( -- )
    parse-repetition
    >r 2dup [ [ 0 < [ invalid-range ] when ] when* ] bi@ r>
    [
        2dup and [ from-m-to-n ]
        [ [ nip at-most-n ] [ at-least-n ] if* ] if
    ] [ drop 0 max exactly-n ] if ;
 : handle-front-anchor ( -- ) front-anchor push-stack ;
 : handle-back-anchor ( -- ) back-anchor push-stack ;
 ERROR: bad-character-class obj ;
 ERROR: expected-posix-class ;
 : parse-posix-class ( -- obj )
    read1 CHAR: { = [ expected-posix-class ] unless
    "}" read-until [ bad-character-class ] unless
    {
        { "Lower" [ letter-class ] }
        { "Upper" [ LETTER-class ] }
        { "ASCII" [ ascii-class ] }
        { "Alpha" [ Letter-class ] }
        { "Digit" [ digit-class ] }
        { "Alnum" [ alpha-class ] }
        { "Punct" [ punctuation-class ] }
        { "Graph" [ java-printable-class ] }
        { "Print" [ java-printable-class ] }
        { "Blank" [ non-newline-blank-class ] }
        { "Cntrl" [ control-character-class ] }
        { "XDigit" [ hex-digit-class ] }
        { "Space" [ java-blank-class ] }
        ! TODO: unicode-character-class, fallthrough in unicode is bad-char-clss
        [ bad-character-class ]
    } case ;
 : parse-octal ( -- n ) 3 read oct> check-octal ;
 : parse-short-hex ( -- n ) 2 read hex> check-hex ;
 : parse-long-hex ( -- n ) 6 read hex> check-hex ;
 : parse-control-character ( -- n ) read1 ;
 ERROR: bad-escaped-literals seq ;
 : parse-escaped-literals ( -- obj )
    "\\E" read-until [ bad-escaped-literals ] unless
    read1 drop
    [ epsilon ] [
        [ <constant> ] V{ } map-as
        first|concatenation
    ] if-empty ;
 : parse-escaped ( -- obj )
    read1
    {
        { CHAR: \ [ CHAR: \ <constant> ] }
        { CHAR: . [ CHAR: . <constant> ] }
        { CHAR: t [ CHAR: \t <constant> ] }
        { CHAR: n [ CHAR: \n <constant> ] }
        { CHAR: r [ CHAR: \r <constant> ] }
        { CHAR: f [ HEX: c <constant> ] }
        { CHAR: a [ HEX: 7 <constant> ] }
        { CHAR: e [ HEX: 1b <constant> ] }
        { CHAR: d [ digit-class ] }
        { CHAR: D [ digit-class <negation> ] }
        { CHAR: s [ java-blank-class ] }
        { CHAR: S [ java-blank-class <negation> ] }
        { CHAR: w [ c-identifier-class ] }
        { CHAR: W [ c-identifier-class <negation> ] }
        { CHAR: p [ parse-posix-class ] }
        { CHAR: P [ parse-posix-class <negation> ] }
        { CHAR: x [ parse-short-hex <constant> ] }
        { CHAR: u [ parse-long-hex <constant> ] }
        { CHAR: 0 [ parse-octal <constant> ] }
        { CHAR: c [ parse-control-character ] }
        { CHAR: Q [ parse-escaped-literals ] }
    } case ;
 : handle-escape ( -- ) parse-escaped push-stack ;
 : handle-dash ( vector -- vector' )
    H{ { dash CHAR: - } } substitute ;
 : character-class>alternation ( seq -- alternation )
    [ dup number? [ <constant> ] when ] map first|alternation ;
 : handle-caret ( vector -- vector' )
    dup [ length 2 >= ] [ first caret eq? ] bi and [
        rest-slice character-class>alternation <negation>
    ] [
        character-class>alternation
    ] if ;
 : make-character-class ( -- character-class )
    [ beginning-of-character-class swap cut-stack ] change-whole-stack
    handle-dash handle-caret ;
 : apply-dash ( -- )
    stack [ pop3 nip character-class-range boa ] keep push ;
 : apply-dash? ( -- ? )
    stack dup length 3 >=
    [ [ length 2 - ] keep nth dash eq? ] [ drop f ] if ;
 ERROR: empty-negated-character-class ;
 DEFER: handle-left-bracket
 : (parse-character-class) ( -- )
    read1 [ empty-negated-character-class ] unless* {
        { CHAR: [ [ handle-left-bracket t ] }
        { CHAR: ] [ make-character-class push-stack f ] }
        { CHAR: - [ dash push-stack t ] }
        { CHAR: \ [ parse-escaped push-stack t ] }
        [ push-stack apply-dash? [ apply-dash ] when t ]
    } case
    [ (parse-character-class) ] when ;
 : parse-character-class-second ( -- )
    read1 {
        { CHAR: [ [ CHAR: [ <constant> push-stack ] }
        { CHAR: ] [ CHAR: ] <constant> push-stack ] }
        { CHAR: - [ CHAR: - <constant> push-stack ] }
        [ push1 ]
    } case ;
 : parse-character-class-first ( -- )
    read1 {
        { CHAR: ^ [ caret push-stack parse-character-class-second ] }
        { CHAR: [ [ CHAR: [ <constant> push-stack ] }
        { CHAR: ] [ CHAR: ] <constant> push-stack ] }
        { CHAR: - [ CHAR: - <constant> push-stack ] }
        [ push1 ]
    } case ;
 : handle-left-bracket ( -- )
    beginning-of-character-class push-stack
    parse-character-class-first (parse-character-class) ;
 ERROR: empty-regexp ;
 : finish-regexp-parse ( stack -- obj )
    dup length {
        { 0 [ empty-regexp ] }
        { 1 [ first ] }
        [
            drop { pipe } split
            [ first|concatenation ] map first|alternation
        ]
    } case ;
 : handle-right-parenthesis ( -- )
    stack beginning-of-group over last-index cut rest
    [ current-regexp get swap >>stack drop ]
    [ finish-regexp-parse <capture-group> push-stack ] bi* ;
 : nested-parse-regexp ( -- )
    beginning-of-group push-stack (parse-regexp) ;
 : ((parse-regexp)) ( token -- )
    {
        { CHAR: . [ handle-dot ] }
        { CHAR: ( [ handle-left-parenthesis ] }
        { CHAR: ) [ handle-right-parenthesis ] }
        { CHAR: | [ handle-pipe ] }
        { CHAR: ? [ handle-question ] }
        { CHAR: * [ handle-star ] }
        { CHAR: + [ handle-plus ] }
        { CHAR: { [ handle-left-brace ] }
        { CHAR: [ [ handle-left-bracket ] }
        { CHAR: ^ [ handle-front-anchor ] }
        { CHAR: $ [ handle-back-anchor ] }
        { CHAR: \ [ handle-escape ] }
        [ <constant> push-stack ]
    } case ;
 : (parse-regexp) ( -- )
    read1 [ ((parse-regexp)) (parse-regexp) ] when* ;
 : parse-regexp ( regexp -- )
    dup current-regexp [
        raw>> [
            <string-reader> [ (parse-regexp) ] with-input-stream
        ] unless-empty
        current-regexp get
        stack finish-regexp-parse
            >>parse-tree drop
    ] with-variable ;
--- a/extra/regexp2/regexp2-tests.factor
+++ b/extra/regexp2/regexp2-tests.factor
@ -0,0 +1,240 @@
 USING: regexp2 tools.test kernel regexp2.traversal ;
 IN: regexp2-tests
 [ f ] [ "b" "a*" <regexp> matches? ] unit-test
 [ t ] [ "" "a*" <regexp> matches? ] unit-test
 [ t ] [ "a" "a*" <regexp> matches? ] unit-test
 [ t ] [ "aaaaaaa" "a*"  <regexp> matches? ] unit-test
 [ f ] [ "ab" "a*" <regexp> matches? ] unit-test
 [ t ] [ "abc" "abc" <regexp> matches? ] unit-test
 [ t ] [ "a" "a|b|c" <regexp> matches? ] unit-test
 [ t ] [ "b" "a|b|c" <regexp> matches? ] unit-test
 [ t ] [ "c" "a|b|c" <regexp> matches? ] unit-test
 [ f ] [ "c" "d|e|f" <regexp> matches? ] unit-test
 [ f ] [ "aa" "a|b|c" <regexp> matches? ] unit-test
 [ f ] [ "bb" "a|b|c" <regexp> matches? ] unit-test
 [ f ] [ "cc" "a|b|c" <regexp> matches? ] unit-test
 [ f ] [ "cc" "d|e|f" <regexp> matches? ] unit-test
 [ f ] [ "" "a+" <regexp> matches? ] unit-test
 [ t ] [ "a" "a+" <regexp> matches? ] unit-test
 [ t ] [ "aa" "a+" <regexp> matches? ] unit-test
 [ t ] [ "" "a?" <regexp> matches? ] unit-test
 [ t ] [ "a" "a?" <regexp> matches? ] unit-test
 [ f ] [ "aa" "a?" <regexp> matches? ] unit-test
 [ f ] [ "" "." <regexp> matches? ] unit-test
 [ t ] [ "a" "." <regexp> matches? ] unit-test
 [ t ] [ "." "." <regexp> matches? ] unit-test
 ! [ f ] [ "\n" "." <regexp> matches? ] unit-test
 [ f ] [ "" ".+" <regexp> matches? ] unit-test
 [ t ] [ "a" ".+" <regexp> matches? ] unit-test
 [ t ] [ "ab" ".+" <regexp> matches? ] unit-test
 [ t ] [ "" "a|b*|c+|d?" <regexp> matches? ] unit-test
 [ t ] [ "a" "a|b*|c+|d?" <regexp> matches? ] unit-test
 [ t ] [ "c" "a|b*|c+|d?" <regexp> matches? ] unit-test
 [ t ] [ "cc" "a|b*|c+|d?" <regexp> matches? ] unit-test
 [ f ] [ "ccd" "a|b*|c+|d?" <regexp> matches? ] unit-test
 [ t ] [ "d" "a|b*|c+|d?" <regexp> matches? ] unit-test
 [ t ] [ "foo" "foo|bar" <regexp> matches? ] unit-test
 [ t ] [ "bar" "foo|bar" <regexp> matches? ] unit-test
 [ f ] [ "foobar" "foo|bar" <regexp> matches? ] unit-test
 [ f ] [ "" "(a)" <regexp> matches? ] unit-test
 [ t ] [ "a" "(a)" <regexp> matches? ] unit-test
 [ f ] [ "aa" "(a)" <regexp> matches? ] unit-test
 [ t ] [ "aa" "(a*)" <regexp> matches? ] unit-test
 [ f ] [ "aababaaabbac" "(a|b)+" <regexp> matches? ] unit-test
 [ t ] [ "ababaaabba" "(a|b)+" <regexp> matches? ] unit-test
 [ f ] [ "" "a{1}" <regexp> matches? ] unit-test
 [ t ] [ "a" "a{1}" <regexp> matches? ] unit-test
 [ f ] [ "aa" "a{1}" <regexp> matches? ] unit-test
 [ f ] [ "a" "a{2,}" <regexp> matches? ] unit-test
 [ t ] [ "aaa" "a{2,}" <regexp> matches? ] unit-test
 [ t ] [ "aaaa" "a{2,}" <regexp> matches? ] unit-test
 [ t ] [ "aaaaa" "a{2,}" <regexp> matches? ] unit-test
 [ t ] [ "" "a{,2}" <regexp> matches? ] unit-test
 [ t ] [ "a" "a{,2}" <regexp> matches? ] unit-test
 [ t ] [ "aa" "a{,2}" <regexp> matches? ] unit-test
 [ f ] [ "aaa" "a{,2}" <regexp> matches? ] unit-test
 [ f ] [ "aaaa" "a{,2}" <regexp> matches? ] unit-test
 [ f ] [ "aaaaa" "a{,2}" <regexp> matches? ] unit-test
 [ f ] [ "" "a{1,3}" <regexp> matches? ] unit-test
 [ t ] [ "a" "a{1,3}" <regexp> matches? ] unit-test
 [ t ] [ "aa" "a{1,3}" <regexp> matches? ] unit-test
 [ t ] [ "aaa" "a{1,3}" <regexp> matches? ] unit-test
 [ f ] [ "aaaa" "a{1,3}" <regexp> matches? ] unit-test
 [ f ] [ "" "[a]" <regexp> matches? ] unit-test
 [ t ] [ "a" "[a]" <regexp> matches? ] unit-test
 [ t ] [ "a" "[abc]" <regexp> matches? ] unit-test
 [ f ] [ "b" "[a]" <regexp> matches? ] unit-test
 [ f ] [ "d" "[abc]" <regexp> matches? ] unit-test
 [ t ] [ "ab" "[abc]{1,2}" <regexp> matches? ] unit-test
 [ f ] [ "abc" "[abc]{1,2}" <regexp> matches? ] unit-test
 [ f ] [ "" "[^a]" <regexp> matches? ] unit-test
 [ f ] [ "a" "[^a]" <regexp> matches? ] unit-test
 [ f ] [ "a" "[^abc]" <regexp> matches? ] unit-test
 [ t ] [ "b" "[^a]" <regexp> matches? ] unit-test
 [ t ] [ "d" "[^abc]" <regexp> matches? ] unit-test
 [ f ] [ "ab" "[^abc]{1,2}" <regexp> matches? ] unit-test
 [ f ] [ "abc" "[^abc]{1,2}" <regexp> matches? ] unit-test
 [ t ] [ "]" "[]]" <regexp> matches? ] unit-test
 [ f ] [ "]" "[^]]" <regexp> matches? ] unit-test
 [ t ] [ "a" "[^]]" <regexp> matches? ] unit-test
 [ "^" "[^]" <regexp> matches? ] must-fail
 [ t ] [ "^" "[]^]" <regexp> matches? ] unit-test
 [ t ] [ "]" "[]^]" <regexp> matches? ] unit-test
 [ t ] [ "[" "[[]" <regexp> matches? ] unit-test
 [ f ] [ "^" "[^^]" <regexp> matches? ] unit-test
 [ t ] [ "a" "[^^]" <regexp> matches? ] unit-test
 [ t ] [ "-" "[-]" <regexp> matches? ] unit-test
 [ f ] [ "a" "[-]" <regexp> matches? ] unit-test
 [ f ] [ "-" "[^-]" <regexp> matches? ] unit-test
 [ t ] [ "a" "[^-]" <regexp> matches? ] unit-test
 [ t ] [ "-" "[-a]" <regexp> matches? ] unit-test
 [ t ] [ "a" "[-a]" <regexp> matches? ] unit-test
 [ t ] [ "-" "[a-]" <regexp> matches? ] unit-test
 [ t ] [ "a" "[a-]" <regexp> matches? ] unit-test
 [ f ] [ "b" "[a-]" <regexp> matches? ] unit-test
 [ f ] [ "-" "[^-]" <regexp> matches? ] unit-test
 [ t ] [ "a" "[^-]" <regexp> matches? ] unit-test
 [ f ] [ "-" "[a-c]" <regexp> matches? ] unit-test
 [ t ] [ "-" "[^a-c]" <regexp> matches? ] unit-test
 [ t ] [ "b" "[a-c]" <regexp> matches? ] unit-test
 [ f ] [ "b" "[^a-c]" <regexp> matches? ] unit-test
 [ t ] [ "-" "[a-c-]" <regexp> matches? ] unit-test
 [ f ] [ "-" "[^a-c-]" <regexp> matches? ] unit-test
 [ t ] [ "\\" "[\\\\]" <regexp> matches? ] unit-test
 [ f ] [ "a" "[\\\\]" <regexp> matches? ] unit-test
 [ f ] [ "\\" "[^\\\\]" <regexp> matches? ] unit-test
 [ t ] [ "a" "[^\\\\]" <regexp> matches? ] unit-test
 [ t ] [ "0" "[\\d]" <regexp> matches? ] unit-test
 [ f ] [ "a" "[\\d]" <regexp> matches? ] unit-test
 [ f ] [ "0" "[^\\d]" <regexp> matches? ] unit-test
 [ t ] [ "a" "[^\\d]" <regexp> matches? ] unit-test
 [ t ] [ "a" "[a-z]{1,}|[A-Z]{2,4}|b*|c|(f|g)*" <regexp> matches? ] unit-test
 [ t ] [ "a" "[a-z]{1,2}|[A-Z]{3,3}|b*|c|(f|g)*" <regexp> matches? ] unit-test
 [ t ] [ "a" "[a-z]{1,2}|[A-Z]{3,3}" <regexp> matches? ] unit-test
 [ t ] [ "1000" "\\d{4,6}" <regexp> matches? ] unit-test
 [ t ] [ "1000" "[0-9]{4,6}" <regexp> matches? ] unit-test
 [ t ] [ "abc" "\\p{Lower}{3}" <regexp> matches? ] unit-test
 [ f ] [ "ABC" "\\p{Lower}{3}" <regexp> matches? ] unit-test
 [ t ] [ "ABC" "\\p{Upper}{3}" <regexp> matches? ] unit-test
 [ f ] [ "abc" "\\p{Upper}{3}" <regexp> matches? ] unit-test
 ! 
 [ f ] [ "abc" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
 [ t ] [ "ABC" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
 [ t ] [ "" "\\Q\\E" <regexp> matches? ] unit-test
 [ f ] [ "a" "\\Q\\E" <regexp> matches? ] unit-test
 [ t ] [ "|*+" "\\Q|*+\\E" <regexp> matches? ] unit-test
 [ f ] [ "abc" "\\Q|*+\\E" <regexp> matches? ] unit-test
 [ t ] [ "s" "\\Qs\\E" <regexp> matches? ] unit-test
 [ t ] [ "S" "\\0123" <regexp> matches? ] unit-test
 [ t ] [ "SXY" "\\0123XY" <regexp> matches? ] unit-test
 [ t ] [ "x" "\\x78" <regexp> matches? ] unit-test
 [ f ] [ "y" "\\x78" <regexp> matches? ] unit-test
 [ t ] [ "x" "\\u000078" <regexp> matches? ] unit-test
 [ f ] [ "y" "\\u000078" <regexp> matches? ] unit-test
 [ t ] [ "ab" "a+b" <regexp> matches? ] unit-test
 [ f ] [ "b" "a+b" <regexp> matches? ] unit-test
 [ t ] [ "aab" "a+b" <regexp> matches? ] unit-test
 [ f ] [ "abb" "a+b" <regexp> matches? ] unit-test
 [ t ] [ "abbbb" "ab*" <regexp> matches? ] unit-test
 [ t ] [ "a" "ab*" <regexp> matches? ] unit-test
 [ f ] [ "abab" "ab*" <regexp> matches? ] unit-test
 [ f ] [ "x" "\\." <regexp> matches? ] unit-test
 [ t ] [ "." "\\." <regexp> matches? ] unit-test
 [ t ] [ "aaaab" "a+ab" <regexp> matches? ] unit-test
 [ f ] [ "aaaxb" "a+ab" <regexp> matches? ] unit-test
 [ t ] [ "aaacb" "a+cb" <regexp> matches? ] unit-test
 [ f ] [ "aaaab" "a++ab" <regexp> matches? ] unit-test
 [ t ] [ "aaacb" "a++cb" <regexp> matches? ] unit-test
 [ 3 ] [ "aaacb" "a*" <regexp> match-head ] unit-test
 [ 1 ] [ "aaacb" "a+?" <regexp> match-head ] unit-test
 [ 2 ] [ "aaacb" "aa?" <regexp> match-head ] unit-test
 [ 1 ] [ "aaacb" "aa??" <regexp> match-head ] unit-test
 [ 3 ] [ "aacb" "aa?c" <regexp> match-head ] unit-test
 [ 3 ] [ "aacb" "aa??c" <regexp> match-head ] unit-test
 ! [ t ] [ "aaa" "AAA" t <regexp> matches? ] unit-test
 ! [ f ] [ "aax" "AAA" t <regexp> matches? ] unit-test
 ! [ t ] [ "aaa" "A*" t <regexp> matches? ] unit-test
 ! [ f ] [ "aaba" "A*" t <regexp> matches? ] unit-test
 ! [ t ] [ "b" "[AB]" t <regexp> matches? ] unit-test
 ! [ f ] [ "c" "[AB]" t <regexp> matches? ] unit-test
 ! [ t ] [ "c" "[A-Z]" t <regexp> matches? ] unit-test
 ! [ f ] [ "3" "[A-Z]" t <regexp> matches? ] unit-test
 [ ] [
    "(0[lL]?|[1-9]\\d{0,9}(\\d{0,9}[lL])?|0[xX]\\p{XDigit}{1,8}(\\p{XDigit}{0,8}[lL])?|0[0-7]{1,11}([0-7]{0,11}[lL])?|([0-9]+\\.[0-9]*|\\.[0-9]+)([eE][+-]?[0-9]+)?[fFdD]?|[0-9]+([eE][+-]?[0-9]+[fFdD]?|([eE][+-]?[0-9]+)?[fFdD]))"
    <regexp> drop
 ] unit-test
 [ t ] [ "fxxbar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
 [ f ] [ "foobar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
 ! [ 3 ] [ "foobar" "foo(?=bar)" <regexp> match-head ] unit-test
 ! [ f ] [ "foobxr" "foo(?=bar)" <regexp> match-head ] unit-test
 ! [ f ] [ "foobxr" "foo\\z" <regexp> match-head ] unit-test
 ! [ 3 ] [ "foo" "foo\\z" <regexp> match-head ] unit-test
 ! [ 3 ] [ "foo bar" "foo\\b" <regexp> match-head ] unit-test
 ! [ f ] [ "fooxbar" "foo\\b" <regexp> matches? ] unit-test
 ! [ t ] [ "foo" "foo\\b" <regexp> matches? ] unit-test
 ! [ t ] [ "foo bar" "foo\\b bar" <regexp> matches? ] unit-test
 ! [ f ] [ "fooxbar" "foo\\bxbar" <regexp> matches? ] unit-test
 ! [ f ] [ "foo" "foo\\bbar" <regexp> matches? ] unit-test
 ! [ f ] [ "foo bar" "foo\\B" <regexp> matches? ] unit-test
 ! [ 3 ] [ "fooxbar" "foo\\B" <regexp> match-head ] unit-test
 ! [ t ] [ "foo" "foo\\B" <regexp> matches? ] unit-test
 ! [ f ] [ "foo bar" "foo\\B bar" <regexp> matches? ] unit-test
 ! [ t ] [ "fooxbar" "foo\\Bxbar" <regexp> matches? ] unit-test
 ! [ f ] [ "foo" "foo\\Bbar" <regexp> matches? ] unit-test
 ! [ t ] [ "s@f" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
 ! [ f ] [ "a" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
 ! [ t ] [ ".o" "\\.[a-z]" <regexp> matches? ] unit-test
 ! Bug in parsing word
 ! [ t ] [ "a" R' a' matches?  ] unit-test
 ! ((A)(B(C)))
 ! 1.  ((A)(B(C)))
 ! 2. (A)
 ! 3. (B(C))
 ! 4. (C) 
--- a/extra/regexp2/regexp2.factor
+++ b/extra/regexp2/regexp2.factor
@ -0,0 +1,35 @@
 ! Copyright (C) 2008 Doug Coleman.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors combinators kernel regexp2.backend regexp2.utils
 regexp2.parser regexp2.nfa regexp2.dfa regexp2.traversal state-tables
 regexp2.transition-tables ;
 IN: regexp2
 : default-regexp ( string -- regexp )
    regexp new
        swap >>raw
        <transition-table> >>nfa-table
        <transition-table> >>dfa-table
        <transition-table> >>minimized-table
        reset-regexp ;
 : <regexp> ( string -- regexp )
    default-regexp
    {
        [ parse-regexp ]
        [ construct-nfa ]
        [ construct-dfa ]
        [ ]
    } cleave ;
 : R! CHAR: ! <regexp> ; parsing
 : R" CHAR: " <regexp> ; parsing
 : R# CHAR: # <regexp> ; parsing
 : R' CHAR: ' <regexp> ; parsing
 : R( CHAR: ) <regexp> ; parsing
 : R/ CHAR: / <regexp> ; parsing
 : R@ CHAR: @ <regexp> ; parsing
 : R[ CHAR: ] <regexp> ; parsing
 : R` CHAR: ` <regexp> ; parsing
 : R{ CHAR: } <regexp> ; parsing
 : R| CHAR: | <regexp> ; parsing
--- a/extra/regexp2/summary.txt
+++ b/extra/regexp2/summary.txt
@ -0,0 +1 @@
 Regular expressions
--- a/extra/regexp2/tags.txt
+++ b/extra/regexp2/tags.txt
@ -0,0 +1,2 @@
 parsing
 text
--- a/extra/regexp2/transition-tables/transition-tables.factor
+++ b/extra/regexp2/transition-tables/transition-tables.factor
@ -0,0 +1,44 @@
 ! Copyright (C) 2008 Doug Coleman.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors arrays assocs fry hashtables kernel sequences
 vectors ;
 IN: regexp2.transition-tables
 : insert-at ( value key hash -- )
    2dup at* [
        2nip push
    ] [
        drop >r >r dup vector? [ 1vector ] unless r> r> set-at
    ] if ;
 : ?insert-at ( value key hash/f -- hash )
    [ H{ } clone ] unless* [ insert-at ] keep ;
 TUPLE: transition from to obj ;
 TUPLE: literal-transition < transition ;
 TUPLE: class-transition < transition ;
 TUPLE: default-transition < transition ;
 TUPLE: literal obj ;
 TUPLE: class obj ;
 TUPLE: default ;
 : <literal-transition> ( from to obj -- transition ) literal-transition boa ;
 : <class-transition> ( from to obj -- transition ) class-transition boa ;
 : <default-transition> ( from to -- transition ) t default-transition boa ;
 TUPLE: transition-table transitions
    literals classes defaults
    start-state final-states ;
 : <transition-table> ( -- transition-table )
    transition-table new
        H{ } clone >>transitions
        H{ } clone >>final-states ;
 : set-transition ( transition hash -- )
    >r [ to>> ] [ obj>> ] [ from>> ] tri r>
    2dup at* [ 2nip insert-at ]
    [ drop >r >r H{ } clone [ insert-at ] keep r> r> set-at ] if ;
 : add-transition ( transition transition-table -- )
    transitions>> set-transition ;
--- a/extra/regexp2/traversal/traversal.factor
+++ b/extra/regexp2/traversal/traversal.factor
@ -0,0 +1,88 @@
 ! Copyright (C) 2008 Doug Coleman.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors assocs combinators combinators.lib kernel
 math math.ranges quotations sequences regexp2.parser
 regexp2.classes combinators.short-circuit assocs.lib
 sequences.lib ;
 IN: regexp2.traversal
 TUPLE: dfa-traverser
    dfa-table
    last-state current-state
    text
    start-index current-index
    matches ;
 : <dfa-traverser> ( text regexp -- match )
    dfa-table>>
    dfa-traverser new
        swap [ start-state>> >>current-state ] keep
        >>dfa-table
        swap >>text
        0 >>start-index
        0 >>current-index
        V{ } clone >>matches ;
 : final-state? ( dfa-traverser -- ? )
    [ current-state>> ] [ dfa-table>> final-states>> ] bi
    key? ;
 : text-finished? ( dfa-traverser -- ? )
    [ current-index>> ] [ text>> length ] bi >= ;
 : save-final-state ( dfa-straverser -- )
    [ current-index>> ] [ matches>> ] bi push ;
 : match-done? ( dfa-traverser -- ? )
    dup final-state? [
        dup save-final-state
    ] when text-finished? ;
 : increment-state ( dfa-traverser state -- dfa-traverser )
    >r [ 1+ ] change-current-index
    dup current-state>> >>last-state r>
    first >>current-state ;
 : match-failed ( dfa-traverser -- dfa-traverser )
    V{ } clone >>matches ;
 : match-literal ( transition from-state table -- to-state/f )
    transitions>> [ at ] [ 2drop f ] if-at ;
 : assoc-with ( param assoc quot -- assoc curry )
    swapd [ [ -rot ] dip call ] 2curry ; inline
 : match-class ( transition from-state table -- to-state/f )
    transitions>> at* [
        [ drop class-member? ] assoc-with assoc-find [ nip ] [ drop ] if
    ] [ drop ] if ;
 : match-default ( transition from-state table -- to-state/f )
    [ nip ] dip transitions>>
    [ t swap [ drop f ] unless-at ] [ drop f ] if-at ;
 : match-transition ( obj from-state dfa -- to-state/f )
    { [ match-literal ] [ match-class ] [ match-default ] } 3|| ;
 : setup-match ( match -- obj state dfa-table )
    { current-index>> text>> current-state>> dfa-table>> } get-slots
    [ nth ] 2dip ;
 : do-match ( dfa-traverser -- dfa-traverser )
    dup match-done? [
        dup setup-match match-transition
        [ increment-state do-match ] when*
    ] unless ;
 : return-match ( dfa-traverser -- interval/f )
    dup matches>>
    [ drop f ]
    [ [ start-index>> ] [ peek ] bi* 1 <range> ] if-empty ;
 : match ( string regexp -- pair )
    <dfa-traverser> do-match return-match ;
 : matches? ( string regexp -- ? )
    dupd match [ [ length ] [ range-length 1- ] bi* = ] [ drop f ] if* ;
 : match-head ( string regexp -- end ) match length>> 1- ;
--- a/extra/regexp2/utils/utils.factor
+++ b/extra/regexp2/utils/utils.factor
@ -0,0 +1,69 @@
 ! Copyright (C) 2008 Doug Coleman.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors arrays assocs combinators.lib io kernel
 math math.order namespaces regexp2.backend sequences
 sequences.lib unicode.categories math.ranges fry
 combinators.short-circuit ;
 IN: regexp2.utils
 : (while-changes) ( obj quot pred pred-ret -- obj )
    ! quot: ( obj -- obj' )
    ! pred: ( obj -- <=> )
    >r >r dup slip r> pick over call r> dupd =
    [ 3drop ] [ (while-changes) ] if ; inline
 : while-changes ( obj quot pred -- obj' )
    pick over call (while-changes) ; inline
 : last-state ( regexp -- range ) stack>> peek first2 [a,b] ;
 : push1 ( obj -- ) input-stream get stream>> push ;
 : peek1 ( -- obj ) input-stream get stream>> [ f ] [ peek ] if-empty ;
 : pop3 ( seq -- obj1 obj2 obj3 ) [ pop ] [ pop ] [ pop ] tri spin ;
 : stack ( -- obj ) current-regexp get stack>> ;
 : change-whole-stack ( quot -- )
    current-regexp get
    [ stack>> swap call ] keep (>>stack) ; inline
 : push-stack ( obj -- ) stack push ;
 : pop-stack ( -- obj ) stack pop ;
 : cut-out ( vector n -- vector' vector ) cut rest ;
 ERROR: cut-stack-error ;
 : cut-stack ( obj vector -- vector' vector )
    tuck last-index [ cut-stack-error ] unless* cut-out swap ;
 ERROR: bad-octal number ;
 ERROR: bad-hex number ;
 : check-octal ( octal -- octal ) dup 255 > [ bad-octal ] when ;
 : check-hex ( hex -- hex ) dup number? [ bad-hex ] unless ;
 : ascii? ( n -- ? ) 0 HEX: 7f between? ;
 : octal-digit? ( n -- ? ) CHAR: 0 CHAR: 7 between? ;
 : decimal-digit? ( n -- ? ) CHAR: 0 CHAR: 9 between? ;
 : hex-digit? ( n -- ? )
    [
        [ decimal-digit? ]
        [ CHAR: a CHAR: f between? ]
        [ CHAR: A CHAR: F between? ]
    ] 1|| ;
 : control-char? ( n -- ? )
    [
        [ 0 HEX: 1f between? ]
        [ HEX: 7f = ]
    ] 1|| ;
 : punct? ( n -- ? )
    "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" member? ;
 : c-identifier-char? ( ch -- ? )
    [ [ alpha? ] [ CHAR: _ = ] ] 1|| ;
 : java-blank? ( n -- ? )
    {
        CHAR: \s CHAR: \t CHAR: \n
        HEX: b HEX: 7 CHAR: \r
    } member? ;
 : java-printable? ( n -- ? )
    [ [ alpha? ] [ punct? ] ] 1|| ;
--- a/extra/taxes/tags.txt
+++ b/extra/taxes/tags.txt
@ -1 +1 @@
-
+taxes
--- a/extra/taxes/taxes.factor
+++ b/extra/taxes/taxes.factor
@ -1,5 +1,7 @@
-USING: arrays assocs kernel math math.intervals namespaces
+! Copyright (C) 2008 Doug Coleman.
-sequences combinators.lib money math.order ;
+! See http://factorcode.org/license.txt for BSD license.
 USING: accessors arrays assocs kernel math math.intervals
 namespaces sequences combinators.lib money math.order ;
 IN: taxes
 : monthly ( x -- y ) 12 / ;
@ -14,22 +16,21 @@ C: <w4> w4
 : allowance ( -- x ) 3500 ; inline
-: calculate-w4-allowances ( w4 -- x )
+: calculate-w4-allowances ( w4 -- x ) allowances>> allowance * ;
    w4-allowances allowance * ;
 ! Withhold: FICA, Medicare, Federal (FICA is social security)
 : fica-tax-rate ( -- x ) DECIMAL: .062 ; inline
 ! Base rate -- income over this rate is not taxed
-TUPLE: fica-base-unknown ;
+ERROR: fica-base-unknown ;
 : fica-base-rate ( year -- x )
    H{
        { 2008 102000 }
        { 2007  97500 }
-    } at* [ T{ fica-base-unknown } throw ] unless ;
+    } at* [ fica-base-unknown ] unless ;
 : fica-tax ( salary w4 -- x )
-    w4-year fica-base-rate min fica-tax-rate * ;
+    year>> fica-base-rate min fica-tax-rate * ;
 ! Employer tax only, not withheld
 : futa-tax-rate ( -- x ) DECIMAL: .062 ; inline
@ -64,8 +65,7 @@ TUPLE: tax-table single married ;
    0 -rot [ tax-bracket ] each drop ;
 : marriage-table ( w4 tax-table -- triples )
-    swap w4-married?
+    swap married?>> [ married>> ] [ single>> ] if ;
    [ tax-table-married ] [ tax-table-single ] if ;
 : federal-tax ( salary w4 tax-table -- n )
    [ adjust-allowances ] 2keep marriage-table tax ;