Merge branch 'master' of git://factorcode.org/git/factor

2008-05-20 18:25:43 -05:00 · 2008-05-20 18:25:43 -05:00 · 6bdd818b59
parent 41490d386f 539804c2c5
commit 6bdd818b59
10 changed files with 906 additions and 36 deletions
--- a/extra/ftp/server/server.factor
+++ b/extra/ftp/server/server.factor
@ -107,9 +107,14 @@ ERROR: type-error type ;
    drop
    215 "UNIX Type: L8" server-response ;

+: if-command-promise ( quot -- )
+    >r client get command-promise>> r>
+    [ "Establish an active or passive connection first" ftp-error ] if* ;
+
 : handle-STOR ( obj -- )
    [
-        drop
+        tokenized>> second
+        [ >r <ftp-put> r> fulfill ] if-command-promise
    ] [
        2drop
    ] recover ;
@ -122,7 +127,7 @@ ERROR: type-error type ;
    150 "Here comes the directory listing." server-response ;

 : finish-directory ( -- )
-    226 "Directory send OK." server-response ;
+    226 "Opening " server-response ;

 GENERIC: service-command ( stream obj -- )

@ -135,21 +140,25 @@ M: ftp-list service-command ( stream obj -- )
    ] with-output-stream
    finish-directory ;

-: start-file-transfer ( path -- )
+: transfer-outgoing-file ( path -- )
    150 "Opening BINARY mode data connection for "
    rot   
    [ file-name ] [
        " " swap  file-info file-info-size number>string
        "(" " bytes)." swapd 3append append
    ] bi 3append server-response ;
-    
+
+: transfer-incoming-file ( path -- )
+    150 "Opening BINARY mode data connection for " rot append
+    server-response ;
+
 : finish-file-transfer ( -- )
    226 "File send OK." server-response ;

 M: ftp-get service-command ( stream obj -- )
    [
        path>>
-        [ start-file-transfer ]
+        [ transfer-outgoing-file ]
        [ binary <file-reader> swap stream-copy ] bi
        finish-file-transfer
    ] [
@ -159,8 +168,8 @@ M: ftp-get service-command ( stream obj -- )
 M: ftp-put service-command ( stream obj -- )
    [
        path>>
-        [ start-file-transfer ]
-        [ binary <file-reader> swap stream-copy ] bi
+        [ transfer-incoming-file ]
+        [ binary <file-writer> stream-copy ] bi
        finish-file-transfer
    ] [
        3drop "File transfer failed" ftp-error
@ -177,16 +186,12 @@ M: ftp-put service-command ( stream obj -- )
            service-command
        ]
        [ client get f >>command-promise drop ]
-        [ ] cleanup
+        [ drop ] cleanup
    ] with-destructors ;

-: if-command-promise ( quot -- )
-    >r client get command-promise>> r>
-    [ "Establish an active or passive connection first" ftp-error ] if* ;
-
 : handle-LIST ( obj -- )
    drop
-    [ <ftp-list> swap fulfill ] if-command-promise ;
+    [ >r <ftp-list> r> fulfill ] if-command-promise ;

 : handle-SIZE ( obj -- )
    [
@ -262,7 +267,7 @@ ERROR: not-a-directory ;
        ! { "REIN" [ drop client get reset-ftp-client t ] }
        { "QUIT" [ handle-QUIT f ] }

-        ! { "PORT" [ ] }
+        ! { "PORT" [  ] } ! TODO
        { "PASV" [ handle-PASV t ] }
        ! { "MODE" [ ] }
        { "TYPE" [ handle-TYPE t ] }
@ -270,7 +275,7 @@ ERROR: not-a-directory ;

        ! { "ALLO" [ ] }
        ! { "REST" [ ] }
-        ! { "STOR" [ handle-STOR t ] }
+        { "STOR" [ handle-STOR t ] }
        ! { "STOU" [ ] }
        { "RETR" [ handle-RETR t ] }
        { "LIST" [ handle-LIST t ] }
@ -279,9 +284,10 @@ ERROR: not-a-directory ;
        ! { "APPE" [ ] }
        ! { "RNFR" [ ] }
        ! { "RNTO" [ ] }
-        ! { "DELE" [ ] }
-        ! { "RMD" [ ] }
-        ! { "MKD" [ ] }
+        ! { "DELE" [ handle-DELE t ] }
+        ! { "RMD" [ handle-RMD t ] }
+        ! ! { "XRMD" [ handle-XRMD t ] }
+        ! { "MKD" [ handle-MKD t ] }
        { "PWD" [ handle-PWD t ] }
        ! { "ABOR" [ ] }

--- a/extra/money/money.factor
+++ b/extra/money/money.factor
@ -15,17 +15,14 @@ IN: money
        "." % number>string 2 CHAR: 0 pad-left %
    ] "" make print ;

-TUPLE: not-a-decimal ;
-
-: not-a-decimal ( -- * )
-    T{ not-a-decimal } throw ;
+ERROR: not-a-decimal x ;

 : parse-decimal ( str -- ratio )
    "." split1
    >r dup "-" head? [ drop t "0" ] [ f swap ] if r>
    [ dup empty? [ drop "0" ] when ] bi@
    dup length
-    >r [ string>number dup [ not-a-decimal ] unless ] bi@ r>
+    >r [ dup string>number [ nip ] [ not-a-decimal ] if* ] bi@ r>
    10 swap ^ / + swap [ neg ] when ;

 : DECIMAL:
--- a/extra/regexp4/regexp4-tests.factor
+++ b/extra/regexp4/regexp4-tests.factor
@ -0,0 +1,138 @@
+USING: regexp4 tools.test kernel ;
+IN: regexp4-tests
+
+[ f ] [ "b" "a*" <regexp> matches? ] unit-test
+[ t ] [ "" "a*" <regexp> matches? ] unit-test
+[ t ] [ "a" "a*" <regexp> matches? ] unit-test
+[ t ] [ "aaaaaaa" "a*"  <regexp> matches? ] unit-test
+[ f ] [ "ab" "a*" <regexp> matches? ] unit-test
+
+[ t ] [ "abc" "abc" <regexp> matches? ] unit-test
+[ t ] [ "a" "a|b|c" <regexp> matches? ] unit-test
+[ t ] [ "b" "a|b|c" <regexp> matches? ] unit-test
+[ t ] [ "c" "a|b|c" <regexp> matches? ] unit-test
+[ f ] [ "c" "d|e|f" <regexp> matches? ] unit-test
+
+[ f ] [ "aa" "a|b|c" <regexp> matches? ] unit-test
+[ f ] [ "bb" "a|b|c" <regexp> matches? ] unit-test
+[ f ] [ "cc" "a|b|c" <regexp> matches? ] unit-test
+[ f ] [ "cc" "d|e|f" <regexp> matches? ] unit-test
+
+[ f ] [ "" "a+" <regexp> matches? ] unit-test
+[ t ] [ "a" "a+" <regexp> matches? ] unit-test
+[ t ] [ "aa" "a+" <regexp> matches? ] unit-test
+
+[ t ] [ "" "a?" <regexp> matches? ] unit-test
+[ t ] [ "a" "a?" <regexp> matches? ] unit-test
+[ f ] [ "aa" "a?" <regexp> matches? ] unit-test
+
+[ f ] [ "" "." <regexp> matches? ] unit-test
+[ t ] [ "a" "." <regexp> matches? ] unit-test
+[ t ] [ "." "." <regexp> matches? ] unit-test
+! [ f ] [ "\n" "." <regexp> matches? ] unit-test
+
+[ f ] [ "" ".+" <regexp> matches? ] unit-test
+[ t ] [ "a" ".+" <regexp> matches? ] unit-test
+[ t ] [ "ab" ".+" <regexp> matches? ] unit-test
+
+
+[ t ] [ "" "a|b*|c+|d?" <regexp> matches? ] unit-test
+[ t ] [ "a" "a|b*|c+|d?" <regexp> matches? ] unit-test
+[ t ] [ "c" "a|b*|c+|d?" <regexp> matches? ] unit-test
+[ t ] [ "cc" "a|b*|c+|d?" <regexp> matches? ] unit-test
+[ f ] [ "ccd" "a|b*|c+|d?" <regexp> matches? ] unit-test
+[ t ] [ "d" "a|b*|c+|d?" <regexp> matches? ] unit-test
+
+[ t ] [ "foo" "foo|bar" <regexp> matches? ] unit-test
+[ t ] [ "bar" "foo|bar" <regexp> matches? ] unit-test
+[ f ] [ "foobar" "foo|bar" <regexp> matches? ] unit-test
+
+[ f ] [ "" "(a)" <regexp> matches? ] unit-test
+[ t ] [ "a" "(a)" <regexp> matches? ] unit-test
+[ f ] [ "aa" "(a)" <regexp> matches? ] unit-test
+[ t ] [ "aa" "(a*)" <regexp> matches? ] unit-test
+
+[ f ] [ "aababaaabbac" "(a|b)+" <regexp> matches? ] unit-test
+[ t ] [ "ababaaabba" "(a|b)+" <regexp> matches? ] unit-test
+
+[ f ] [ "" "a{1}" <regexp> matches? ] unit-test
+[ t ] [ "a" "a{1}" <regexp> matches? ] unit-test
+[ f ] [ "aa" "a{1}" <regexp> matches? ] unit-test
+
+[ f ] [ "a" "a{2,}" <regexp> matches? ] unit-test
+[ t ] [ "aaa" "a{2,}" <regexp> matches? ] unit-test
+[ t ] [ "aaaa" "a{2,}" <regexp> matches? ] unit-test
+[ t ] [ "aaaaa" "a{2,}" <regexp> matches? ] unit-test
+
+[ t ] [ "" "a{,2}" <regexp> matches? ] unit-test
+[ t ] [ "a" "a{,2}" <regexp> matches? ] unit-test
+[ t ] [ "aa" "a{,2}" <regexp> matches? ] unit-test
+[ f ] [ "aaa" "a{,2}" <regexp> matches? ] unit-test
+[ f ] [ "aaaa" "a{,2}" <regexp> matches? ] unit-test
+[ f ] [ "aaaaa" "a{,2}" <regexp> matches? ] unit-test
+
+[ f ] [ "" "a{1,3}" <regexp> matches? ] unit-test
+[ t ] [ "a" "a{1,3}" <regexp> matches? ] unit-test
+[ t ] [ "aa" "a{1,3}" <regexp> matches? ] unit-test
+[ t ] [ "aaa" "a{1,3}" <regexp> matches? ] unit-test
+[ f ] [ "aaaa" "a{1,3}" <regexp> matches? ] unit-test
+
+[ f ] [ "" "[a]" <regexp> matches? ] unit-test
+[ t ] [ "a" "[a]" <regexp> matches? ] unit-test
+[ t ] [ "a" "[abc]" <regexp> matches? ] unit-test
+[ f ] [ "b" "[a]" <regexp> matches? ] unit-test
+[ f ] [ "d" "[abc]" <regexp> matches? ] unit-test
+[ t ] [ "ab" "[abc]{1,2}" <regexp> matches? ] unit-test
+[ f ] [ "abc" "[abc]{1,2}" <regexp> matches? ] unit-test
+
+[ f ] [ "" "[^a]" <regexp> matches? ] unit-test
+[ f ] [ "a" "[^a]" <regexp> matches? ] unit-test
+[ f ] [ "a" "[^abc]" <regexp> matches? ] unit-test
+[ t ] [ "b" "[^a]" <regexp> matches? ] unit-test
+[ t ] [ "d" "[^abc]" <regexp> matches? ] unit-test
+[ f ] [ "ab" "[^abc]{1,2}" <regexp> matches? ] unit-test
+[ f ] [ "abc" "[^abc]{1,2}" <regexp> matches? ] unit-test
+
+[ t ] [ "]" "[]]" <regexp> matches? ] unit-test
+[ f ] [ "]" "[^]]" <regexp> matches? ] unit-test
+
+! [ "^" "[^]" <regexp> matches? ] must-fail
+[ t ] [ "^" "[]^]" <regexp> matches? ] unit-test
+[ t ] [ "]" "[]^]" <regexp> matches? ] unit-test
+
+[ t ] [ "[" "[[]" <regexp> matches? ] unit-test
+[ f ] [ "^" "[^^]" <regexp> matches? ] unit-test
+[ t ] [ "a" "[^^]" <regexp> matches? ] unit-test
+
+[ t ] [ "-" "[-]" <regexp> matches? ] unit-test
+[ f ] [ "a" "[-]" <regexp> matches? ] unit-test
+[ f ] [ "-" "[^-]" <regexp> matches? ] unit-test
+[ t ] [ "a" "[^-]" <regexp> matches? ] unit-test
+
+[ t ] [ "-" "[-a]" <regexp> matches? ] unit-test
+[ t ] [ "a" "[-a]" <regexp> matches? ] unit-test
+[ t ] [ "-" "[a-]" <regexp> matches? ] unit-test
+[ t ] [ "a" "[a-]" <regexp> matches? ] unit-test
+[ f ] [ "b" "[a-]" <regexp> matches? ] unit-test
+[ f ] [ "-" "[^-]" <regexp> matches? ] unit-test
+[ t ] [ "a" "[^-]" <regexp> matches? ] unit-test
+
+[ f ] [ "-" "[a-c]" <regexp> matches? ] unit-test
+[ t ] [ "-" "[^a-c]" <regexp> matches? ] unit-test
+[ t ] [ "b" "[a-c]" <regexp> matches? ] unit-test
+[ f ] [ "b" "[^a-c]" <regexp> matches? ] unit-test
+
+[ t ] [ "-" "[a-c-]" <regexp> matches? ] unit-test
+[ f ] [ "-" "[^a-c-]" <regexp> matches? ] unit-test
+
+[ t ] [ "\\" "[\\\\]" <regexp> matches? ] unit-test
+[ f ] [ "a" "[\\\\]" <regexp> matches? ] unit-test
+[ f ] [ "\\" "[^\\\\]" <regexp> matches? ] unit-test
+[ t ] [ "a" "[^\\\\]" <regexp> matches? ] unit-test
+
+
+! ((A)(B(C)))
+! 1.  ((A)(B(C)))
+! 2. (A)
+! 3. (B(C))
+! 4. (C) 
--- a/extra/regexp4/regexp4.factor
+++ b/extra/regexp4/regexp4.factor
@ -0,0 +1,547 @@
+! Copyright (C) 2008 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: accessors arrays assocs combinators kernel math
+sequences namespaces locals combinators.lib state-tables
+math.parser state-parser sets dlists unicode.categories
+math.order quotations shuffle math.ranges splitting
+symbols ;
+IN: regexp4
+
+SYMBOLS: eps start-state final-state beginning-of-text
+end-of-text left-paren right-paren alternation ;
+
+SYMBOL: runtim-epsilon
+
+TUPLE: regexp raw paren-count bracket-count
+state stack nfa new-states dfa minimized-dfa
+dot-matches-newlines? character-sets capture-group
+captured-groups ;
+
+TUPLE: capture-group n range ;
+
+ERROR: paren-underflow ;
+ERROR: unbalanced-paren ;
+
+: push-stack ( regexp token -- ) swap stack>> push ;
+: push-all-stack ( regexp seq -- ) swap stack>> push-all ;
+: next-state ( regexp -- n ) [ 1+ ] change-state state>> ;
+
+: check-paren-underflow ( regexp -- )
+    paren-count>> 0 < [ paren-underflow ] when ;
+
+: check-unbalanced-paren ( regexp -- )
+    paren-count>> 0 > [ unbalanced-paren ] when ;
+
+:: (apply-alternation) ( stack regexp -- )
+    [let | s2 [ stack peek first ]
+           s3 [ stack pop second ]
+           s0 [ stack peek alternation = [ stack pop* ] when stack peek first ]
+           s1 [ stack pop second ]
+           s4 [ regexp next-state ]
+           s5 [ regexp next-state ]
+           table [ regexp nfa>> ] |
+        s5 table add-row
+        s4 eps s0 <entry> table add-entry
+        s4 eps s2 <entry> table add-entry
+        s1 eps s5 <entry> table add-entry
+        s3 eps s5 <entry> table add-entry
+        s1 table final-states>> delete-at
+        s3 table final-states>> delete-at
+        t s5 table final-states>> set-at
+        s4 s5 2array stack push ] ;
+
+: apply-alternation ( regexp -- )
+    [ stack>> ] [ (apply-alternation) ] bi ;
+
+: apply-alternation? ( stack -- ? )
+    dup length dup 3 <
+    [ 2drop f ] [ 2 - swap nth alternation = ] if ;
+
+:: (apply-concatenation) ( stack regexp -- )
+    [let* |
+            s2 [ stack peek first ]
+            s3 [ stack pop second ]
+            s0 [ stack peek first ]
+            s1 [ stack pop second ]
+            table [ regexp nfa>> ] |
+        s1 eps s2 <entry> table set-entry
+        s1 table final-states>> delete-at
+        s3 table add-row
+        s0 s3 2array stack push ] ;
+
+: apply-concatenation ( regexp -- )
+    [ stack>> ] [ (apply-concatenation) ] bi ;
+
+: apply-concatenation? ( seq -- ? )
+    dup length dup 2 <
+    [ 2drop f ] [ 2 - swap nth array? ] if ;
+
+: apply-loop ( seq regexp -- seq regexp )
+    over length 1 > [
+        2dup over apply-alternation?
+        [ (apply-alternation) ] [ (apply-concatenation) ] if apply-loop
+    ] when ;
+
+: apply-til-last ( token regexp -- )
+    swap [
+        <reversed> tuck index cut reverse dup pop*
+    ] change-stack >r reverse r> apply-loop stack>> push-all ;
+
+: concatenation-loop ( regexp -- )
+    dup stack>> dup apply-concatenation?
+    [ over (apply-concatenation) concatenation-loop ] [ 2drop ] if ;
+
+:: apply-kleene-closure ( regexp -- )
+    [let* | stack [ regexp stack>> ]
+            s0 [ stack peek first ]
+            s1 [ stack pop second ]
+            s2 [ regexp next-state ]
+            s3 [ regexp next-state ]
+            table [ regexp nfa>> ] |
+        s1 table final-states>> delete-at
+        t s3 table final-states>> set-at
+        s3 table add-row
+        s1 eps s0 <entry> table add-entry
+        s2 eps s0 <entry> table add-entry
+        s2 eps s3 <entry> table add-entry
+        s1 eps s3 <entry> table add-entry
+        s2 s3 2array stack push ] ;
+
+: add-numbers ( n obj -- obj )
+    2dup [ number? ] bi@ and
+    [ + ] [ dup sequence? [ [ + ] with map ] [ nip ] if ] if ;
+
+: increment-columns ( n assoc -- )
+    dup [ >r swap >r add-numbers r> r> set-at ] curry with* assoc-each ;
+
+:: copy-state-rows ( regexp range -- )
+    [let* | len [ range range-length ]
+            offset [ regexp state>> range range-min - 1+ ]
+            state [ regexp [ len + ] change-state ] |
+        regexp nfa>> rows>>
+        [ drop range member? ] assoc-filter
+        [
+            [ offset + ] dip
+            [ offset swap add-numbers ] assoc-map
+        ] assoc-map
+        regexp nfa>> [ assoc-union ] change-rows drop
+        range [ range-min ] [ range-max ] bi [ offset + ] bi@ 2array
+        regexp stack>> push ] ;
+
+: last-state ( regexp -- range )
+    stack>> peek first2 [a,b] ;
+
+: set-last-state-final ( ? regexp -- )
+    [ stack>> peek second ] [ nfa>> final-states>> ] bi set-at ;
+
+: apply-plus-closure ( regexp -- )
+    [ dup last-state copy-state-rows ]
+    [ apply-kleene-closure ]
+    [ apply-concatenation ] tri ;
+
+: apply-question-closure ( regexp -- )
+    [ stack>> peek first2 eps swap <entry> ] [ nfa>> add-entry ] bi ;
+
+: with0 ( obj n quot -- n quot' ) swapd curry ; inline
+
+: copy-state ( regexp state n -- )
+    [ copy-state-rows ] with0 with0 times ;
+
+:: (exactly-n) ( regexp state n -- )
+    regexp state n copy-state
+    t regexp set-last-state-final ;
+
+: exactly-n ( regexp n -- )
+    >r dup last-state r> 1- (exactly-n) ;
+
+: exactly-n-concatenated ( regexp state n -- )
+    [ (exactly-n) ] 3keep
+    nip 1- [ apply-concatenation ] with0 times ;
+
+:: at-least-n ( regexp n -- )
+    [let | state [ regexp stack>> pop first2 [a,b] ] |
+        regexp state n copy-state
+        state regexp stack>> push
+        regexp apply-kleene-closure ] ; 
+
+: pop-last ( regexp -- range )
+    stack>> pop first2 [a,b] ;
+
+:: at-most-n ( regexp n -- )
+    [let | state [ regexp pop-last ] |
+        regexp state n [ 1+ exactly-n-concatenated ] with with each
+        regexp n 1- [ apply-alternation ] with0 times
+        regexp apply-question-closure ] ;
+
+:: from-m-to-n ( regexp m n -- )
+    [let | state [ regexp pop-last ] |
+        regexp state
+        m n [a,b] [ exactly-n-concatenated ] with with each
+        regexp n m - [ apply-alternation ] with0 times ] ;
+
+: apply-brace-closure ( regexp from/f to/f comma? -- )
+    [
+        2dup and
+        [ from-m-to-n ]
+        [ [ nip at-most-n ] [ at-least-n ] if* ] if
+    ] [ drop exactly-n ] if ;
+
+:: make-nontoken-nfa ( regexp obj -- )
+    [let | s0 [ regexp next-state ]
+           s1 [ regexp next-state ]
+           stack [ regexp stack>> ]
+           table [ regexp nfa>> ] |
+        s0 obj s1 <entry> table set-entry
+        s1 table add-row
+        t s1 table final-states>> set-at
+        s0 s1 2array stack push ] ;
+
+: set-start-state ( regexp -- )
+    dup stack>> dup empty? [
+        2drop
+    ] [
+        [ nfa>> ] [ pop first ] bi* >>start-state drop
+    ] if ;
+
+: ascii? ( n -- ? ) 0 HEX: 7f between? ;
+: octal-digit? ( n -- ? ) CHAR: 0 CHAR: 7 between? ;
+: decimal-digit? ( n -- ? ) CHAR: 0 CHAR: 9 between? ;
+
+: hex-digit? ( n -- ? )
+    dup decimal-digit? 
+    over CHAR: a CHAR: f between? or 
+    swap CHAR: A CHAR: F between? or ;
+
+: control-char? ( n -- ? )
+    dup 0 HEX: 1f between? swap HEX: 7f = or ;
+
+: punct? ( n -- ? )
+    "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" member? ;
+
+: c-identifier-char? ( ch -- ? ) 
+    dup alpha? swap CHAR: _ = or ;
+
+: java-blank? ( n -- ? )
+    {   
+        CHAR: \s CHAR: \t CHAR: \n
+        HEX: b HEX: 7 CHAR: \r
+    } member? ;
+
+: java-printable? ( n -- ? )
+    dup alpha? swap punct? or ;
+
+ERROR: bad-character-class obj ;
+
+: parse-posix-class ( -- quot )
+    next
+    CHAR: { expect
+    [ get-char CHAR: } = ] take-until
+    {
+        { "Lower" [ [ letter? ] ] }
+        { "Upper" [ [ LETTER? ] ] }
+        { "ASCII" [ [ ascii? ] ] }
+        { "Alpha" [ [ Letter? ] ] }
+        { "Digit" [ [ digit? ] ] }
+        { "Alnum" [ [ alpha? ] ] }
+        { "Punct" [ [ punct? ] ] }
+        { "Graph" [ [ java-printable? ] ] }
+        { "Print" [ [ java-printable? ] ] }
+        { "Blank" [ [ " \t" member? ] ] }
+        { "Cntrl" [ [ control-char? ] ] }
+        { "XDigit" [ [ hex-digit? ] ] }
+        { "Space" [ [ java-blank? ] ] }
+        ! TODO: unicode-character-class, fallthrough in unicode is bad-char-clss
+        [ bad-character-class ]
+    } case ;
+
+ERROR: bad-octal number ;
+
+: parse-octal ( regexp -- )
+    next get-char drop
+    3 take oct>
+    dup 255 > [ bad-octal ] when
+    make-nontoken-nfa ;
+
+ERROR: bad-hex number ;
+
+: parse-short-hex ( regexp -- )
+    next 2 take hex>
+    dup number? [ bad-hex ] unless
+    make-nontoken-nfa ;
+
+: parse-long-hex ( regexp -- )
+    next 4 take hex>
+    dup number? [ bad-hex ] unless
+    make-nontoken-nfa ;
+
+: parse-control-character ( regexp -- )
+    next get-char make-nontoken-nfa ;
+
+: parse-backreference ( regexp obj -- )
+    2drop ;
+
+: dot-construction ( regexp -- )
+    [ CHAR: \n = not ] make-nontoken-nfa ;
+
+: front-anchor-construction ( regexp -- )
+    drop ;
+
+: back-anchor-construction  ( regexp -- )
+    drop ;
+
+: parse-brace ( -- from/f to/f comma? )
+    next
+    [ get-char CHAR: } = ] take-until
+    "," split1 [ [ string>number ] bi@ ] keep >boolean ;
+
+: take-until-]
+    [ get-char CHAR: ] = ] take-until ;
+
+: make-character-set ( regexp str -- )
+    dup
+    [ length 1 > ] [ first CHAR: ^ = ] bi and
+    [ rest t ] [ f ] if
+    >r [ member? ] curry r>
+    [ [ not ] compose ] when make-nontoken-nfa ;
+
+: parse-escaped ( regexp -- )
+    next get-char {
+        { CHAR: \ [ CHAR: \ make-nontoken-nfa ] }
+        { CHAR: t [ CHAR: \t make-nontoken-nfa ] }
+        { CHAR: n [ CHAR: \n make-nontoken-nfa ] }
+        { CHAR: r [ CHAR: \r make-nontoken-nfa ] }
+        { CHAR: f [ HEX: c make-nontoken-nfa ] }
+        { CHAR: a [ HEX: 7 make-nontoken-nfa ] }
+        { CHAR: e [ HEX: 1b make-nontoken-nfa ] }
+
+        { CHAR: d [ [ digit? ] make-nontoken-nfa ] }
+        { CHAR: D [ [ digit? not ] make-nontoken-nfa ] }
+        { CHAR: s [ [ java-blank? ] make-nontoken-nfa ] }
+        { CHAR: S [ [ java-blank? not ] make-nontoken-nfa ] }
+        { CHAR: w [ [ c-identifier-char? ] make-nontoken-nfa ] }
+        { CHAR: W [ [ c-identifier-char? not ] make-nontoken-nfa ] }
+
+        { CHAR: p [ parse-posix-class make-nontoken-nfa ] }
+        { CHAR: P [ parse-posix-class [ not ] compose make-nontoken-nfa ] }
+        { CHAR: x [ parse-short-hex ] }
+        { CHAR: u [ parse-long-hex ] }
+        { CHAR: 0 [ parse-octal ] }
+        { CHAR: c [ parse-control-character ] }
+
+        ! { CHAR: Q [ quot til \E ] }
+        ! { CHAR: E [ should be an error, parse this in the Q if exists ] }
+
+        ! { CHAR: b [ ] } ! a word boundary
+        ! { CHAR: B [ ] } ! a non-word boundary
+        ! { CHAR: A [ ] } ! beginning of input
+        ! { CHAR: G [ ] } ! end of previous match
+        ! { CHAR: Z [ ] } ! end of input but for the final terminator, if any
+        ! { CHAR: z [ ] } ! end of the input
+        [ dup digit? [ parse-backreference ] [ make-nontoken-nfa ] if ]
+    } case ;
+
+ERROR: unsupported-token token ;
+: parse-token ( regexp token -- )
+    dup {
+        { CHAR: ^ [ drop front-anchor-construction ] }
+        { CHAR: $ [ drop back-anchor-construction ] }
+        { CHAR: \ [ drop parse-escaped ] }
+        { CHAR: | [ drop dup concatenation-loop alternation push-stack ] }
+        { CHAR: ( [ drop [ 1+ ] change-paren-count left-paren push-stack ] }
+        { CHAR: ) [ drop [ 1- ] change-paren-count left-paren apply-til-last ] }
+        { CHAR: * [ drop apply-kleene-closure ] }
+        { CHAR: + [ drop apply-plus-closure ] }
+        { CHAR: ? [ drop apply-question-closure ] }
+        { CHAR: { [ drop parse-brace apply-brace-closure ] }
+        ! { CHAR: [ [ drop parse-character-set ] }
+        ! { CHAR: } [ drop drop "brace" ] }
+        ! { CHAR: ? [ drop ] }
+        { CHAR: . [ drop dot-construction ] }
+        { beginning-of-text [ push-stack ] }
+        { end-of-text [
+            drop {
+                [ check-unbalanced-paren ]
+                [ concatenation-loop ]
+                [ beginning-of-text apply-til-last ]
+                [ set-start-state ]
+            } cleave
+        ] }
+        [ drop make-nontoken-nfa ]
+    } case ;
+
+: (parse-raw-regexp) ( regexp -- )
+    get-char [ dupd parse-token next (parse-raw-regexp) ] [ drop ] if* ;
+
+: parse-raw-regexp ( regexp -- )
+    [ beginning-of-text parse-token ]
+    [
+        dup raw>> dup empty? [
+            2drop
+        ] [
+            [ (parse-raw-regexp) ] string-parse
+        ] if
+    ]
+    [ end-of-text parse-token ] tri ;
+
+:: find-delta ( states obj table -- keys )
+    obj states [
+        table get-row at
+        [ dup integer? [ 1array ] when unique ] [ H{ } ] if*
+    ] with map H{ } clone [ assoc-union ] reduce keys ;
+
+:: (find-closure) ( states obj assoc table -- keys )
+    [let | size [ assoc assoc-size ] |
+        assoc states unique assoc-union
+        dup assoc-size size > [
+            obj states [
+                table get-row at* [
+                    dup integer? [ 1array ] when
+                    obj rot table (find-closure)
+                ] [
+                    drop
+                ] if
+            ] with each
+        ] when ] ;
+
+: find-closure ( states obj table -- states )
+    >r H{ } r> (find-closure) keys ;
+
+: find-epsilon-closure ( states table -- states )
+    >r eps H{ } r> (find-closure) keys ;
+
+: filter-special-transition ( vec -- vec' )
+    [ drop eps = not ] assoc-filter ;
+
+: initialize-subset-construction ( regexp -- )
+    <vector-table> >>dfa
+    [
+        nfa>> [ start-state>> 1array ] keep
+        find-epsilon-closure 1dlist
+    ] [
+        swap >>new-states drop
+    ] [
+        [ dfa>> ] [ nfa>> ] bi
+        columns>> filter-special-transition >>columns drop
+    ] tri ;
+
+:: (subset-construction) ( regexp -- )
+    [let* | nfa [ regexp nfa>> ]
+           dfa [ regexp dfa>> ]
+           new-states [ regexp new-states>> ]
+           columns [ dfa columns>> keys ] |
+        
+        new-states dlist-empty? [
+            new-states pop-front
+            dup dfa add-row
+            columns [
+                2dup nfa [ find-delta ] [ find-epsilon-closure ] bi
+                dup [ dfa rows>> key? ] [ empty? ] bi or [
+                    dup new-states push-back
+                ] unless
+                dup empty? [ 3drop ] [ <entry> dfa set-entry ] if
+            ] with each
+            regexp (subset-construction)
+        ] unless ] ;
+
+: set-start/final-states ( regexp -- )
+    dup [ nfa>> start-state>> ]
+    [ dfa>> rows>> keys [ member? ] with filter first ] bi
+    >r dup dfa>> r> >>start-state drop
+
+    dup [ nfa>> final-states>> ] [ dfa>> rows>> ] bi
+    [ keys ] bi@
+    [ intersect empty? not ] with filter
+    >r dfa>> r> >>final-states drop ;
+
+: subset-construction ( regexp -- )
+    [ initialize-subset-construction ]
+    [ (subset-construction) ]
+    [ set-start/final-states ] tri ;
+
+: <regexp> ( raw -- obj )
+    regexp new
+        swap >>raw
+        0 >>paren-count
+        -1 >>state
+        V{ } clone >>stack 
+        V{ } clone >>character-sets
+        <vector-table> >>nfa
+        dup [ parse-raw-regexp ] [ subset-construction ] bi ;
+
+TUPLE: dfa-traverser
+    dfa
+    last-state current-state
+    text
+    start-index current-index
+    matches ;
+
+: <dfa-traverser> ( text dfa -- match )
+    dfa>>
+    dfa-traverser new
+        swap [ start-state>> >>current-state ] keep
+        >>dfa
+        swap >>text
+        0 >>start-index
+        0 >>current-index
+        V{ } clone >>matches ;
+
+: final-state? ( dfa-traverser -- ? )
+    [ current-state>> ] [ dfa>> final-states>> ] bi
+    member? ;
+
+: text-finished? ( dfa-traverser -- ? )
+    [ current-index>> ] [ text>> length ] bi >= ;
+
+: save-final-state ( dfa-straverser -- )
+    [ current-index>> ] [ matches>> ] bi push ;
+
+: match-done? ( dfa-traverser -- ? )
+    dup final-state? [
+        dup save-final-state
+    ] when text-finished? ;
+
+: increment-state ( dfa-traverser state -- dfa-traverser )
+    >r [ 1+ ] change-current-index
+    dup current-state>> >>last-state r>
+    >>current-state ;
+
+: match-transition ( obj hash -- state/f )
+    2dup keys [ callable? ] filter predicates
+    [ swap at nip ] [ at ] if* ;
+
+: do-match ( dfa-traverser -- dfa-traverser )
+    dup match-done? [
+        dup {
+            [ current-index>> ]
+            [ text>> ]
+            [ current-state>> ]
+            [ dfa>> rows>> ]
+        } cleave
+        at >r nth r> match-transition [
+            increment-state do-match
+        ] when*
+    ] unless ;
+
+: return-match ( dfa-traverser -- interval/f )
+    dup matches>> empty? [
+        drop f
+    ] [
+        [ start-index>> ] [ matches>> peek ] bi 1 <range>
+    ] if ;
+
+: match ( string regexp -- pair )
+    <dfa-traverser> do-match return-match ;
+
+: matches? ( string regexp -- ? )
+    dupd match [ [ length ] [ range-length 1- ] bi* = ] [ drop f ] if* ;
+
+
+
+! character classes
+! TUPLE: range-class from to ;
+! TUPLE: or-class left right ;
+
+! (?:a|b)*  <- does not capture
+! (a|b)*\1  <- group captured
+! (?!abba)  negative lookahead  matches ababa but not abbaa
+! (?=abba)  positive lookahead  matches abbaaa but not abaaa
--- a/extra/state-tables/authors.txt
+++ b/extra/state-tables/authors.txt
@ -0,0 +1 @@
+Doug Coleman
--- a/extra/state-tables/state-tables-tests.factor
+++ b/extra/state-tables/state-tables-tests.factor
@ -0,0 +1,49 @@
+USING: kernel tables tools.test ;
+IN: tables.tests
+
+: test-table
+    <table>
+    "a" "c" "z" <entry> over set-entry
+    "a" "o" "y" <entry> over set-entry
+    "a" "l" "x" <entry> over set-entry
+    "b" "o" "y" <entry> over set-entry
+    "b" "l" "x" <entry> over set-entry
+    "b" "s" "u" <entry> over set-entry ;
+
+[
+    T{ table f
+    H{ 
+        { "a" H{ { "l" "x" } { "c" "z" } { "o" "y" } } }
+        { "b" H{ { "l" "x" } { "s" "u" } { "o" "y" } } }
+    }
+    H{ { "l" t } { "s" t } { "c" t } { "o" t } } }
+] [ test-table ] unit-test
+
+[ "x" t ] [ "a" "l" test-table get-entry ] unit-test
+[ "har" t ] [
+    "a" "z" "har" <entry> test-table [ set-entry ] keep
+    >r "a" "z" r> get-entry
+] unit-test
+
+: vector-test-table
+    <vector-table>
+    "a" "c" "z" <entry> over add-value
+    "a" "c" "r" <entry> over add-value
+    "a" "o" "y" <entry> over add-value
+    "a" "l" "x" <entry> over add-value
+    "b" "o" "y" <entry> over add-value
+    "b" "l" "x" <entry> over add-value
+    "b" "s" "u" <entry> over add-value ;
+
+[
+T{ vector-table f
+    H{ 
+        { "a"
+            H{ { "l" "x" } { "c" V{ "z" "r" } } { "o" "y" } } }
+        { "b"
+            H{ { "l" "x" } { "s" "u" } { "o" "y" } } }
+    }
+    H{ { "l" t } { "s" t } { "c" t } { "o" t } }
+}
+] [ vector-test-table ] unit-test
+
--- a/extra/state-tables/state-tables.factor
+++ b/extra/state-tables/state-tables.factor
@ -0,0 +1,123 @@
+! Copyright (C) 2008 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: kernel namespaces sequences vectors assocs accessors ;
+IN: state-tables
+
+TUPLE: table rows columns start-state final-states ;
+TUPLE: entry row-key column-key value ;
+
+GENERIC: add-entry ( entry table -- )
+
+: make-table ( class -- obj )
+    new
+        H{ } clone >>rows
+        H{ } clone >>columns
+        H{ } clone >>final-states ;
+
+: <table> ( -- obj )
+    table make-table ;
+
+C: <entry> entry
+
+: (add-row) ( row-key table -- row )
+    2dup rows>> at* [
+        2nip
+    ] [
+        drop H{ } clone [ -rot rows>> set-at ] keep
+    ] if ;
+
+: add-row ( row-key table -- )
+    (add-row) drop ;
+
+: add-column ( column-key table -- )
+    t -rot columns>> set-at ;
+
+: set-row ( row row-key table -- )
+    rows>> set-at ;
+
+: lookup-row ( row-key table -- row/f ? )
+    rows>> at* ;
+
+: row-exists? ( row-key table -- ? )
+    lookup-row nip ;
+
+: lookup-column ( column-key table -- column/f ? )
+    columns>> at* ;
+
+: column-exists? ( column-key table -- ? )
+    lookup-column nip ;
+
+ERROR: no-row key ;
+ERROR: no-column key ;
+
+: get-row ( row-key table -- row )
+    dupd lookup-row [
+        nip
+    ] [
+        drop no-row
+    ] if ;
+
+: get-column ( column-key table -- column )
+    dupd lookup-column [
+        nip
+    ] [
+        drop no-column
+    ] if ;
+
+: get-entry ( row-key column-key table -- obj ? )
+    swapd lookup-row [
+        at*
+    ] [
+        2drop f f
+    ] if ;
+
+: (set-entry) ( entry table -- value column-key row )
+    [ >r column-key>> r> add-column ] 2keep
+    dupd >r row-key>> r> (add-row)
+    >r [ value>> ] keep column-key>> r> ;
+
+: set-entry ( entry table -- )
+    (set-entry) set-at ;
+
+: delete-entry ( entry table -- )
+    >r [ column-key>> ] [ row-key>> ] bi r>
+    lookup-row [ delete-at ] [ 2drop ] if ;
+
+: swap-rows ( row-key1 row-key2 table -- )
+    [ tuck get-row >r get-row r> ] 3keep
+    >r >r rot r> r> [ set-row ] keep set-row ;
+
+: member?* ( obj obj -- bool )
+    2dup = [ 2drop t ] [ member? ] if ;
+
+: find-by-column ( column-key data table -- seq )
+    swapd 2dup lookup-column 2drop 
+    [
+        rows>> [
+            pick swap at* [ 
+                >r pick r> member?* [ , ] [ drop ] if
+            ] [ 
+                2drop
+            ] if 
+        ] assoc-each
+    ] { } make 2nip ;
+
+
+TUPLE: vector-table < table ;
+: <vector-table> ( -- obj )
+    vector-table make-table ;
+
+: add-hash-vector ( value key hash -- )
+    2dup at* [
+        dup vector? [
+            2nip push
+        ] [
+            V{ } clone [ push ] keep
+            -rot >r >r [ push ] keep r> r> set-at
+        ] if
+    ] [
+        drop set-at
+    ] if ;
+ 
+M: vector-table add-entry ( entry table -- )
+    (set-entry) add-hash-vector ;
--- a/extra/unix/bsd/macosx/macosx.factor
+++ b/extra/unix/bsd/macosx/macosx.factor
@ -12,3 +12,16 @@ C-STRUCT: addrinfo
    { "char*" "canonname" }
    { "void*" "addr" }
    { "addrinfo*" "next" } ;
+
+C-STRUCT: passwd
+    { "char*"  "pw_name" }
+    { "char*"  "pw_passwd" }
+    { "uid_t"  "pw_uid" }
+    { "gid_t"  "pw_gid" }
+    { "time_t" "pw_change" }
+    { "char*"  "pw_class" }
+    { "char*"  "pw_gecos" }
+    { "char*"  "pw_dir" }
+    { "char*"  "pw_shell" }
+    { "time_t" "pw_expire" }
+    { "int"    "pw_fields" } ;
--- a/extra/unix/linux/linux.factor
+++ b/extra/unix/linux/linux.factor
@ -84,3 +84,12 @@ C-STRUCT: sockaddr-un
 : SEEK_SET 0 ; inline
 : SEEK_CUR 1 ; inline
 : SEEK_END 2 ; inline
+
+C-STRUCT: passwd
+    { "char*"  "pw_name" }
+    { "char*"  "pw_passwd" }
+    { "uid_t"  "pw_uid" }
+    { "gid_t"  "pw_gid" }
+    { "char*"  "pw_gecos" }
+    { "char*"  "pw_dir" }
+    { "char*"  "pw_shell" } ;
--- a/extra/unix/unix.factor
+++ b/extra/unix/unix.factor
@ -34,19 +34,6 @@ C-STRUCT: group
    { "int" "gr_gid" }
    { "char**" "gr_mem" } ;

-C-STRUCT: passwd
-    { "char*"  "pw_name" }
-    { "char*"  "pw_passwd" }
-    { "uid_t"  "pw_uid" }
-    { "gid_t"  "pw_gid" }
-    { "time_t" "pw_change" }
-    { "char*"  "pw_class" }
-    { "char*"  "pw_gecos" }
-    { "char*"  "pw_dir" }
-    { "char*"  "pw_shell" }
-    { "time_t" "pw_expire" }
-    { "int"    "pw_fields" } ;
-
 LIBRARY: factor

 FUNCTION: void clear_err_no ( ) ;