Merge branch 'master' of git://factorcode.org/git/factor

2008-05-20 18:43:12 -05:00 · 2008-05-20 18:43:12 -05:00 · 1f7f4bb278
parent c4fb27f538 72959dc4ee
commit 1f7f4bb278
14 changed files with 1147 additions and 12 deletions
--- a/core/kernel/kernel-docs.factor
+++ b/core/kernel/kernel-docs.factor
@ -630,7 +630,7 @@ HELP: tri*
    "The following two lines are equivalent:"
    { $code
        "[ p ] [ q ] [ r ] tri*"
-        ">r >r q r> q r> r"
+        ">r >r p r> q r> r"
    }
 } ;
--- a/extra/http/client/client-tests.factor
+++ b/extra/http/client/client-tests.factor
@ -17,7 +17,7 @@ tuple-syntax namespaces ;
        path: "/index.html"
        version: "1.1"
        cookies: V{ }
-        header: H{ { "connection" "close" } }
+        header: H{ { "connection" "close" } { "user-agent" "Factor http.client vocabulary" } }
    }
 ] [
    [
@ -35,7 +35,7 @@ tuple-syntax namespaces ;
        path: "/index.html"
        version: "1.1"
        cookies: V{ }
-        header: H{ { "connection" "close" } }
+        header: H{ { "connection" "close" } { "user-agent" "Factor http.client vocabulary" } }
    }
 ] [
    [
--- a/extra/http/client/client.factor
+++ b/extra/http/client/client.factor
@ -4,7 +4,7 @@ USING: assocs http kernel math math.parser namespaces sequences
 io io.sockets io.streams.string io.files io.timeouts strings
 splitting calendar continuations accessors vectors math.order
 io.encodings.8-bit io.encodings.binary io.streams.duplex
-fry debugger inspector ;
+fry debugger inspector ascii ;
 IN: http.client
 : max-redirects 10 ;
@ -37,8 +37,12 @@ SYMBOL: redirects
 PRIVATE>
 : read-chunk-size ( -- n )
    read-crlf ";" split1 drop [ blank? ] right-trim
    hex> [ "Bad chunk size" throw ] unless* ;
 : read-chunks ( -- )
-    read-crlf ";" split1 drop hex> dup { f 0 } member?
+    read-chunk-size dup zero?
    [ drop ] [ read % read-crlf "" assert= read-chunks ] if ;
 : read-response-body ( response -- response data )
--- a/extra/http/http.factor
+++ b/extra/http/http.factor
@ -256,7 +256,8 @@ cookies ;
        H{ } clone >>header
        H{ } clone >>query
        V{ } clone >>cookies
-        "close" "connection" set-header ;
+        "close" "connection" set-header
        "Factor http.client vocabulary" "user-agent" set-header ;
 : query-param ( request key -- value )
    swap query>> at ;
--- a/extra/openssl/openssl.factor
+++ b/extra/openssl/openssl.factor
@ -153,13 +153,13 @@ M: openssl-context dispose*
 TUPLE: ssl-handle file handle connected disposed ;
-ERROR: no-ssl-context ;
+ERROR: no-secure-context ;
-M: no-ssl-context summary
+M: no-secure-context summary
-    drop "SSL operations must be wrapped in calls to with-ssl-context" ;
+    drop "Secure socket operations must be wrapped in calls to with-secure-context" ;
 : current-ssl-context ( -- ctx )
-    secure-context get [ no-ssl-context ] unless* ;
+    secure-context get [ no-secure-context ] unless* ;
 : <ssl-handle> ( fd -- ssl )
    current-ssl-context handle>> SSL_new dup ssl-error
--- a/extra/regexp4/regexp4-tests.factor
+++ b/extra/regexp4/regexp4-tests.factor
@ -0,0 +1,138 @@
 USING: regexp4 tools.test kernel ;
 IN: regexp4-tests
 [ f ] [ "b" "a*" <regexp> matches? ] unit-test
 [ t ] [ "" "a*" <regexp> matches? ] unit-test
 [ t ] [ "a" "a*" <regexp> matches? ] unit-test
 [ t ] [ "aaaaaaa" "a*"  <regexp> matches? ] unit-test
 [ f ] [ "ab" "a*" <regexp> matches? ] unit-test
 [ t ] [ "abc" "abc" <regexp> matches? ] unit-test
 [ t ] [ "a" "a|b|c" <regexp> matches? ] unit-test
 [ t ] [ "b" "a|b|c" <regexp> matches? ] unit-test
 [ t ] [ "c" "a|b|c" <regexp> matches? ] unit-test
 [ f ] [ "c" "d|e|f" <regexp> matches? ] unit-test
 [ f ] [ "aa" "a|b|c" <regexp> matches? ] unit-test
 [ f ] [ "bb" "a|b|c" <regexp> matches? ] unit-test
 [ f ] [ "cc" "a|b|c" <regexp> matches? ] unit-test
 [ f ] [ "cc" "d|e|f" <regexp> matches? ] unit-test
 [ f ] [ "" "a+" <regexp> matches? ] unit-test
 [ t ] [ "a" "a+" <regexp> matches? ] unit-test
 [ t ] [ "aa" "a+" <regexp> matches? ] unit-test
 [ t ] [ "" "a?" <regexp> matches? ] unit-test
 [ t ] [ "a" "a?" <regexp> matches? ] unit-test
 [ f ] [ "aa" "a?" <regexp> matches? ] unit-test
 [ f ] [ "" "." <regexp> matches? ] unit-test
 [ t ] [ "a" "." <regexp> matches? ] unit-test
 [ t ] [ "." "." <regexp> matches? ] unit-test
 ! [ f ] [ "\n" "." <regexp> matches? ] unit-test
 [ f ] [ "" ".+" <regexp> matches? ] unit-test
 [ t ] [ "a" ".+" <regexp> matches? ] unit-test
 [ t ] [ "ab" ".+" <regexp> matches? ] unit-test
 [ t ] [ "" "a|b*|c+|d?" <regexp> matches? ] unit-test
 [ t ] [ "a" "a|b*|c+|d?" <regexp> matches? ] unit-test
 [ t ] [ "c" "a|b*|c+|d?" <regexp> matches? ] unit-test
 [ t ] [ "cc" "a|b*|c+|d?" <regexp> matches? ] unit-test
 [ f ] [ "ccd" "a|b*|c+|d?" <regexp> matches? ] unit-test
 [ t ] [ "d" "a|b*|c+|d?" <regexp> matches? ] unit-test
 [ t ] [ "foo" "foo|bar" <regexp> matches? ] unit-test
 [ t ] [ "bar" "foo|bar" <regexp> matches? ] unit-test
 [ f ] [ "foobar" "foo|bar" <regexp> matches? ] unit-test
 [ f ] [ "" "(a)" <regexp> matches? ] unit-test
 [ t ] [ "a" "(a)" <regexp> matches? ] unit-test
 [ f ] [ "aa" "(a)" <regexp> matches? ] unit-test
 [ t ] [ "aa" "(a*)" <regexp> matches? ] unit-test
 [ f ] [ "aababaaabbac" "(a|b)+" <regexp> matches? ] unit-test
 [ t ] [ "ababaaabba" "(a|b)+" <regexp> matches? ] unit-test
 [ f ] [ "" "a{1}" <regexp> matches? ] unit-test
 [ t ] [ "a" "a{1}" <regexp> matches? ] unit-test
 [ f ] [ "aa" "a{1}" <regexp> matches? ] unit-test
 [ f ] [ "a" "a{2,}" <regexp> matches? ] unit-test
 [ t ] [ "aaa" "a{2,}" <regexp> matches? ] unit-test
 [ t ] [ "aaaa" "a{2,}" <regexp> matches? ] unit-test
 [ t ] [ "aaaaa" "a{2,}" <regexp> matches? ] unit-test
 [ t ] [ "" "a{,2}" <regexp> matches? ] unit-test
 [ t ] [ "a" "a{,2}" <regexp> matches? ] unit-test
 [ t ] [ "aa" "a{,2}" <regexp> matches? ] unit-test
 [ f ] [ "aaa" "a{,2}" <regexp> matches? ] unit-test
 [ f ] [ "aaaa" "a{,2}" <regexp> matches? ] unit-test
 [ f ] [ "aaaaa" "a{,2}" <regexp> matches? ] unit-test
 [ f ] [ "" "a{1,3}" <regexp> matches? ] unit-test
 [ t ] [ "a" "a{1,3}" <regexp> matches? ] unit-test
 [ t ] [ "aa" "a{1,3}" <regexp> matches? ] unit-test
 [ t ] [ "aaa" "a{1,3}" <regexp> matches? ] unit-test
 [ f ] [ "aaaa" "a{1,3}" <regexp> matches? ] unit-test
 [ f ] [ "" "[a]" <regexp> matches? ] unit-test
 [ t ] [ "a" "[a]" <regexp> matches? ] unit-test
 [ t ] [ "a" "[abc]" <regexp> matches? ] unit-test
 [ f ] [ "b" "[a]" <regexp> matches? ] unit-test
 [ f ] [ "d" "[abc]" <regexp> matches? ] unit-test
 [ t ] [ "ab" "[abc]{1,2}" <regexp> matches? ] unit-test
 [ f ] [ "abc" "[abc]{1,2}" <regexp> matches? ] unit-test
 [ f ] [ "" "[^a]" <regexp> matches? ] unit-test
 [ f ] [ "a" "[^a]" <regexp> matches? ] unit-test
 [ f ] [ "a" "[^abc]" <regexp> matches? ] unit-test
 [ t ] [ "b" "[^a]" <regexp> matches? ] unit-test
 [ t ] [ "d" "[^abc]" <regexp> matches? ] unit-test
 [ f ] [ "ab" "[^abc]{1,2}" <regexp> matches? ] unit-test
 [ f ] [ "abc" "[^abc]{1,2}" <regexp> matches? ] unit-test
 [ t ] [ "]" "[]]" <regexp> matches? ] unit-test
 [ f ] [ "]" "[^]]" <regexp> matches? ] unit-test
 ! [ "^" "[^]" <regexp> matches? ] must-fail
 [ t ] [ "^" "[]^]" <regexp> matches? ] unit-test
 [ t ] [ "]" "[]^]" <regexp> matches? ] unit-test
 [ t ] [ "[" "[[]" <regexp> matches? ] unit-test
 [ f ] [ "^" "[^^]" <regexp> matches? ] unit-test
 [ t ] [ "a" "[^^]" <regexp> matches? ] unit-test
 [ t ] [ "-" "[-]" <regexp> matches? ] unit-test
 [ f ] [ "a" "[-]" <regexp> matches? ] unit-test
 [ f ] [ "-" "[^-]" <regexp> matches? ] unit-test
 [ t ] [ "a" "[^-]" <regexp> matches? ] unit-test
 [ t ] [ "-" "[-a]" <regexp> matches? ] unit-test
 [ t ] [ "a" "[-a]" <regexp> matches? ] unit-test
 [ t ] [ "-" "[a-]" <regexp> matches? ] unit-test
 [ t ] [ "a" "[a-]" <regexp> matches? ] unit-test
 [ f ] [ "b" "[a-]" <regexp> matches? ] unit-test
 [ f ] [ "-" "[^-]" <regexp> matches? ] unit-test
 [ t ] [ "a" "[^-]" <regexp> matches? ] unit-test
 [ f ] [ "-" "[a-c]" <regexp> matches? ] unit-test
 [ t ] [ "-" "[^a-c]" <regexp> matches? ] unit-test
 [ t ] [ "b" "[a-c]" <regexp> matches? ] unit-test
 [ f ] [ "b" "[^a-c]" <regexp> matches? ] unit-test
 [ t ] [ "-" "[a-c-]" <regexp> matches? ] unit-test
 [ f ] [ "-" "[^a-c-]" <regexp> matches? ] unit-test
 [ t ] [ "\\" "[\\\\]" <regexp> matches? ] unit-test
 [ f ] [ "a" "[\\\\]" <regexp> matches? ] unit-test
 [ f ] [ "\\" "[^\\\\]" <regexp> matches? ] unit-test
 [ t ] [ "a" "[^\\\\]" <regexp> matches? ] unit-test
 ! ((A)(B(C)))
 ! 1.  ((A)(B(C)))
 ! 2. (A)
 ! 3. (B(C))
 ! 4. (C) 
--- a/extra/regexp4/regexp4.factor
+++ b/extra/regexp4/regexp4.factor
@ -0,0 +1,547 @@
 ! Copyright (C) 2008 Doug Coleman.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors arrays assocs combinators kernel math
 sequences namespaces locals combinators.lib state-tables
 math.parser state-parser sets dlists unicode.categories
 math.order quotations shuffle math.ranges splitting
 symbols ;
 IN: regexp4
 SYMBOLS: eps start-state final-state beginning-of-text
 end-of-text left-paren right-paren alternation ;
 SYMBOL: runtim-epsilon
 TUPLE: regexp raw paren-count bracket-count
 state stack nfa new-states dfa minimized-dfa
 dot-matches-newlines? character-sets capture-group
 captured-groups ;
 TUPLE: capture-group n range ;
 ERROR: paren-underflow ;
 ERROR: unbalanced-paren ;
 : push-stack ( regexp token -- ) swap stack>> push ;
 : push-all-stack ( regexp seq -- ) swap stack>> push-all ;
 : next-state ( regexp -- n ) [ 1+ ] change-state state>> ;
 : check-paren-underflow ( regexp -- )
    paren-count>> 0 < [ paren-underflow ] when ;
 : check-unbalanced-paren ( regexp -- )
    paren-count>> 0 > [ unbalanced-paren ] when ;
 :: (apply-alternation) ( stack regexp -- )
    [let | s2 [ stack peek first ]
           s3 [ stack pop second ]
           s0 [ stack peek alternation = [ stack pop* ] when stack peek first ]
           s1 [ stack pop second ]
           s4 [ regexp next-state ]
           s5 [ regexp next-state ]
           table [ regexp nfa>> ] |
        s5 table add-row
        s4 eps s0 <entry> table add-entry
        s4 eps s2 <entry> table add-entry
        s1 eps s5 <entry> table add-entry
        s3 eps s5 <entry> table add-entry
        s1 table final-states>> delete-at
        s3 table final-states>> delete-at
        t s5 table final-states>> set-at
        s4 s5 2array stack push ] ;
 : apply-alternation ( regexp -- )
    [ stack>> ] [ (apply-alternation) ] bi ;
 : apply-alternation? ( stack -- ? )
    dup length dup 3 <
    [ 2drop f ] [ 2 - swap nth alternation = ] if ;
 :: (apply-concatenation) ( stack regexp -- )
    [let* |
            s2 [ stack peek first ]
            s3 [ stack pop second ]
            s0 [ stack peek first ]
            s1 [ stack pop second ]
            table [ regexp nfa>> ] |
        s1 eps s2 <entry> table set-entry
        s1 table final-states>> delete-at
        s3 table add-row
        s0 s3 2array stack push ] ;
 : apply-concatenation ( regexp -- )
    [ stack>> ] [ (apply-concatenation) ] bi ;
 : apply-concatenation? ( seq -- ? )
    dup length dup 2 <
    [ 2drop f ] [ 2 - swap nth array? ] if ;
 : apply-loop ( seq regexp -- seq regexp )
    over length 1 > [
        2dup over apply-alternation?
        [ (apply-alternation) ] [ (apply-concatenation) ] if apply-loop
    ] when ;
 : apply-til-last ( token regexp -- )
    swap [
        <reversed> tuck index cut reverse dup pop*
    ] change-stack >r reverse r> apply-loop stack>> push-all ;
 : concatenation-loop ( regexp -- )
    dup stack>> dup apply-concatenation?
    [ over (apply-concatenation) concatenation-loop ] [ 2drop ] if ;
 :: apply-kleene-closure ( regexp -- )
    [let* | stack [ regexp stack>> ]
            s0 [ stack peek first ]
            s1 [ stack pop second ]
            s2 [ regexp next-state ]
            s3 [ regexp next-state ]
            table [ regexp nfa>> ] |
        s1 table final-states>> delete-at
        t s3 table final-states>> set-at
        s3 table add-row
        s1 eps s0 <entry> table add-entry
        s2 eps s0 <entry> table add-entry
        s2 eps s3 <entry> table add-entry
        s1 eps s3 <entry> table add-entry
        s2 s3 2array stack push ] ;
 : add-numbers ( n obj -- obj )
    2dup [ number? ] bi@ and
    [ + ] [ dup sequence? [ [ + ] with map ] [ nip ] if ] if ;
 : increment-columns ( n assoc -- )
    dup [ >r swap >r add-numbers r> r> set-at ] curry with* assoc-each ;
 :: copy-state-rows ( regexp range -- )
    [let* | len [ range range-length ]
            offset [ regexp state>> range range-min - 1+ ]
            state [ regexp [ len + ] change-state ] |
        regexp nfa>> rows>>
        [ drop range member? ] assoc-filter
        [
            [ offset + ] dip
            [ offset swap add-numbers ] assoc-map
        ] assoc-map
        regexp nfa>> [ assoc-union ] change-rows drop
        range [ range-min ] [ range-max ] bi [ offset + ] bi@ 2array
        regexp stack>> push ] ;
 : last-state ( regexp -- range )
    stack>> peek first2 [a,b] ;
 : set-last-state-final ( ? regexp -- )
    [ stack>> peek second ] [ nfa>> final-states>> ] bi set-at ;
 : apply-plus-closure ( regexp -- )
    [ dup last-state copy-state-rows ]
    [ apply-kleene-closure ]
    [ apply-concatenation ] tri ;
 : apply-question-closure ( regexp -- )
    [ stack>> peek first2 eps swap <entry> ] [ nfa>> add-entry ] bi ;
 : with0 ( obj n quot -- n quot' ) swapd curry ; inline
 : copy-state ( regexp state n -- )
    [ copy-state-rows ] with0 with0 times ;
 :: (exactly-n) ( regexp state n -- )
    regexp state n copy-state
    t regexp set-last-state-final ;
 : exactly-n ( regexp n -- )
    >r dup last-state r> 1- (exactly-n) ;
 : exactly-n-concatenated ( regexp state n -- )
    [ (exactly-n) ] 3keep
    nip 1- [ apply-concatenation ] with0 times ;
 :: at-least-n ( regexp n -- )
    [let | state [ regexp stack>> pop first2 [a,b] ] |
        regexp state n copy-state
        state regexp stack>> push
        regexp apply-kleene-closure ] ; 
 : pop-last ( regexp -- range )
    stack>> pop first2 [a,b] ;
 :: at-most-n ( regexp n -- )
    [let | state [ regexp pop-last ] |
        regexp state n [ 1+ exactly-n-concatenated ] with with each
        regexp n 1- [ apply-alternation ] with0 times
        regexp apply-question-closure ] ;
 :: from-m-to-n ( regexp m n -- )
    [let | state [ regexp pop-last ] |
        regexp state
        m n [a,b] [ exactly-n-concatenated ] with with each
        regexp n m - [ apply-alternation ] with0 times ] ;
 : apply-brace-closure ( regexp from/f to/f comma? -- )
    [
        2dup and
        [ from-m-to-n ]
        [ [ nip at-most-n ] [ at-least-n ] if* ] if
    ] [ drop exactly-n ] if ;
 :: make-nontoken-nfa ( regexp obj -- )
    [let | s0 [ regexp next-state ]
           s1 [ regexp next-state ]
           stack [ regexp stack>> ]
           table [ regexp nfa>> ] |
        s0 obj s1 <entry> table set-entry
        s1 table add-row
        t s1 table final-states>> set-at
        s0 s1 2array stack push ] ;
 : set-start-state ( regexp -- )
    dup stack>> dup empty? [
        2drop
    ] [
        [ nfa>> ] [ pop first ] bi* >>start-state drop
    ] if ;
 : ascii? ( n -- ? ) 0 HEX: 7f between? ;
 : octal-digit? ( n -- ? ) CHAR: 0 CHAR: 7 between? ;
 : decimal-digit? ( n -- ? ) CHAR: 0 CHAR: 9 between? ;
 : hex-digit? ( n -- ? )
    dup decimal-digit? 
    over CHAR: a CHAR: f between? or 
    swap CHAR: A CHAR: F between? or ;
 : control-char? ( n -- ? )
    dup 0 HEX: 1f between? swap HEX: 7f = or ;
 : punct? ( n -- ? )
    "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" member? ;
 : c-identifier-char? ( ch -- ? ) 
    dup alpha? swap CHAR: _ = or ;
 : java-blank? ( n -- ? )
    {   
        CHAR: \s CHAR: \t CHAR: \n
        HEX: b HEX: 7 CHAR: \r
    } member? ;
 : java-printable? ( n -- ? )
    dup alpha? swap punct? or ;
 ERROR: bad-character-class obj ;
 : parse-posix-class ( -- quot )
    next
    CHAR: { expect
    [ get-char CHAR: } = ] take-until
    {
        { "Lower" [ [ letter? ] ] }
        { "Upper" [ [ LETTER? ] ] }
        { "ASCII" [ [ ascii? ] ] }
        { "Alpha" [ [ Letter? ] ] }
        { "Digit" [ [ digit? ] ] }
        { "Alnum" [ [ alpha? ] ] }
        { "Punct" [ [ punct? ] ] }
        { "Graph" [ [ java-printable? ] ] }
        { "Print" [ [ java-printable? ] ] }
        { "Blank" [ [ " \t" member? ] ] }
        { "Cntrl" [ [ control-char? ] ] }
        { "XDigit" [ [ hex-digit? ] ] }
        { "Space" [ [ java-blank? ] ] }
        ! TODO: unicode-character-class, fallthrough in unicode is bad-char-clss
        [ bad-character-class ]
    } case ;
 ERROR: bad-octal number ;
 : parse-octal ( regexp -- )
    next get-char drop
    3 take oct>
    dup 255 > [ bad-octal ] when
    make-nontoken-nfa ;
 ERROR: bad-hex number ;
 : parse-short-hex ( regexp -- )
    next 2 take hex>
    dup number? [ bad-hex ] unless
    make-nontoken-nfa ;
 : parse-long-hex ( regexp -- )
    next 4 take hex>
    dup number? [ bad-hex ] unless
    make-nontoken-nfa ;
 : parse-control-character ( regexp -- )
    next get-char make-nontoken-nfa ;
 : parse-backreference ( regexp obj -- )
    2drop ;
 : dot-construction ( regexp -- )
    [ CHAR: \n = not ] make-nontoken-nfa ;
 : front-anchor-construction ( regexp -- )
    drop ;
 : back-anchor-construction  ( regexp -- )
    drop ;
 : parse-brace ( -- from/f to/f comma? )
    next
    [ get-char CHAR: } = ] take-until
    "," split1 [ [ string>number ] bi@ ] keep >boolean ;
 : take-until-]
    [ get-char CHAR: ] = ] take-until ;
 : make-character-set ( regexp str -- )
    dup
    [ length 1 > ] [ first CHAR: ^ = ] bi and
    [ rest t ] [ f ] if
    >r [ member? ] curry r>
    [ [ not ] compose ] when make-nontoken-nfa ;
 : parse-escaped ( regexp -- )
    next get-char {
        { CHAR: \ [ CHAR: \ make-nontoken-nfa ] }
        { CHAR: t [ CHAR: \t make-nontoken-nfa ] }
        { CHAR: n [ CHAR: \n make-nontoken-nfa ] }
        { CHAR: r [ CHAR: \r make-nontoken-nfa ] }
        { CHAR: f [ HEX: c make-nontoken-nfa ] }
        { CHAR: a [ HEX: 7 make-nontoken-nfa ] }
        { CHAR: e [ HEX: 1b make-nontoken-nfa ] }
        { CHAR: d [ [ digit? ] make-nontoken-nfa ] }
        { CHAR: D [ [ digit? not ] make-nontoken-nfa ] }
        { CHAR: s [ [ java-blank? ] make-nontoken-nfa ] }
        { CHAR: S [ [ java-blank? not ] make-nontoken-nfa ] }
        { CHAR: w [ [ c-identifier-char? ] make-nontoken-nfa ] }
        { CHAR: W [ [ c-identifier-char? not ] make-nontoken-nfa ] }
        { CHAR: p [ parse-posix-class make-nontoken-nfa ] }
        { CHAR: P [ parse-posix-class [ not ] compose make-nontoken-nfa ] }
        { CHAR: x [ parse-short-hex ] }
        { CHAR: u [ parse-long-hex ] }
        { CHAR: 0 [ parse-octal ] }
        { CHAR: c [ parse-control-character ] }
        ! { CHAR: Q [ quot til \E ] }
        ! { CHAR: E [ should be an error, parse this in the Q if exists ] }
        ! { CHAR: b [ ] } ! a word boundary
        ! { CHAR: B [ ] } ! a non-word boundary
        ! { CHAR: A [ ] } ! beginning of input
        ! { CHAR: G [ ] } ! end of previous match
        ! { CHAR: Z [ ] } ! end of input but for the final terminator, if any
        ! { CHAR: z [ ] } ! end of the input
        [ dup digit? [ parse-backreference ] [ make-nontoken-nfa ] if ]
    } case ;
 ERROR: unsupported-token token ;
 : parse-token ( regexp token -- )
    dup {
        { CHAR: ^ [ drop front-anchor-construction ] }
        { CHAR: $ [ drop back-anchor-construction ] }
        { CHAR: \ [ drop parse-escaped ] }
        { CHAR: | [ drop dup concatenation-loop alternation push-stack ] }
        { CHAR: ( [ drop [ 1+ ] change-paren-count left-paren push-stack ] }
        { CHAR: ) [ drop [ 1- ] change-paren-count left-paren apply-til-last ] }
        { CHAR: * [ drop apply-kleene-closure ] }
        { CHAR: + [ drop apply-plus-closure ] }
        { CHAR: ? [ drop apply-question-closure ] }
        { CHAR: { [ drop parse-brace apply-brace-closure ] }
        ! { CHAR: [ [ drop parse-character-set ] }
        ! { CHAR: } [ drop drop "brace" ] }
        ! { CHAR: ? [ drop ] }
        { CHAR: . [ drop dot-construction ] }
        { beginning-of-text [ push-stack ] }
        { end-of-text [
            drop {
                [ check-unbalanced-paren ]
                [ concatenation-loop ]
                [ beginning-of-text apply-til-last ]
                [ set-start-state ]
            } cleave
        ] }
        [ drop make-nontoken-nfa ]
    } case ;
 : (parse-raw-regexp) ( regexp -- )
    get-char [ dupd parse-token next (parse-raw-regexp) ] [ drop ] if* ;
 : parse-raw-regexp ( regexp -- )
    [ beginning-of-text parse-token ]
    [
        dup raw>> dup empty? [
            2drop
        ] [
            [ (parse-raw-regexp) ] string-parse
        ] if
    ]
    [ end-of-text parse-token ] tri ;
 :: find-delta ( states obj table -- keys )
    obj states [
        table get-row at
        [ dup integer? [ 1array ] when unique ] [ H{ } ] if*
    ] with map H{ } clone [ assoc-union ] reduce keys ;
 :: (find-closure) ( states obj assoc table -- keys )
    [let | size [ assoc assoc-size ] |
        assoc states unique assoc-union
        dup assoc-size size > [
            obj states [
                table get-row at* [
                    dup integer? [ 1array ] when
                    obj rot table (find-closure)
                ] [
                    drop
                ] if
            ] with each
        ] when ] ;
 : find-closure ( states obj table -- states )
    >r H{ } r> (find-closure) keys ;
 : find-epsilon-closure ( states table -- states )
    >r eps H{ } r> (find-closure) keys ;
 : filter-special-transition ( vec -- vec' )
    [ drop eps = not ] assoc-filter ;
 : initialize-subset-construction ( regexp -- )
    <vector-table> >>dfa
    [
        nfa>> [ start-state>> 1array ] keep
        find-epsilon-closure 1dlist
    ] [
        swap >>new-states drop
    ] [
        [ dfa>> ] [ nfa>> ] bi
        columns>> filter-special-transition >>columns drop
    ] tri ;
 :: (subset-construction) ( regexp -- )
    [let* | nfa [ regexp nfa>> ]
           dfa [ regexp dfa>> ]
           new-states [ regexp new-states>> ]
           columns [ dfa columns>> keys ] |
        new-states dlist-empty? [
            new-states pop-front
            dup dfa add-row
            columns [
                2dup nfa [ find-delta ] [ find-epsilon-closure ] bi
                dup [ dfa rows>> key? ] [ empty? ] bi or [
                    dup new-states push-back
                ] unless
                dup empty? [ 3drop ] [ <entry> dfa set-entry ] if
            ] with each
            regexp (subset-construction)
        ] unless ] ;
 : set-start/final-states ( regexp -- )
    dup [ nfa>> start-state>> ]
    [ dfa>> rows>> keys [ member? ] with filter first ] bi
    >r dup dfa>> r> >>start-state drop
    dup [ nfa>> final-states>> ] [ dfa>> rows>> ] bi
    [ keys ] bi@
    [ intersect empty? not ] with filter
    >r dfa>> r> >>final-states drop ;
 : subset-construction ( regexp -- )
    [ initialize-subset-construction ]
    [ (subset-construction) ]
    [ set-start/final-states ] tri ;
 : <regexp> ( raw -- obj )
    regexp new
        swap >>raw
        0 >>paren-count
        -1 >>state
        V{ } clone >>stack 
        V{ } clone >>character-sets
        <vector-table> >>nfa
        dup [ parse-raw-regexp ] [ subset-construction ] bi ;
 TUPLE: dfa-traverser
    dfa
    last-state current-state
    text
    start-index current-index
    matches ;
 : <dfa-traverser> ( text dfa -- match )
    dfa>>
    dfa-traverser new
        swap [ start-state>> >>current-state ] keep
        >>dfa
        swap >>text
        0 >>start-index
        0 >>current-index
        V{ } clone >>matches ;
 : final-state? ( dfa-traverser -- ? )
    [ current-state>> ] [ dfa>> final-states>> ] bi
    member? ;
 : text-finished? ( dfa-traverser -- ? )
    [ current-index>> ] [ text>> length ] bi >= ;
 : save-final-state ( dfa-straverser -- )
    [ current-index>> ] [ matches>> ] bi push ;
 : match-done? ( dfa-traverser -- ? )
    dup final-state? [
        dup save-final-state
    ] when text-finished? ;
 : increment-state ( dfa-traverser state -- dfa-traverser )
    >r [ 1+ ] change-current-index
    dup current-state>> >>last-state r>
    >>current-state ;
 : match-transition ( obj hash -- state/f )
    2dup keys [ callable? ] filter predicates
    [ swap at nip ] [ at ] if* ;
 : do-match ( dfa-traverser -- dfa-traverser )
    dup match-done? [
        dup {
            [ current-index>> ]
            [ text>> ]
            [ current-state>> ]
            [ dfa>> rows>> ]
        } cleave
        at >r nth r> match-transition [
            increment-state do-match
        ] when*
    ] unless ;
 : return-match ( dfa-traverser -- interval/f )
    dup matches>> empty? [
        drop f
    ] [
        [ start-index>> ] [ matches>> peek ] bi 1 <range>
    ] if ;
 : match ( string regexp -- pair )
    <dfa-traverser> do-match return-match ;
 : matches? ( string regexp -- ? )
    dupd match [ [ length ] [ range-length 1- ] bi* = ] [ drop f ] if* ;
 ! character classes
 ! TUPLE: range-class from to ;
 ! TUPLE: or-class left right ;
 ! (?:a|b)*  <- does not capture
 ! (a|b)*\1  <- group captured
 ! (?!abba)  negative lookahead  matches ababa but not abbaa
 ! (?=abba)  positive lookahead  matches abbaaa but not abaaa
--- a/extra/state-tables/authors.txt
+++ b/extra/state-tables/authors.txt
@ -0,0 +1 @@
 Doug Coleman
--- a/extra/state-tables/state-tables-tests.factor
+++ b/extra/state-tables/state-tables-tests.factor
@ -0,0 +1,49 @@
 USING: kernel tables tools.test ;
 IN: tables.tests
 : test-table
    <table>
    "a" "c" "z" <entry> over set-entry
    "a" "o" "y" <entry> over set-entry
    "a" "l" "x" <entry> over set-entry
    "b" "o" "y" <entry> over set-entry
    "b" "l" "x" <entry> over set-entry
    "b" "s" "u" <entry> over set-entry ;
 [
    T{ table f
    H{ 
        { "a" H{ { "l" "x" } { "c" "z" } { "o" "y" } } }
        { "b" H{ { "l" "x" } { "s" "u" } { "o" "y" } } }
    }
    H{ { "l" t } { "s" t } { "c" t } { "o" t } } }
 ] [ test-table ] unit-test
 [ "x" t ] [ "a" "l" test-table get-entry ] unit-test
 [ "har" t ] [
    "a" "z" "har" <entry> test-table [ set-entry ] keep
    >r "a" "z" r> get-entry
 ] unit-test
 : vector-test-table
    <vector-table>
    "a" "c" "z" <entry> over add-value
    "a" "c" "r" <entry> over add-value
    "a" "o" "y" <entry> over add-value
    "a" "l" "x" <entry> over add-value
    "b" "o" "y" <entry> over add-value
    "b" "l" "x" <entry> over add-value
    "b" "s" "u" <entry> over add-value ;
 [
 T{ vector-table f
    H{ 
        { "a"
            H{ { "l" "x" } { "c" V{ "z" "r" } } { "o" "y" } } }
        { "b"
            H{ { "l" "x" } { "s" "u" } { "o" "y" } } }
    }
    H{ { "l" t } { "s" t } { "c" t } { "o" t } }
 }
 ] [ vector-test-table ] unit-test
--- a/extra/state-tables/state-tables.factor
+++ b/extra/state-tables/state-tables.factor
@ -0,0 +1,123 @@
 ! Copyright (C) 2008 Doug Coleman.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: kernel namespaces sequences vectors assocs accessors ;
 IN: state-tables
 TUPLE: table rows columns start-state final-states ;
 TUPLE: entry row-key column-key value ;
 GENERIC: add-entry ( entry table -- )
 : make-table ( class -- obj )
    new
        H{ } clone >>rows
        H{ } clone >>columns
        H{ } clone >>final-states ;
 : <table> ( -- obj )
    table make-table ;
 C: <entry> entry
 : (add-row) ( row-key table -- row )
    2dup rows>> at* [
        2nip
    ] [
        drop H{ } clone [ -rot rows>> set-at ] keep
    ] if ;
 : add-row ( row-key table -- )
    (add-row) drop ;
 : add-column ( column-key table -- )
    t -rot columns>> set-at ;
 : set-row ( row row-key table -- )
    rows>> set-at ;
 : lookup-row ( row-key table -- row/f ? )
    rows>> at* ;
 : row-exists? ( row-key table -- ? )
    lookup-row nip ;
 : lookup-column ( column-key table -- column/f ? )
    columns>> at* ;
 : column-exists? ( column-key table -- ? )
    lookup-column nip ;
 ERROR: no-row key ;
 ERROR: no-column key ;
 : get-row ( row-key table -- row )
    dupd lookup-row [
        nip
    ] [
        drop no-row
    ] if ;
 : get-column ( column-key table -- column )
    dupd lookup-column [
        nip
    ] [
        drop no-column
    ] if ;
 : get-entry ( row-key column-key table -- obj ? )
    swapd lookup-row [
        at*
    ] [
        2drop f f
    ] if ;
 : (set-entry) ( entry table -- value column-key row )
    [ >r column-key>> r> add-column ] 2keep
    dupd >r row-key>> r> (add-row)
    >r [ value>> ] keep column-key>> r> ;
 : set-entry ( entry table -- )
    (set-entry) set-at ;
 : delete-entry ( entry table -- )
    >r [ column-key>> ] [ row-key>> ] bi r>
    lookup-row [ delete-at ] [ 2drop ] if ;
 : swap-rows ( row-key1 row-key2 table -- )
    [ tuck get-row >r get-row r> ] 3keep
    >r >r rot r> r> [ set-row ] keep set-row ;
 : member?* ( obj obj -- bool )
    2dup = [ 2drop t ] [ member? ] if ;
 : find-by-column ( column-key data table -- seq )
    swapd 2dup lookup-column 2drop 
    [
        rows>> [
            pick swap at* [ 
                >r pick r> member?* [ , ] [ drop ] if
            ] [ 
                2drop
            ] if 
        ] assoc-each
    ] { } make 2nip ;
 TUPLE: vector-table < table ;
 : <vector-table> ( -- obj )
    vector-table make-table ;
 : add-hash-vector ( value key hash -- )
    2dup at* [
        dup vector? [
            2nip push
        ] [
            V{ } clone [ push ] keep
            -rot >r >r [ push ] keep r> r> set-at
        ] if
    ] [
        drop set-at
    ] if ;
 M: vector-table add-entry ( entry table -- )
    (set-entry) add-hash-vector ;
--- a/extra/symbols/symbols-tests.factor
+++ b/extra/symbols/symbols-tests.factor
@ -1,4 +1,4 @@
-USING: kernel symbols tools.test parser generic words ;
+USING: kernel symbols tools.test parser generic words accessors ;
 IN: symbols.tests
 [ ] [ SYMBOLS: a b c ; ] unit-test
@ -13,3 +13,8 @@ DEFER: blah
 [ f ] [ \ blah generic? ] unit-test
 [ t ] [ \ blah symbol? ] unit-test
 [ "IN: symbols.tests USE: symbols SINGLETONS: blah blah blah ;" eval ]
 [ error>> error>> def>> \ blah eq? ]
 must-fail-with
--- a/extra/symbols/symbols.factor
+++ b/extra/symbols/symbols.factor
@ -10,5 +10,5 @@ IN: symbols
 : SINGLETONS:
    ";" parse-tokens
-    [ create-class-in dup save-location define-singleton-class ] each ;
+    [ create-class-in define-singleton-class ] each ;
    parsing
--- a/unmaintained/regexp2/regexp2-tests.factor
+++ b/unmaintained/regexp2/regexp2-tests.factor
@ -0,0 +1,5 @@
 USING: kernel peg regexp2 sequences tools.test ;
 IN: regexp2.tests
 [ T{ parse-result f T{ slice f 3 3 "056" } 46 } ]
    [ "056" 'octal' parse ] unit-test
--- a/unmaintained/regexp2/regexp2.factor
+++ b/unmaintained/regexp2/regexp2.factor
@ -0,0 +1,262 @@
 USING: assocs combinators.lib kernel math math.parser
 namespaces peg unicode.case sequences unicode.categories
 memoize peg.parsers math.order ;
 USE: io
 USE: tools.walker
 IN: regexp2
 <PRIVATE
 SYMBOL: ignore-case?
 : char=-quot ( ch -- quot )
    ignore-case? get
    [ ch>upper [ swap ch>upper = ] ] [ [ = ] ] if
    curry ;
 : char-between?-quot ( ch1 ch2 -- quot )
    ignore-case? get
    [ [ ch>upper ] bi@ [ >r >r ch>upper r> r> between? ] ]
    [ [ between? ] ]
    if 2curry ;
 : or-predicates ( quots -- quot )
    [ \ dup prefix ] map [ [ t ] ] f short-circuit \ nip suffix ;
 : literal-action [ nip ] curry action ;
 : delay-action [ curry ] curry action ;
 PRIVATE>
 : ascii? ( n -- ? )
    0 HEX: 7f between? ;
 : octal-digit? ( n -- ? ) 
    CHAR: 0 CHAR: 7 between? ;
 : hex-digit? ( n -- ? )
    {
        [ dup digit? ]
        [ dup CHAR: a CHAR: f between? ]
        [ dup CHAR: A CHAR: F between? ]
    } || nip ;
 : control-char? ( n -- ? )
    { [ dup 0 HEX: 1f between? ] [ dup HEX: 7f = ] } || nip ;
 : punct? ( n -- ? )
    "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" member? ;
 : c-identifier-char? ( ch -- ? )
    { [ dup alpha? ] [ dup CHAR: _ = ] } || nip ;
 : java-blank? ( n -- ? )
    {
        CHAR: \s
        CHAR: \t CHAR: \n CHAR: \r
        HEX: c HEX: 7 HEX: 1b
    } member? ;
 : java-printable? ( n -- ? )
    { [ dup alpha? ] [ dup punct? ] } || nip ;
 MEMO: 'ordinary-char' ( -- parser )
    [ "\\^*+?|(){}[$" member? not ] satisfy
    [ char=-quot ] action ;
 MEMO: 'octal-digit' ( -- parser ) [ octal-digit? ] satisfy ;
 MEMO: 'octal' ( -- parser )
    "0" token hide 'octal-digit' 1 3 from-m-to-n 2seq
    [ first oct> ] action ;
 MEMO: 'hex-digit' ( -- parser ) [ hex-digit? ] satisfy ;
 MEMO: 'hex' ( -- parser )
    "x" token hide 'hex-digit' 2 exactly-n 2seq
    "u" token hide 'hex-digit' 6 exactly-n 2seq 2choice
    [ first hex> ] action ;
 : satisfy-tokens ( assoc -- parser )
    [ >r token r> literal-action ] { } assoc>map choice ;
 MEMO: 'simple-escape-char' ( -- parser )
    {
        { "\\" CHAR: \\ }
        { "t"  CHAR: \t }
        { "n"  CHAR: \n }
        { "r"  CHAR: \r }
        { "f"  HEX: c   }
        { "a"  HEX: 7   }
        { "e"  HEX: 1b  }
    } [ char=-quot ] assoc-map satisfy-tokens ;
 MEMO: 'predefined-char-class' ( -- parser )
    {   
        { "d" [ digit? ] } 
        { "D" [ digit? not ] }
        { "s" [ java-blank? ] } 
        { "S" [ java-blank? not ] }
        { "w" [ c-identifier-char? ] } 
        { "W" [ c-identifier-char? not ] }
    } satisfy-tokens ;
 MEMO: 'posix-character-class' ( -- parser )
    {   
        { "Lower" [ letter? ] }
        { "Upper" [ LETTER? ] }
        { "ASCII" [ ascii? ] }
        { "Alpha" [ Letter? ] }
        { "Digit" [ digit? ] }
        { "Alnum" [ alpha? ] }
        { "Punct" [ punct? ] }
        { "Graph" [ java-printable? ] }
        { "Print" [ java-printable? ] }
        { "Blank" [ " \t" member? ] }
        { "Cntrl" [ control-char? ] }
        { "XDigit" [ hex-digit? ] }
        { "Space" [ java-blank? ] }
    } satisfy-tokens "p{" "}" surrounded-by ;
 MEMO: 'simple-escape' ( -- parser )
    [
        'octal' ,
        'hex' ,
        "c" token hide [ LETTER? ] satisfy 2seq ,
        any-char ,
    ] choice* [ char=-quot ] action ;
 MEMO: 'escape' ( -- parser )
    "\\" token hide [
        'simple-escape-char' ,
        'predefined-char-class' ,
        'posix-character-class' ,
        'simple-escape' ,
    ] choice* 2seq ;
 MEMO: 'any-char' ( -- parser )
    "." token [ drop t ] literal-action ;
 MEMO: 'char' ( -- parser )
    'any-char' 'escape' 'ordinary-char' 3choice [ satisfy ] action ;
 DEFER: 'regexp'
 TUPLE: group-result str ;
 C: <group-result> group-result
 MEMO: 'non-capturing-group' ( -- parser )
    "?:" token hide 'regexp' ;
 MEMO: 'positive-lookahead-group' ( -- parser )
    "?=" token hide 'regexp' [ ensure ] action ;
 MEMO: 'negative-lookahead-group' ( -- parser )
    "?!" token hide 'regexp' [ ensure-not ] action ;
 MEMO: 'simple-group' ( -- parser )
    'regexp' [ [ <group-result> ] action ] action ;
 MEMO: 'group' ( -- parser )
    [
        'non-capturing-group' ,
        'positive-lookahead-group' ,
        'negative-lookahead-group' ,
        'simple-group' ,
    ] choice* "(" ")" surrounded-by ;
 MEMO: 'range' ( -- parser )
    any-char "-" token hide any-char 3seq
    [ first2 char-between?-quot ] action ;
 MEMO: 'character-class-term' ( -- parser )
    'range'
    'escape'
    [ "\\]" member? not ] satisfy [ char=-quot ] action
    3choice ;
 MEMO: 'positive-character-class' ( -- parser )
    ! todo
    "]" token [ CHAR: ] = ] literal-action 'character-class-term' repeat0 2seq 
    'character-class-term' repeat1 2choice [ or-predicates ] action ;
 MEMO: 'negative-character-class' ( -- parser )
    "^" token hide 'positive-character-class' 2seq
    [ [ not ] append ] action ;
 MEMO: 'character-class' ( -- parser )
    'negative-character-class' 'positive-character-class' 2choice
    "[" "]" surrounded-by [ satisfy ] action ;
 MEMO: 'escaped-seq' ( -- parser )
    any-char repeat1
    [ ignore-case? get token ] action "\\Q" "\\E" surrounded-by ;
 MEMO: 'break' ( quot -- parser )
    satisfy ensure
    epsilon just 2choice ;
 MEMO: 'break-escape' ( -- parser )
    "$" token [ "\r\n" member? ] 'break' literal-action
    "\\b" token [ blank? ] 'break' literal-action
    "\\B" token [ blank? not ] 'break' literal-action
    "\\z" token epsilon just literal-action 4choice ;
 MEMO: 'simple' ( -- parser )
    [
        'escaped-seq' ,
        'break-escape' ,
        'group' ,
        'character-class' ,
        'char' ,
    ] choice* ;
 MEMO: 'exactly-n' ( -- parser )
    'integer' [ exactly-n ] delay-action ;
 MEMO: 'at-least-n' ( -- parser )
    'integer' "," token hide 2seq [ at-least-n ] delay-action ;
 MEMO: 'at-most-n' ( -- parser )
    "," token hide 'integer' 2seq [ at-most-n ] delay-action ;
 MEMO: 'from-m-to-n' ( -- parser )
    'integer' "," token hide 'integer' 3seq
    [ first2 from-m-to-n ] delay-action ;
 MEMO: 'greedy-interval' ( -- parser )
    'exactly-n' 'at-least-n' 'at-most-n' 'from-m-to-n' 4choice ;
 MEMO: 'interval' ( -- parser )
    'greedy-interval'
    'greedy-interval' "?" token hide 2seq [ "reluctant {}" print ] action
    'greedy-interval' "+" token hide 2seq [ "possessive {}" print ] action
    3choice "{" "}" surrounded-by ;
 MEMO: 'repetition' ( -- parser )
    [
        ! Possessive
        ! "*+" token [ <!*> ] literal-action ,
        ! "++" token [ <!+> ] literal-action ,
        ! "?+" token [ <!?> ] literal-action ,
        ! Reluctant
        ! "*?" token [ <(*)> ] literal-action ,
        ! "+?" token [ <(+)> ] literal-action ,
        ! "??" token [ <(?)> ] literal-action ,
        ! Greedy
        "*" token [ repeat0 ] literal-action ,
        "+" token [ repeat1 ] literal-action ,
        "?" token [ optional ] literal-action ,
    ] choice* ;
 MEMO: 'dummy' ( -- parser )
    epsilon [ ] literal-action ;
 ! todo -- check the action
 ! MEMO: 'term' ( -- parser )
    ! 'simple'
    ! 'repetition' 'interval' 'dummy' 3choice 2seq [ first2 call ] action
    ! <!+> [ <and-parser> ] action ;