diff --git a/extra/ftp/server/server.factor b/extra/ftp/server/server.factor index ef20885a5f..cce69dde0f 100644 --- a/extra/ftp/server/server.factor +++ b/extra/ftp/server/server.factor @@ -107,9 +107,14 @@ ERROR: type-error type ; drop 215 "UNIX Type: L8" server-response ; +: if-command-promise ( quot -- ) + >r client get command-promise>> r> + [ "Establish an active or passive connection first" ftp-error ] if* ; + : handle-STOR ( obj -- ) [ - drop + tokenized>> second + [ >r r> fulfill ] if-command-promise ] [ 2drop ] recover ; @@ -122,7 +127,7 @@ ERROR: type-error type ; 150 "Here comes the directory listing." server-response ; : finish-directory ( -- ) - 226 "Directory send OK." server-response ; + 226 "Opening " server-response ; GENERIC: service-command ( stream obj -- ) @@ -135,21 +140,25 @@ M: ftp-list service-command ( stream obj -- ) ] with-output-stream finish-directory ; -: start-file-transfer ( path -- ) +: transfer-outgoing-file ( path -- ) 150 "Opening BINARY mode data connection for " rot [ file-name ] [ " " swap file-info file-info-size number>string "(" " bytes)." swapd 3append append ] bi 3append server-response ; - + +: transfer-incoming-file ( path -- ) + 150 "Opening BINARY mode data connection for " rot append + server-response ; + : finish-file-transfer ( -- ) 226 "File send OK." server-response ; M: ftp-get service-command ( stream obj -- ) [ path>> - [ start-file-transfer ] + [ transfer-outgoing-file ] [ binary swap stream-copy ] bi finish-file-transfer ] [ @@ -159,8 +168,8 @@ M: ftp-get service-command ( stream obj -- ) M: ftp-put service-command ( stream obj -- ) [ path>> - [ start-file-transfer ] - [ binary swap stream-copy ] bi + [ transfer-incoming-file ] + [ binary stream-copy ] bi finish-file-transfer ] [ 3drop "File transfer failed" ftp-error @@ -177,16 +186,12 @@ M: ftp-put service-command ( stream obj -- ) service-command ] [ client get f >>command-promise drop ] - [ ] cleanup + [ drop ] cleanup ] with-destructors ; -: if-command-promise ( quot -- ) - >r client get command-promise>> r> - [ "Establish an active or passive connection first" ftp-error ] if* ; - : handle-LIST ( obj -- ) drop - [ swap fulfill ] if-command-promise ; + [ >r r> fulfill ] if-command-promise ; : handle-SIZE ( obj -- ) [ @@ -262,7 +267,7 @@ ERROR: not-a-directory ; ! { "REIN" [ drop client get reset-ftp-client t ] } { "QUIT" [ handle-QUIT f ] } - ! { "PORT" [ ] } + ! { "PORT" [ ] } ! TODO { "PASV" [ handle-PASV t ] } ! { "MODE" [ ] } { "TYPE" [ handle-TYPE t ] } @@ -270,7 +275,7 @@ ERROR: not-a-directory ; ! { "ALLO" [ ] } ! { "REST" [ ] } - ! { "STOR" [ handle-STOR t ] } + { "STOR" [ handle-STOR t ] } ! { "STOU" [ ] } { "RETR" [ handle-RETR t ] } { "LIST" [ handle-LIST t ] } @@ -279,9 +284,10 @@ ERROR: not-a-directory ; ! { "APPE" [ ] } ! { "RNFR" [ ] } ! { "RNTO" [ ] } - ! { "DELE" [ ] } - ! { "RMD" [ ] } - ! { "MKD" [ ] } + ! { "DELE" [ handle-DELE t ] } + ! { "RMD" [ handle-RMD t ] } + ! ! { "XRMD" [ handle-XRMD t ] } + ! { "MKD" [ handle-MKD t ] } { "PWD" [ handle-PWD t ] } ! { "ABOR" [ ] } diff --git a/extra/money/money.factor b/extra/money/money.factor index 4584daf592..1fd0a66555 100644 --- a/extra/money/money.factor +++ b/extra/money/money.factor @@ -15,17 +15,14 @@ IN: money "." % number>string 2 CHAR: 0 pad-left % ] "" make print ; -TUPLE: not-a-decimal ; - -: not-a-decimal ( -- * ) - T{ not-a-decimal } throw ; +ERROR: not-a-decimal x ; : parse-decimal ( str -- ratio ) "." split1 >r dup "-" head? [ drop t "0" ] [ f swap ] if r> [ dup empty? [ drop "0" ] when ] bi@ dup length - >r [ string>number dup [ not-a-decimal ] unless ] bi@ r> + >r [ dup string>number [ nip ] [ not-a-decimal ] if* ] bi@ r> 10 swap ^ / + swap [ neg ] when ; : DECIMAL: diff --git a/extra/regexp4/regexp4-tests.factor b/extra/regexp4/regexp4-tests.factor new file mode 100644 index 0000000000..e878351b7e --- /dev/null +++ b/extra/regexp4/regexp4-tests.factor @@ -0,0 +1,138 @@ +USING: regexp4 tools.test kernel ; +IN: regexp4-tests + +[ f ] [ "b" "a*" matches? ] unit-test +[ t ] [ "" "a*" matches? ] unit-test +[ t ] [ "a" "a*" matches? ] unit-test +[ t ] [ "aaaaaaa" "a*" matches? ] unit-test +[ f ] [ "ab" "a*" matches? ] unit-test + +[ t ] [ "abc" "abc" matches? ] unit-test +[ t ] [ "a" "a|b|c" matches? ] unit-test +[ t ] [ "b" "a|b|c" matches? ] unit-test +[ t ] [ "c" "a|b|c" matches? ] unit-test +[ f ] [ "c" "d|e|f" matches? ] unit-test + +[ f ] [ "aa" "a|b|c" matches? ] unit-test +[ f ] [ "bb" "a|b|c" matches? ] unit-test +[ f ] [ "cc" "a|b|c" matches? ] unit-test +[ f ] [ "cc" "d|e|f" matches? ] unit-test + +[ f ] [ "" "a+" matches? ] unit-test +[ t ] [ "a" "a+" matches? ] unit-test +[ t ] [ "aa" "a+" matches? ] unit-test + +[ t ] [ "" "a?" matches? ] unit-test +[ t ] [ "a" "a?" matches? ] unit-test +[ f ] [ "aa" "a?" matches? ] unit-test + +[ f ] [ "" "." matches? ] unit-test +[ t ] [ "a" "." matches? ] unit-test +[ t ] [ "." "." matches? ] unit-test +! [ f ] [ "\n" "." matches? ] unit-test + +[ f ] [ "" ".+" matches? ] unit-test +[ t ] [ "a" ".+" matches? ] unit-test +[ t ] [ "ab" ".+" matches? ] unit-test + + +[ t ] [ "" "a|b*|c+|d?" matches? ] unit-test +[ t ] [ "a" "a|b*|c+|d?" matches? ] unit-test +[ t ] [ "c" "a|b*|c+|d?" matches? ] unit-test +[ t ] [ "cc" "a|b*|c+|d?" matches? ] unit-test +[ f ] [ "ccd" "a|b*|c+|d?" matches? ] unit-test +[ t ] [ "d" "a|b*|c+|d?" matches? ] unit-test + +[ t ] [ "foo" "foo|bar" matches? ] unit-test +[ t ] [ "bar" "foo|bar" matches? ] unit-test +[ f ] [ "foobar" "foo|bar" matches? ] unit-test + +[ f ] [ "" "(a)" matches? ] unit-test +[ t ] [ "a" "(a)" matches? ] unit-test +[ f ] [ "aa" "(a)" matches? ] unit-test +[ t ] [ "aa" "(a*)" matches? ] unit-test + +[ f ] [ "aababaaabbac" "(a|b)+" matches? ] unit-test +[ t ] [ "ababaaabba" "(a|b)+" matches? ] unit-test + +[ f ] [ "" "a{1}" matches? ] unit-test +[ t ] [ "a" "a{1}" matches? ] unit-test +[ f ] [ "aa" "a{1}" matches? ] unit-test + +[ f ] [ "a" "a{2,}" matches? ] unit-test +[ t ] [ "aaa" "a{2,}" matches? ] unit-test +[ t ] [ "aaaa" "a{2,}" matches? ] unit-test +[ t ] [ "aaaaa" "a{2,}" matches? ] unit-test + +[ t ] [ "" "a{,2}" matches? ] unit-test +[ t ] [ "a" "a{,2}" matches? ] unit-test +[ t ] [ "aa" "a{,2}" matches? ] unit-test +[ f ] [ "aaa" "a{,2}" matches? ] unit-test +[ f ] [ "aaaa" "a{,2}" matches? ] unit-test +[ f ] [ "aaaaa" "a{,2}" matches? ] unit-test + +[ f ] [ "" "a{1,3}" matches? ] unit-test +[ t ] [ "a" "a{1,3}" matches? ] unit-test +[ t ] [ "aa" "a{1,3}" matches? ] unit-test +[ t ] [ "aaa" "a{1,3}" matches? ] unit-test +[ f ] [ "aaaa" "a{1,3}" matches? ] unit-test + +[ f ] [ "" "[a]" matches? ] unit-test +[ t ] [ "a" "[a]" matches? ] unit-test +[ t ] [ "a" "[abc]" matches? ] unit-test +[ f ] [ "b" "[a]" matches? ] unit-test +[ f ] [ "d" "[abc]" matches? ] unit-test +[ t ] [ "ab" "[abc]{1,2}" matches? ] unit-test +[ f ] [ "abc" "[abc]{1,2}" matches? ] unit-test + +[ f ] [ "" "[^a]" matches? ] unit-test +[ f ] [ "a" "[^a]" matches? ] unit-test +[ f ] [ "a" "[^abc]" matches? ] unit-test +[ t ] [ "b" "[^a]" matches? ] unit-test +[ t ] [ "d" "[^abc]" matches? ] unit-test +[ f ] [ "ab" "[^abc]{1,2}" matches? ] unit-test +[ f ] [ "abc" "[^abc]{1,2}" matches? ] unit-test + +[ t ] [ "]" "[]]" matches? ] unit-test +[ f ] [ "]" "[^]]" matches? ] unit-test + +! [ "^" "[^]" matches? ] must-fail +[ t ] [ "^" "[]^]" matches? ] unit-test +[ t ] [ "]" "[]^]" matches? ] unit-test + +[ t ] [ "[" "[[]" matches? ] unit-test +[ f ] [ "^" "[^^]" matches? ] unit-test +[ t ] [ "a" "[^^]" matches? ] unit-test + +[ t ] [ "-" "[-]" matches? ] unit-test +[ f ] [ "a" "[-]" matches? ] unit-test +[ f ] [ "-" "[^-]" matches? ] unit-test +[ t ] [ "a" "[^-]" matches? ] unit-test + +[ t ] [ "-" "[-a]" matches? ] unit-test +[ t ] [ "a" "[-a]" matches? ] unit-test +[ t ] [ "-" "[a-]" matches? ] unit-test +[ t ] [ "a" "[a-]" matches? ] unit-test +[ f ] [ "b" "[a-]" matches? ] unit-test +[ f ] [ "-" "[^-]" matches? ] unit-test +[ t ] [ "a" "[^-]" matches? ] unit-test + +[ f ] [ "-" "[a-c]" matches? ] unit-test +[ t ] [ "-" "[^a-c]" matches? ] unit-test +[ t ] [ "b" "[a-c]" matches? ] unit-test +[ f ] [ "b" "[^a-c]" matches? ] unit-test + +[ t ] [ "-" "[a-c-]" matches? ] unit-test +[ f ] [ "-" "[^a-c-]" matches? ] unit-test + +[ t ] [ "\\" "[\\\\]" matches? ] unit-test +[ f ] [ "a" "[\\\\]" matches? ] unit-test +[ f ] [ "\\" "[^\\\\]" matches? ] unit-test +[ t ] [ "a" "[^\\\\]" matches? ] unit-test + + +! ((A)(B(C))) +! 1. ((A)(B(C))) +! 2. (A) +! 3. (B(C)) +! 4. (C) diff --git a/extra/regexp4/regexp4.factor b/extra/regexp4/regexp4.factor new file mode 100644 index 0000000000..faf52f098d --- /dev/null +++ b/extra/regexp4/regexp4.factor @@ -0,0 +1,547 @@ +! Copyright (C) 2008 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors arrays assocs combinators kernel math +sequences namespaces locals combinators.lib state-tables +math.parser state-parser sets dlists unicode.categories +math.order quotations shuffle math.ranges splitting +symbols ; +IN: regexp4 + +SYMBOLS: eps start-state final-state beginning-of-text +end-of-text left-paren right-paren alternation ; + +SYMBOL: runtim-epsilon + +TUPLE: regexp raw paren-count bracket-count +state stack nfa new-states dfa minimized-dfa +dot-matches-newlines? character-sets capture-group +captured-groups ; + +TUPLE: capture-group n range ; + +ERROR: paren-underflow ; +ERROR: unbalanced-paren ; + +: push-stack ( regexp token -- ) swap stack>> push ; +: push-all-stack ( regexp seq -- ) swap stack>> push-all ; +: next-state ( regexp -- n ) [ 1+ ] change-state state>> ; + +: check-paren-underflow ( regexp -- ) + paren-count>> 0 < [ paren-underflow ] when ; + +: check-unbalanced-paren ( regexp -- ) + paren-count>> 0 > [ unbalanced-paren ] when ; + +:: (apply-alternation) ( stack regexp -- ) + [let | s2 [ stack peek first ] + s3 [ stack pop second ] + s0 [ stack peek alternation = [ stack pop* ] when stack peek first ] + s1 [ stack pop second ] + s4 [ regexp next-state ] + s5 [ regexp next-state ] + table [ regexp nfa>> ] | + s5 table add-row + s4 eps s0 table add-entry + s4 eps s2 table add-entry + s1 eps s5 table add-entry + s3 eps s5 table add-entry + s1 table final-states>> delete-at + s3 table final-states>> delete-at + t s5 table final-states>> set-at + s4 s5 2array stack push ] ; + +: apply-alternation ( regexp -- ) + [ stack>> ] [ (apply-alternation) ] bi ; + +: apply-alternation? ( stack -- ? ) + dup length dup 3 < + [ 2drop f ] [ 2 - swap nth alternation = ] if ; + +:: (apply-concatenation) ( stack regexp -- ) + [let* | + s2 [ stack peek first ] + s3 [ stack pop second ] + s0 [ stack peek first ] + s1 [ stack pop second ] + table [ regexp nfa>> ] | + s1 eps s2 table set-entry + s1 table final-states>> delete-at + s3 table add-row + s0 s3 2array stack push ] ; + +: apply-concatenation ( regexp -- ) + [ stack>> ] [ (apply-concatenation) ] bi ; + +: apply-concatenation? ( seq -- ? ) + dup length dup 2 < + [ 2drop f ] [ 2 - swap nth array? ] if ; + +: apply-loop ( seq regexp -- seq regexp ) + over length 1 > [ + 2dup over apply-alternation? + [ (apply-alternation) ] [ (apply-concatenation) ] if apply-loop + ] when ; + +: apply-til-last ( token regexp -- ) + swap [ + tuck index cut reverse dup pop* + ] change-stack >r reverse r> apply-loop stack>> push-all ; + +: concatenation-loop ( regexp -- ) + dup stack>> dup apply-concatenation? + [ over (apply-concatenation) concatenation-loop ] [ 2drop ] if ; + +:: apply-kleene-closure ( regexp -- ) + [let* | stack [ regexp stack>> ] + s0 [ stack peek first ] + s1 [ stack pop second ] + s2 [ regexp next-state ] + s3 [ regexp next-state ] + table [ regexp nfa>> ] | + s1 table final-states>> delete-at + t s3 table final-states>> set-at + s3 table add-row + s1 eps s0 table add-entry + s2 eps s0 table add-entry + s2 eps s3 table add-entry + s1 eps s3 table add-entry + s2 s3 2array stack push ] ; + +: add-numbers ( n obj -- obj ) + 2dup [ number? ] bi@ and + [ + ] [ dup sequence? [ [ + ] with map ] [ nip ] if ] if ; + +: increment-columns ( n assoc -- ) + dup [ >r swap >r add-numbers r> r> set-at ] curry with* assoc-each ; + +:: copy-state-rows ( regexp range -- ) + [let* | len [ range range-length ] + offset [ regexp state>> range range-min - 1+ ] + state [ regexp [ len + ] change-state ] | + regexp nfa>> rows>> + [ drop range member? ] assoc-filter + [ + [ offset + ] dip + [ offset swap add-numbers ] assoc-map + ] assoc-map + regexp nfa>> [ assoc-union ] change-rows drop + range [ range-min ] [ range-max ] bi [ offset + ] bi@ 2array + regexp stack>> push ] ; + +: last-state ( regexp -- range ) + stack>> peek first2 [a,b] ; + +: set-last-state-final ( ? regexp -- ) + [ stack>> peek second ] [ nfa>> final-states>> ] bi set-at ; + +: apply-plus-closure ( regexp -- ) + [ dup last-state copy-state-rows ] + [ apply-kleene-closure ] + [ apply-concatenation ] tri ; + +: apply-question-closure ( regexp -- ) + [ stack>> peek first2 eps swap ] [ nfa>> add-entry ] bi ; + +: with0 ( obj n quot -- n quot' ) swapd curry ; inline + +: copy-state ( regexp state n -- ) + [ copy-state-rows ] with0 with0 times ; + +:: (exactly-n) ( regexp state n -- ) + regexp state n copy-state + t regexp set-last-state-final ; + +: exactly-n ( regexp n -- ) + >r dup last-state r> 1- (exactly-n) ; + +: exactly-n-concatenated ( regexp state n -- ) + [ (exactly-n) ] 3keep + nip 1- [ apply-concatenation ] with0 times ; + +:: at-least-n ( regexp n -- ) + [let | state [ regexp stack>> pop first2 [a,b] ] | + regexp state n copy-state + state regexp stack>> push + regexp apply-kleene-closure ] ; + +: pop-last ( regexp -- range ) + stack>> pop first2 [a,b] ; + +:: at-most-n ( regexp n -- ) + [let | state [ regexp pop-last ] | + regexp state n [ 1+ exactly-n-concatenated ] with with each + regexp n 1- [ apply-alternation ] with0 times + regexp apply-question-closure ] ; + +:: from-m-to-n ( regexp m n -- ) + [let | state [ regexp pop-last ] | + regexp state + m n [a,b] [ exactly-n-concatenated ] with with each + regexp n m - [ apply-alternation ] with0 times ] ; + +: apply-brace-closure ( regexp from/f to/f comma? -- ) + [ + 2dup and + [ from-m-to-n ] + [ [ nip at-most-n ] [ at-least-n ] if* ] if + ] [ drop exactly-n ] if ; + +:: make-nontoken-nfa ( regexp obj -- ) + [let | s0 [ regexp next-state ] + s1 [ regexp next-state ] + stack [ regexp stack>> ] + table [ regexp nfa>> ] | + s0 obj s1 table set-entry + s1 table add-row + t s1 table final-states>> set-at + s0 s1 2array stack push ] ; + +: set-start-state ( regexp -- ) + dup stack>> dup empty? [ + 2drop + ] [ + [ nfa>> ] [ pop first ] bi* >>start-state drop + ] if ; + +: ascii? ( n -- ? ) 0 HEX: 7f between? ; +: octal-digit? ( n -- ? ) CHAR: 0 CHAR: 7 between? ; +: decimal-digit? ( n -- ? ) CHAR: 0 CHAR: 9 between? ; + +: hex-digit? ( n -- ? ) + dup decimal-digit? + over CHAR: a CHAR: f between? or + swap CHAR: A CHAR: F between? or ; + +: control-char? ( n -- ? ) + dup 0 HEX: 1f between? swap HEX: 7f = or ; + +: punct? ( n -- ? ) + "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" member? ; + +: c-identifier-char? ( ch -- ? ) + dup alpha? swap CHAR: _ = or ; + +: java-blank? ( n -- ? ) + { + CHAR: \s CHAR: \t CHAR: \n + HEX: b HEX: 7 CHAR: \r + } member? ; + +: java-printable? ( n -- ? ) + dup alpha? swap punct? or ; + +ERROR: bad-character-class obj ; + +: parse-posix-class ( -- quot ) + next + CHAR: { expect + [ get-char CHAR: } = ] take-until + { + { "Lower" [ [ letter? ] ] } + { "Upper" [ [ LETTER? ] ] } + { "ASCII" [ [ ascii? ] ] } + { "Alpha" [ [ Letter? ] ] } + { "Digit" [ [ digit? ] ] } + { "Alnum" [ [ alpha? ] ] } + { "Punct" [ [ punct? ] ] } + { "Graph" [ [ java-printable? ] ] } + { "Print" [ [ java-printable? ] ] } + { "Blank" [ [ " \t" member? ] ] } + { "Cntrl" [ [ control-char? ] ] } + { "XDigit" [ [ hex-digit? ] ] } + { "Space" [ [ java-blank? ] ] } + ! TODO: unicode-character-class, fallthrough in unicode is bad-char-clss + [ bad-character-class ] + } case ; + +ERROR: bad-octal number ; + +: parse-octal ( regexp -- ) + next get-char drop + 3 take oct> + dup 255 > [ bad-octal ] when + make-nontoken-nfa ; + +ERROR: bad-hex number ; + +: parse-short-hex ( regexp -- ) + next 2 take hex> + dup number? [ bad-hex ] unless + make-nontoken-nfa ; + +: parse-long-hex ( regexp -- ) + next 4 take hex> + dup number? [ bad-hex ] unless + make-nontoken-nfa ; + +: parse-control-character ( regexp -- ) + next get-char make-nontoken-nfa ; + +: parse-backreference ( regexp obj -- ) + 2drop ; + +: dot-construction ( regexp -- ) + [ CHAR: \n = not ] make-nontoken-nfa ; + +: front-anchor-construction ( regexp -- ) + drop ; + +: back-anchor-construction ( regexp -- ) + drop ; + +: parse-brace ( -- from/f to/f comma? ) + next + [ get-char CHAR: } = ] take-until + "," split1 [ [ string>number ] bi@ ] keep >boolean ; + +: take-until-] + [ get-char CHAR: ] = ] take-until ; + +: make-character-set ( regexp str -- ) + dup + [ length 1 > ] [ first CHAR: ^ = ] bi and + [ rest t ] [ f ] if + >r [ member? ] curry r> + [ [ not ] compose ] when make-nontoken-nfa ; + +: parse-escaped ( regexp -- ) + next get-char { + { CHAR: \ [ CHAR: \ make-nontoken-nfa ] } + { CHAR: t [ CHAR: \t make-nontoken-nfa ] } + { CHAR: n [ CHAR: \n make-nontoken-nfa ] } + { CHAR: r [ CHAR: \r make-nontoken-nfa ] } + { CHAR: f [ HEX: c make-nontoken-nfa ] } + { CHAR: a [ HEX: 7 make-nontoken-nfa ] } + { CHAR: e [ HEX: 1b make-nontoken-nfa ] } + + { CHAR: d [ [ digit? ] make-nontoken-nfa ] } + { CHAR: D [ [ digit? not ] make-nontoken-nfa ] } + { CHAR: s [ [ java-blank? ] make-nontoken-nfa ] } + { CHAR: S [ [ java-blank? not ] make-nontoken-nfa ] } + { CHAR: w [ [ c-identifier-char? ] make-nontoken-nfa ] } + { CHAR: W [ [ c-identifier-char? not ] make-nontoken-nfa ] } + + { CHAR: p [ parse-posix-class make-nontoken-nfa ] } + { CHAR: P [ parse-posix-class [ not ] compose make-nontoken-nfa ] } + { CHAR: x [ parse-short-hex ] } + { CHAR: u [ parse-long-hex ] } + { CHAR: 0 [ parse-octal ] } + { CHAR: c [ parse-control-character ] } + + ! { CHAR: Q [ quot til \E ] } + ! { CHAR: E [ should be an error, parse this in the Q if exists ] } + + ! { CHAR: b [ ] } ! a word boundary + ! { CHAR: B [ ] } ! a non-word boundary + ! { CHAR: A [ ] } ! beginning of input + ! { CHAR: G [ ] } ! end of previous match + ! { CHAR: Z [ ] } ! end of input but for the final terminator, if any + ! { CHAR: z [ ] } ! end of the input + [ dup digit? [ parse-backreference ] [ make-nontoken-nfa ] if ] + } case ; + +ERROR: unsupported-token token ; +: parse-token ( regexp token -- ) + dup { + { CHAR: ^ [ drop front-anchor-construction ] } + { CHAR: $ [ drop back-anchor-construction ] } + { CHAR: \ [ drop parse-escaped ] } + { CHAR: | [ drop dup concatenation-loop alternation push-stack ] } + { CHAR: ( [ drop [ 1+ ] change-paren-count left-paren push-stack ] } + { CHAR: ) [ drop [ 1- ] change-paren-count left-paren apply-til-last ] } + { CHAR: * [ drop apply-kleene-closure ] } + { CHAR: + [ drop apply-plus-closure ] } + { CHAR: ? [ drop apply-question-closure ] } + { CHAR: { [ drop parse-brace apply-brace-closure ] } + ! { CHAR: [ [ drop parse-character-set ] } + ! { CHAR: } [ drop drop "brace" ] } + ! { CHAR: ? [ drop ] } + { CHAR: . [ drop dot-construction ] } + { beginning-of-text [ push-stack ] } + { end-of-text [ + drop { + [ check-unbalanced-paren ] + [ concatenation-loop ] + [ beginning-of-text apply-til-last ] + [ set-start-state ] + } cleave + ] } + [ drop make-nontoken-nfa ] + } case ; + +: (parse-raw-regexp) ( regexp -- ) + get-char [ dupd parse-token next (parse-raw-regexp) ] [ drop ] if* ; + +: parse-raw-regexp ( regexp -- ) + [ beginning-of-text parse-token ] + [ + dup raw>> dup empty? [ + 2drop + ] [ + [ (parse-raw-regexp) ] string-parse + ] if + ] + [ end-of-text parse-token ] tri ; + +:: find-delta ( states obj table -- keys ) + obj states [ + table get-row at + [ dup integer? [ 1array ] when unique ] [ H{ } ] if* + ] with map H{ } clone [ assoc-union ] reduce keys ; + +:: (find-closure) ( states obj assoc table -- keys ) + [let | size [ assoc assoc-size ] | + assoc states unique assoc-union + dup assoc-size size > [ + obj states [ + table get-row at* [ + dup integer? [ 1array ] when + obj rot table (find-closure) + ] [ + drop + ] if + ] with each + ] when ] ; + +: find-closure ( states obj table -- states ) + >r H{ } r> (find-closure) keys ; + +: find-epsilon-closure ( states table -- states ) + >r eps H{ } r> (find-closure) keys ; + +: filter-special-transition ( vec -- vec' ) + [ drop eps = not ] assoc-filter ; + +: initialize-subset-construction ( regexp -- ) + >>dfa + [ + nfa>> [ start-state>> 1array ] keep + find-epsilon-closure 1dlist + ] [ + swap >>new-states drop + ] [ + [ dfa>> ] [ nfa>> ] bi + columns>> filter-special-transition >>columns drop + ] tri ; + +:: (subset-construction) ( regexp -- ) + [let* | nfa [ regexp nfa>> ] + dfa [ regexp dfa>> ] + new-states [ regexp new-states>> ] + columns [ dfa columns>> keys ] | + + new-states dlist-empty? [ + new-states pop-front + dup dfa add-row + columns [ + 2dup nfa [ find-delta ] [ find-epsilon-closure ] bi + dup [ dfa rows>> key? ] [ empty? ] bi or [ + dup new-states push-back + ] unless + dup empty? [ 3drop ] [ dfa set-entry ] if + ] with each + regexp (subset-construction) + ] unless ] ; + +: set-start/final-states ( regexp -- ) + dup [ nfa>> start-state>> ] + [ dfa>> rows>> keys [ member? ] with filter first ] bi + >r dup dfa>> r> >>start-state drop + + dup [ nfa>> final-states>> ] [ dfa>> rows>> ] bi + [ keys ] bi@ + [ intersect empty? not ] with filter + >r dfa>> r> >>final-states drop ; + +: subset-construction ( regexp -- ) + [ initialize-subset-construction ] + [ (subset-construction) ] + [ set-start/final-states ] tri ; + +: ( raw -- obj ) + regexp new + swap >>raw + 0 >>paren-count + -1 >>state + V{ } clone >>stack + V{ } clone >>character-sets + >>nfa + dup [ parse-raw-regexp ] [ subset-construction ] bi ; + +TUPLE: dfa-traverser + dfa + last-state current-state + text + start-index current-index + matches ; + +: ( text dfa -- match ) + dfa>> + dfa-traverser new + swap [ start-state>> >>current-state ] keep + >>dfa + swap >>text + 0 >>start-index + 0 >>current-index + V{ } clone >>matches ; + +: final-state? ( dfa-traverser -- ? ) + [ current-state>> ] [ dfa>> final-states>> ] bi + member? ; + +: text-finished? ( dfa-traverser -- ? ) + [ current-index>> ] [ text>> length ] bi >= ; + +: save-final-state ( dfa-straverser -- ) + [ current-index>> ] [ matches>> ] bi push ; + +: match-done? ( dfa-traverser -- ? ) + dup final-state? [ + dup save-final-state + ] when text-finished? ; + +: increment-state ( dfa-traverser state -- dfa-traverser ) + >r [ 1+ ] change-current-index + dup current-state>> >>last-state r> + >>current-state ; + +: match-transition ( obj hash -- state/f ) + 2dup keys [ callable? ] filter predicates + [ swap at nip ] [ at ] if* ; + +: do-match ( dfa-traverser -- dfa-traverser ) + dup match-done? [ + dup { + [ current-index>> ] + [ text>> ] + [ current-state>> ] + [ dfa>> rows>> ] + } cleave + at >r nth r> match-transition [ + increment-state do-match + ] when* + ] unless ; + +: return-match ( dfa-traverser -- interval/f ) + dup matches>> empty? [ + drop f + ] [ + [ start-index>> ] [ matches>> peek ] bi 1 + ] if ; + +: match ( string regexp -- pair ) + do-match return-match ; + +: matches? ( string regexp -- ? ) + dupd match [ [ length ] [ range-length 1- ] bi* = ] [ drop f ] if* ; + + + +! character classes +! TUPLE: range-class from to ; +! TUPLE: or-class left right ; + +! (?:a|b)* <- does not capture +! (a|b)*\1 <- group captured +! (?!abba) negative lookahead matches ababa but not abbaa +! (?=abba) positive lookahead matches abbaaa but not abaaa diff --git a/extra/state-tables/authors.txt b/extra/state-tables/authors.txt new file mode 100644 index 0000000000..7c1b2f2279 --- /dev/null +++ b/extra/state-tables/authors.txt @@ -0,0 +1 @@ +Doug Coleman diff --git a/extra/state-tables/state-tables-tests.factor b/extra/state-tables/state-tables-tests.factor new file mode 100644 index 0000000000..b46cc94266 --- /dev/null +++ b/extra/state-tables/state-tables-tests.factor @@ -0,0 +1,49 @@ +USING: kernel tables tools.test ; +IN: tables.tests + +: test-table + + "a" "c" "z" over set-entry + "a" "o" "y" over set-entry + "a" "l" "x" over set-entry + "b" "o" "y" over set-entry + "b" "l" "x" over set-entry + "b" "s" "u" over set-entry ; + +[ + T{ table f + H{ + { "a" H{ { "l" "x" } { "c" "z" } { "o" "y" } } } + { "b" H{ { "l" "x" } { "s" "u" } { "o" "y" } } } + } + H{ { "l" t } { "s" t } { "c" t } { "o" t } } } +] [ test-table ] unit-test + +[ "x" t ] [ "a" "l" test-table get-entry ] unit-test +[ "har" t ] [ + "a" "z" "har" test-table [ set-entry ] keep + >r "a" "z" r> get-entry +] unit-test + +: vector-test-table + + "a" "c" "z" over add-value + "a" "c" "r" over add-value + "a" "o" "y" over add-value + "a" "l" "x" over add-value + "b" "o" "y" over add-value + "b" "l" "x" over add-value + "b" "s" "u" over add-value ; + +[ +T{ vector-table f + H{ + { "a" + H{ { "l" "x" } { "c" V{ "z" "r" } } { "o" "y" } } } + { "b" + H{ { "l" "x" } { "s" "u" } { "o" "y" } } } + } + H{ { "l" t } { "s" t } { "c" t } { "o" t } } +} +] [ vector-test-table ] unit-test + diff --git a/extra/state-tables/state-tables.factor b/extra/state-tables/state-tables.factor new file mode 100644 index 0000000000..9a04a5b74a --- /dev/null +++ b/extra/state-tables/state-tables.factor @@ -0,0 +1,123 @@ +! Copyright (C) 2008 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: kernel namespaces sequences vectors assocs accessors ; +IN: state-tables + +TUPLE: table rows columns start-state final-states ; +TUPLE: entry row-key column-key value ; + +GENERIC: add-entry ( entry table -- ) + +: make-table ( class -- obj ) + new + H{ } clone >>rows + H{ } clone >>columns + H{ } clone >>final-states ; + +:
( -- obj ) + table make-table ; + +C: entry + +: (add-row) ( row-key table -- row ) + 2dup rows>> at* [ + 2nip + ] [ + drop H{ } clone [ -rot rows>> set-at ] keep + ] if ; + +: add-row ( row-key table -- ) + (add-row) drop ; + +: add-column ( column-key table -- ) + t -rot columns>> set-at ; + +: set-row ( row row-key table -- ) + rows>> set-at ; + +: lookup-row ( row-key table -- row/f ? ) + rows>> at* ; + +: row-exists? ( row-key table -- ? ) + lookup-row nip ; + +: lookup-column ( column-key table -- column/f ? ) + columns>> at* ; + +: column-exists? ( column-key table -- ? ) + lookup-column nip ; + +ERROR: no-row key ; +ERROR: no-column key ; + +: get-row ( row-key table -- row ) + dupd lookup-row [ + nip + ] [ + drop no-row + ] if ; + +: get-column ( column-key table -- column ) + dupd lookup-column [ + nip + ] [ + drop no-column + ] if ; + +: get-entry ( row-key column-key table -- obj ? ) + swapd lookup-row [ + at* + ] [ + 2drop f f + ] if ; + +: (set-entry) ( entry table -- value column-key row ) + [ >r column-key>> r> add-column ] 2keep + dupd >r row-key>> r> (add-row) + >r [ value>> ] keep column-key>> r> ; + +: set-entry ( entry table -- ) + (set-entry) set-at ; + +: delete-entry ( entry table -- ) + >r [ column-key>> ] [ row-key>> ] bi r> + lookup-row [ delete-at ] [ 2drop ] if ; + +: swap-rows ( row-key1 row-key2 table -- ) + [ tuck get-row >r get-row r> ] 3keep + >r >r rot r> r> [ set-row ] keep set-row ; + +: member?* ( obj obj -- bool ) + 2dup = [ 2drop t ] [ member? ] if ; + +: find-by-column ( column-key data table -- seq ) + swapd 2dup lookup-column 2drop + [ + rows>> [ + pick swap at* [ + >r pick r> member?* [ , ] [ drop ] if + ] [ + 2drop + ] if + ] assoc-each + ] { } make 2nip ; + + +TUPLE: vector-table < table ; +: ( -- obj ) + vector-table make-table ; + +: add-hash-vector ( value key hash -- ) + 2dup at* [ + dup vector? [ + 2nip push + ] [ + V{ } clone [ push ] keep + -rot >r >r [ push ] keep r> r> set-at + ] if + ] [ + drop set-at + ] if ; + +M: vector-table add-entry ( entry table -- ) + (set-entry) add-hash-vector ; diff --git a/extra/unix/bsd/macosx/macosx.factor b/extra/unix/bsd/macosx/macosx.factor index edef2aaa0c..174dcbf632 100644 --- a/extra/unix/bsd/macosx/macosx.factor +++ b/extra/unix/bsd/macosx/macosx.factor @@ -12,3 +12,16 @@ C-STRUCT: addrinfo { "char*" "canonname" } { "void*" "addr" } { "addrinfo*" "next" } ; + +C-STRUCT: passwd + { "char*" "pw_name" } + { "char*" "pw_passwd" } + { "uid_t" "pw_uid" } + { "gid_t" "pw_gid" } + { "time_t" "pw_change" } + { "char*" "pw_class" } + { "char*" "pw_gecos" } + { "char*" "pw_dir" } + { "char*" "pw_shell" } + { "time_t" "pw_expire" } + { "int" "pw_fields" } ; diff --git a/extra/unix/linux/linux.factor b/extra/unix/linux/linux.factor index 74195fae36..9450663aaa 100755 --- a/extra/unix/linux/linux.factor +++ b/extra/unix/linux/linux.factor @@ -84,3 +84,12 @@ C-STRUCT: sockaddr-un : SEEK_SET 0 ; inline : SEEK_CUR 1 ; inline : SEEK_END 2 ; inline + +C-STRUCT: passwd + { "char*" "pw_name" } + { "char*" "pw_passwd" } + { "uid_t" "pw_uid" } + { "gid_t" "pw_gid" } + { "char*" "pw_gecos" } + { "char*" "pw_dir" } + { "char*" "pw_shell" } ; diff --git a/extra/unix/unix.factor b/extra/unix/unix.factor index 7d846b9bef..f1f46fc184 100755 --- a/extra/unix/unix.factor +++ b/extra/unix/unix.factor @@ -34,19 +34,6 @@ C-STRUCT: group { "int" "gr_gid" } { "char**" "gr_mem" } ; -C-STRUCT: passwd - { "char*" "pw_name" } - { "char*" "pw_passwd" } - { "uid_t" "pw_uid" } - { "gid_t" "pw_gid" } - { "time_t" "pw_change" } - { "char*" "pw_class" } - { "char*" "pw_gecos" } - { "char*" "pw_dir" } - { "char*" "pw_shell" } - { "time_t" "pw_expire" } - { "int" "pw_fields" } ; - LIBRARY: factor FUNCTION: void clear_err_no ( ) ;