Merge branch 'master' of git://factorcode.org/git/factor
commit
707ce03f71
basis/smtp
core/io/encodings/utf8
extra
assocs/lib
combinators/lib
html/parser
regexp2
backend
classes
dfa
nfa
transition-tables
traversal
utils
taxes
|
@ -3,12 +3,6 @@ smtp.server kernel sequences namespaces logging accessors
|
|||
assocs sorting ;
|
||||
IN: smtp.tests
|
||||
|
||||
[ t ] [
|
||||
<email>
|
||||
dup clone "a" "b" set-header drop
|
||||
headers>> assoc-empty?
|
||||
] unit-test
|
||||
|
||||
{ 0 0 } [ [ ] with-smtp-connection ] must-infer-as
|
||||
|
||||
[ "hello\nworld" validate-address ] must-fail
|
||||
|
@ -60,12 +54,13 @@ IN: smtp.tests
|
|||
"Ed <dharmatech@factorcode.org>"
|
||||
} >>to
|
||||
"Doug <erg@factorcode.org>" >>from
|
||||
prepare
|
||||
dup headers>> >alist sort-keys [
|
||||
drop { "Date" "Message-Id" } member? not
|
||||
] assoc-filter
|
||||
over to>>
|
||||
rot from>>
|
||||
[
|
||||
email>headers sort-keys [
|
||||
drop { "Date" "Message-Id" } member? not
|
||||
] assoc-filter
|
||||
]
|
||||
[ to>> [ extract-email ] map ]
|
||||
[ from>> extract-email ] tri
|
||||
] unit-test
|
||||
|
||||
[ ] [ [ 4321 mock-smtp-server ] "SMTP server" spawn drop ] unit-test
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
! Copyright (C) 2007, 2008 Elie CHAFTARI, Dirk Vleugels,
|
||||
! Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: namespaces io io.timeouts kernel logging io.sockets
|
||||
USING: arrays namespaces io io.timeouts kernel logging io.sockets
|
||||
sequences combinators sequences.lib splitting assocs strings
|
||||
math.parser random system calendar io.encodings.ascii
|
||||
calendar.format accessors sets ;
|
||||
math.parser random system calendar io.encodings.ascii summary
|
||||
calendar.format accessors sets hashtables ;
|
||||
IN: smtp
|
||||
|
||||
SYMBOL: smtp-domain
|
||||
|
@ -23,6 +23,16 @@ LOG: log-smtp-connection NOTICE ( addrspec -- )
|
|||
call
|
||||
] with-client ; inline
|
||||
|
||||
TUPLE: email
|
||||
{ from string }
|
||||
{ to array }
|
||||
{ cc array }
|
||||
{ bcc array }
|
||||
{ subject string }
|
||||
{ body string } ;
|
||||
|
||||
: <email> ( -- email ) email new ;
|
||||
|
||||
: crlf ( -- ) "\r\n" write ;
|
||||
|
||||
: command ( string -- ) write crlf flush ;
|
||||
|
@ -30,10 +40,12 @@ LOG: log-smtp-connection NOTICE ( addrspec -- )
|
|||
: helo ( -- )
|
||||
esmtp get "EHLO " "HELO " ? host-name append command ;
|
||||
|
||||
ERROR: bad-email-address email ;
|
||||
|
||||
: validate-address ( string -- string' )
|
||||
#! Make sure we send funky stuff to the server by accident.
|
||||
dup "\r\n>" intersect empty?
|
||||
[ "Bad e-mail address: " prepend throw ] unless ;
|
||||
[ bad-email-address ] unless ;
|
||||
|
||||
: mail-from ( fromaddr -- )
|
||||
"MAIL FROM:<" swap validate-address ">" 3append command ;
|
||||
|
@ -44,8 +56,15 @@ LOG: log-smtp-connection NOTICE ( addrspec -- )
|
|||
: data ( -- )
|
||||
"DATA" command ;
|
||||
|
||||
ERROR: message-contains-dot message ;
|
||||
|
||||
M: message-contains-dot summary ( obj -- string )
|
||||
drop
|
||||
"Message cannot contain . on a line by itself" ;
|
||||
|
||||
: validate-message ( msg -- msg' )
|
||||
"." over member? [ "Message cannot contain . on a line by itself" throw ] when ;
|
||||
"." over member?
|
||||
[ message-contains-dot ] when ;
|
||||
|
||||
: send-body ( body -- )
|
||||
string-lines
|
||||
|
@ -58,19 +77,37 @@ LOG: log-smtp-connection NOTICE ( addrspec -- )
|
|||
|
||||
LOG: smtp-response DEBUG
|
||||
|
||||
ERROR: smtp-error message ;
|
||||
ERROR: smtp-server-busy < smtp-error ;
|
||||
ERROR: smtp-syntax-error < smtp-error ;
|
||||
ERROR: smtp-command-not-implemented < smtp-error ;
|
||||
ERROR: smtp-bad-authentication < smtp-error ;
|
||||
ERROR: smtp-mailbox-unavailable < smtp-error ;
|
||||
ERROR: smtp-user-not-local < smtp-error ;
|
||||
ERROR: smtp-exceeded-storage-allocation < smtp-error ;
|
||||
ERROR: smtp-bad-mailbox-name < smtp-error ;
|
||||
ERROR: smtp-transaction-failed < smtp-error ;
|
||||
|
||||
: check-response ( response -- )
|
||||
dup smtp-response
|
||||
{
|
||||
{ [ dup "220" head? ] [ smtp-response ] }
|
||||
{ [ dup "235" swap subseq? ] [ smtp-response ] }
|
||||
{ [ dup "250" head? ] [ smtp-response ] }
|
||||
{ [ dup "221" head? ] [ smtp-response ] }
|
||||
{ [ dup "bye" head? ] [ smtp-response ] }
|
||||
{ [ dup "4" head? ] [ "server busy" throw ] }
|
||||
{ [ dup "354" head? ] [ smtp-response ] }
|
||||
{ [ dup "50" head? ] [ smtp-response "syntax error" throw ] }
|
||||
{ [ dup "53" head? ] [ smtp-response "invalid authentication data" throw ] }
|
||||
{ [ dup "55" head? ] [ smtp-response "fatal error" throw ] }
|
||||
[ "unknown error" throw ]
|
||||
{ [ dup "bye" head? ] [ drop ] }
|
||||
{ [ dup "220" head? ] [ drop ] }
|
||||
{ [ dup "235" swap subseq? ] [ drop ] }
|
||||
{ [ dup "250" head? ] [ drop ] }
|
||||
{ [ dup "221" head? ] [ drop ] }
|
||||
{ [ dup "354" head? ] [ drop ] }
|
||||
{ [ dup "4" head? ] [ smtp-server-busy ] }
|
||||
{ [ dup "500" head? ] [ smtp-syntax-error ] }
|
||||
{ [ dup "501" head? ] [ smtp-command-not-implemented ] }
|
||||
{ [ dup "50" head? ] [ smtp-syntax-error ] }
|
||||
{ [ dup "53" head? ] [ smtp-bad-authentication ] }
|
||||
{ [ dup "550" head? ] [ smtp-mailbox-unavailable ] }
|
||||
{ [ dup "551" head? ] [ smtp-user-not-local ] }
|
||||
{ [ dup "552" head? ] [ smtp-exceeded-storage-allocation ] }
|
||||
{ [ dup "553" head? ] [ smtp-bad-mailbox-name ] }
|
||||
{ [ dup "554" head? ] [ smtp-transaction-failed ] }
|
||||
[ smtp-error ]
|
||||
} cond ;
|
||||
|
||||
: multiline? ( response -- boolean )
|
||||
|
@ -89,41 +126,19 @@ LOG: smtp-response DEBUG
|
|||
|
||||
: get-ok ( -- ) receive-response check-response ;
|
||||
|
||||
ERROR: invalid-header-string string ;
|
||||
|
||||
: validate-header ( string -- string' )
|
||||
dup "\r\n" intersect empty?
|
||||
[ "Invalid header string: " prepend throw ] unless ;
|
||||
[ invalid-header-string ] unless ;
|
||||
|
||||
: write-header ( key value -- )
|
||||
swap
|
||||
validate-header write
|
||||
": " write
|
||||
validate-header write
|
||||
crlf ;
|
||||
[ validate-header write ]
|
||||
[ ": " write validate-header write ] bi* crlf ;
|
||||
|
||||
: write-headers ( assoc -- )
|
||||
[ write-header ] assoc-each ;
|
||||
|
||||
TUPLE: email from to subject headers body ;
|
||||
|
||||
M: email clone
|
||||
call-next-method [ clone ] change-headers ;
|
||||
|
||||
: (send) ( email -- )
|
||||
[
|
||||
helo get-ok
|
||||
dup from>> mail-from get-ok
|
||||
dup to>> [ rcpt-to get-ok ] each
|
||||
data get-ok
|
||||
dup headers>> write-headers
|
||||
crlf
|
||||
body>> send-body get-ok
|
||||
quit get-ok
|
||||
] with-smtp-connection ;
|
||||
|
||||
: extract-email ( recepient -- email )
|
||||
#! This could be much smarter.
|
||||
" " last-split1 swap or "<" ?head drop ">" ?tail drop ;
|
||||
|
||||
: message-id ( -- string )
|
||||
[
|
||||
"<" %
|
||||
|
@ -135,25 +150,38 @@ M: email clone
|
|||
">" %
|
||||
] "" make ;
|
||||
|
||||
: set-header ( email value key -- email )
|
||||
pick headers>> set-at ;
|
||||
: extract-email ( recepient -- email )
|
||||
#! This could be much smarter.
|
||||
" " last-split1 swap or "<" ?head drop ">" ?tail drop ;
|
||||
|
||||
: prepare ( email -- email )
|
||||
clone
|
||||
dup from>> "From" set-header
|
||||
[ extract-email ] change-from
|
||||
dup to>> ", " join "To" set-header
|
||||
[ [ extract-email ] map ] change-to
|
||||
dup subject>> "Subject" set-header
|
||||
now timestamp>rfc822 "Date" set-header
|
||||
message-id "Message-Id" set-header ;
|
||||
: email>headers ( email -- hashtable )
|
||||
[
|
||||
{
|
||||
[ from>> "From" set ]
|
||||
[ to>> ", " join "To" set ]
|
||||
[ cc>> ", " join [ "Cc" set ] unless-empty ]
|
||||
[ subject>> "Subject" set ]
|
||||
} cleave
|
||||
now timestamp>rfc822 "Date" set
|
||||
message-id "Message-Id" set
|
||||
] { } make-assoc ;
|
||||
|
||||
: <email> ( -- email )
|
||||
email new
|
||||
H{ } clone >>headers ;
|
||||
: (send-email) ( headers email -- )
|
||||
[
|
||||
helo get-ok
|
||||
dup from>> extract-email mail-from get-ok
|
||||
dup to>> [ extract-email rcpt-to get-ok ] each
|
||||
dup cc>> [ extract-email rcpt-to get-ok ] each
|
||||
dup bcc>> [ extract-email rcpt-to get-ok ] each
|
||||
data get-ok
|
||||
swap write-headers
|
||||
crlf
|
||||
body>> send-body get-ok
|
||||
quit get-ok
|
||||
] with-smtp-connection ;
|
||||
|
||||
: send-email ( email -- )
|
||||
prepare (send) ;
|
||||
[ email>headers ] keep (send-email) ;
|
||||
|
||||
! Dirk's old AUTH CRAM-MD5 code. I don't know anything about
|
||||
! CRAM MD5, and the old code didn't work properly either, so here
|
||||
|
|
|
@ -24,7 +24,7 @@ SINGLETON: utf8
|
|||
: triple ( stream byte -- stream char )
|
||||
BIN: 1111 bitand append-nums append-nums ; inline
|
||||
|
||||
: quad ( stream byte -- stream char )
|
||||
: quadruple ( stream byte -- stream char )
|
||||
BIN: 111 bitand append-nums append-nums append-nums ; inline
|
||||
|
||||
: begin-utf8 ( stream byte -- stream char )
|
||||
|
@ -32,7 +32,7 @@ SINGLETON: utf8
|
|||
{ [ dup -7 shift zero? ] [ ] }
|
||||
{ [ dup -5 shift BIN: 110 number= ] [ double ] }
|
||||
{ [ dup -4 shift BIN: 1110 number= ] [ triple ] }
|
||||
{ [ dup -3 shift BIN: 11110 number= ] [ quad ] }
|
||||
{ [ dup -3 shift BIN: 11110 number= ] [ quadruple ] }
|
||||
[ drop replacement-char ]
|
||||
} cond ; inline
|
||||
|
||||
|
|
|
@ -1,4 +1,17 @@
|
|||
USING: kernel tools.test sequences vectors assocs.lib ;
|
||||
IN: assocs.lib.tests
|
||||
USING: assocs.lib tools.test vectors ;
|
||||
|
||||
{ 1 1 } [ [ ?push ] histogram ] must-infer-as
|
||||
|
||||
! substitute
|
||||
[ { 2 } ] [ { 1 } H{ { 1 2 } } [ ?at drop ] curry map ] unit-test
|
||||
[ { 3 } ] [ { 3 } H{ { 1 2 } } [ ?at drop ] curry map ] unit-test
|
||||
|
||||
[ 2 ] [ 1 H{ { 1 2 } } [ ] [ ] if-at ] unit-test
|
||||
[ 3 ] [ 3 H{ { 1 2 } } [ ] [ ] if-at ] unit-test
|
||||
|
||||
[ "hi" ] [ 1 H{ { 1 2 } } [ drop "hi" ] when-at ] unit-test
|
||||
[ 3 ] [ 3 H{ { 1 2 } } [ drop "hi" ] when-at ] unit-test
|
||||
[ 2 ] [ 1 H{ { 1 2 } } [ drop "hi" ] unless-at ] unit-test
|
||||
[ "hi" ] [ 3 H{ { 1 2 } } [ drop "hi" ] unless-at ] unit-test
|
||||
|
||||
|
|
|
@ -37,3 +37,13 @@ IN: assocs.lib
|
|||
H{ } clone [
|
||||
swap [ change-at ] 2curry assoc-each
|
||||
] keep ; inline
|
||||
|
||||
: ?at ( obj assoc -- value/obj ? )
|
||||
dupd at* [ [ nip ] [ drop ] if ] keep ;
|
||||
|
||||
: if-at ( obj assoc quot1 quot2 -- )
|
||||
[ ?at ] 2dip if ; inline
|
||||
|
||||
: when-at ( obj assoc quot -- ) [ ] if-at ; inline
|
||||
|
||||
: unless-at ( obj assoc quot -- ) [ ] swap if-at ; inline
|
||||
|
|
|
@ -11,3 +11,12 @@ HELP: generate
|
|||
"[ 20 random-prime ] [ 4 mod 3 = ] generate ."
|
||||
"526367"
|
||||
} ;
|
||||
|
||||
HELP: %chance
|
||||
{ $values { "quot" quotation } { "n" integer } }
|
||||
{ $description "Calls the quotation " { $snippet "n" } " percent of the time." }
|
||||
{ $unchecked-example
|
||||
"USING: io ;"
|
||||
"[ \"hello, world! maybe.\" print ] 50 %chance"
|
||||
""
|
||||
} ;
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: kernel combinators fry namespaces quotations hashtables
|
||||
sequences assocs arrays inference effects math math.ranges
|
||||
generalizations macros continuations locals ;
|
||||
generalizations macros continuations random locals ;
|
||||
|
||||
IN: combinators.lib
|
||||
|
||||
|
@ -31,6 +31,8 @@ IN: combinators.lib
|
|||
! Generalized versions of core combinators
|
||||
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
: quad ( x p q r s -- ) >r >r >r keep r> keep r> keep r> call ; inline
|
||||
|
||||
: 4slip ( quot a b c d -- a b c d ) 4 nslip ; inline
|
||||
|
||||
: 4keep ( w x y z quot -- w x y z ) 4 nkeep ; inline
|
||||
|
@ -137,7 +139,7 @@ MACRO: multikeep ( word out-indexes -- ... )
|
|||
[ drop ] rot compose attempt-all ; inline
|
||||
|
||||
: do-while ( pred body tail -- )
|
||||
>r tuck 2slip r> while ;
|
||||
>r tuck 2slip r> while ; inline
|
||||
|
||||
: generate ( generator predicate -- obj )
|
||||
[ dup ] swap [ dup [ nip ] unless not ] 3compose
|
||||
|
@ -147,3 +149,5 @@ MACRO: predicates ( seq -- quot/f )
|
|||
dup [ 1quotation [ drop ] prepend ] map
|
||||
>r [ [ dup ] prepend ] map r> zip [ drop f ] suffix
|
||||
[ cond ] curry ;
|
||||
|
||||
: %chance ( quot integer -- ) 100 random > swap when ; inline
|
||||
|
|
|
@ -56,8 +56,7 @@ TUPLE: link attributes clickable ;
|
|||
: trim-text ( vector -- vector' )
|
||||
[
|
||||
dup name>> text = [
|
||||
[ text>> [ blank? ] trim ] keep
|
||||
[ set-tag-text ] keep
|
||||
[ [ blank? ] trim ] change-text
|
||||
] when
|
||||
] map ;
|
||||
|
||||
|
@ -173,8 +172,7 @@ TUPLE: link attributes clickable ;
|
|||
[
|
||||
{
|
||||
{ [ dup name>> "form" = ]
|
||||
[ "form action: " write attributes>> "action" swap at print
|
||||
] }
|
||||
[ "form action: " write attributes>> "action" swap at print ] }
|
||||
{ [ dup name>> "input" = ] [ input. ] }
|
||||
[ drop ]
|
||||
} cond
|
||||
|
|
|
@ -2,19 +2,19 @@ USING: html.parser kernel tools.test ;
|
|||
IN: html.parser.tests
|
||||
|
||||
[
|
||||
V{ T{ tag f "html" H{ } f f f } }
|
||||
V{ T{ tag f "html" H{ } f f } }
|
||||
] [ "<html>" parse-html ] unit-test
|
||||
|
||||
[
|
||||
V{ T{ tag f "html" H{ } f f t } }
|
||||
V{ T{ tag f "html" H{ } f t } }
|
||||
] [ "</html>" parse-html ] unit-test
|
||||
|
||||
[
|
||||
V{ T{ tag f "a" H{ { "href" "http://factorcode.org/" } } f f f } }
|
||||
V{ T{ tag f "a" H{ { "href" "http://factorcode.org/" } } f f } }
|
||||
] [ "<a href=\"http://factorcode.org/\">" parse-html ] unit-test
|
||||
|
||||
[
|
||||
V{ T{ tag f "a" H{ { "href" "http://factorcode.org/" } } f f f } }
|
||||
V{ T{ tag f "a" H{ { "href" "http://factorcode.org/" } } f f } }
|
||||
] [ "<a href = \"http://factorcode.org/\" >" parse-html ] unit-test
|
||||
|
||||
[
|
||||
|
@ -26,7 +26,6 @@ V{
|
|||
H{ { "baz" "\"quux\"" } { "foo" "bar's" } }
|
||||
f
|
||||
f
|
||||
f
|
||||
}
|
||||
}
|
||||
] [ "<a foo=\"bar's\" baz='\"quux\"' >" parse-html ] unit-test
|
||||
|
@ -39,25 +38,25 @@ V{
|
|||
{ "foo" "bar" }
|
||||
{ "href" "http://factorcode.org/" }
|
||||
{ "baz" "quux" }
|
||||
} f f f }
|
||||
} f f }
|
||||
}
|
||||
] [ "<a href = \"http://factorcode.org/\" foo = bar baz='quux'a=pirsqd >" parse-html ] unit-test
|
||||
|
||||
[
|
||||
V{
|
||||
T{ tag f "html" H{ } f f f }
|
||||
T{ tag f "head" H{ } f f f }
|
||||
T{ tag f "head" H{ } f f t }
|
||||
T{ tag f "html" H{ } f f t }
|
||||
T{ tag f "html" H{ } f f }
|
||||
T{ tag f "head" H{ } f f }
|
||||
T{ tag f "head" H{ } f t }
|
||||
T{ tag f "html" H{ } f t }
|
||||
}
|
||||
] [ "<html<head</head</html" parse-html ] unit-test
|
||||
|
||||
[
|
||||
V{
|
||||
T{ tag f "head" H{ } f f f }
|
||||
T{ tag f "title" H{ } f f f }
|
||||
T{ tag f text f "Spagna" f f }
|
||||
T{ tag f "title" H{ } f f t }
|
||||
T{ tag f "head" H{ } f f t }
|
||||
T{ tag f "head" H{ } f f }
|
||||
T{ tag f "title" H{ } f f }
|
||||
T{ tag f text f "Spagna" f }
|
||||
T{ tag f "title" H{ } f t }
|
||||
T{ tag f "head" H{ } f t }
|
||||
}
|
||||
] [ "<head<title>Spagna</title></head" parse-html ] unit-test
|
||||
|
|
|
@ -1,26 +1,22 @@
|
|||
USING: accessors arrays html.parser.utils hashtables io kernel
|
||||
namespaces prettyprint quotations
|
||||
sequences splitting state-parser strings unicode.categories unicode.case ;
|
||||
sequences splitting state-parser strings unicode.categories unicode.case
|
||||
sequences.lib ;
|
||||
IN: html.parser
|
||||
|
||||
TUPLE: tag name attributes text matched? closing? ;
|
||||
TUPLE: tag name attributes text closing? ;
|
||||
|
||||
SYMBOL: text
|
||||
SYMBOL: dtd
|
||||
SYMBOL: comment
|
||||
SYMBOL: javascript
|
||||
SINGLETON: text
|
||||
SINGLETON: dtd
|
||||
SINGLETON: comment
|
||||
SYMBOL: tagstack
|
||||
|
||||
: push-tag ( tag -- )
|
||||
tagstack get push ;
|
||||
|
||||
: closing-tag? ( string -- ? )
|
||||
dup empty? [
|
||||
drop f
|
||||
] [
|
||||
dup first CHAR: / =
|
||||
swap peek CHAR: / = or
|
||||
] if ;
|
||||
[ f ]
|
||||
[ [ first ] [ peek ] bi [ CHAR: / = ] bi@ or ] if-empty ;
|
||||
|
||||
: <tag> ( name attributes closing? -- tag )
|
||||
tag new
|
||||
|
@ -28,56 +24,55 @@ SYMBOL: tagstack
|
|||
swap >>attributes
|
||||
swap >>name ;
|
||||
|
||||
: make-tag ( str attribs -- tag )
|
||||
: make-tag ( string attribs -- tag )
|
||||
>r [ closing-tag? ] keep "/" trim1 r> rot <tag> ;
|
||||
|
||||
: make-text-tag ( str -- tag )
|
||||
T{ tag f text } clone [ set-tag-text ] keep ;
|
||||
: make-text-tag ( string -- tag )
|
||||
tag new
|
||||
text >>name
|
||||
swap >>text ;
|
||||
|
||||
: make-comment-tag ( str -- tag )
|
||||
T{ tag f comment } clone [ set-tag-text ] keep ;
|
||||
: make-comment-tag ( string -- tag )
|
||||
tag new
|
||||
comment >>name
|
||||
swap >>text ;
|
||||
|
||||
: make-dtd-tag ( str -- tag )
|
||||
T{ tag f dtd } clone [ set-tag-text ] keep ;
|
||||
: make-dtd-tag ( string -- tag )
|
||||
tag new
|
||||
dtd >>name
|
||||
swap >>text ;
|
||||
|
||||
: read-whitespace ( -- str )
|
||||
: read-whitespace ( -- string )
|
||||
[ get-char blank? not ] take-until ;
|
||||
|
||||
: read-whitespace* ( -- )
|
||||
read-whitespace drop ;
|
||||
: read-whitespace* ( -- ) read-whitespace drop ;
|
||||
|
||||
: read-token ( -- str )
|
||||
: read-token ( -- string )
|
||||
read-whitespace*
|
||||
[ get-char blank? ] take-until ;
|
||||
|
||||
: read-single-quote ( -- str )
|
||||
: read-single-quote ( -- string )
|
||||
[ get-char CHAR: ' = ] take-until ;
|
||||
|
||||
: read-double-quote ( -- str )
|
||||
: read-double-quote ( -- string )
|
||||
[ get-char CHAR: " = ] take-until ;
|
||||
|
||||
: read-quote ( -- str )
|
||||
get-char next* CHAR: ' = [
|
||||
read-single-quote
|
||||
] [
|
||||
read-double-quote
|
||||
] if next* ;
|
||||
: read-quote ( -- string )
|
||||
get-char next* CHAR: ' =
|
||||
[ read-single-quote ] [ read-double-quote ] if next* ;
|
||||
|
||||
: read-key ( -- str )
|
||||
: read-key ( -- string )
|
||||
read-whitespace*
|
||||
[ get-char CHAR: = = get-char blank? or ] take-until ;
|
||||
[ get-char [ CHAR: = = ] [ blank? ] bi or ] take-until ;
|
||||
|
||||
: read-= ( -- )
|
||||
read-whitespace*
|
||||
[ get-char CHAR: = = ] take-until drop next* ;
|
||||
|
||||
: read-value ( -- str )
|
||||
: read-value ( -- string )
|
||||
read-whitespace*
|
||||
get-char quote? [
|
||||
read-quote
|
||||
] [
|
||||
read-token
|
||||
] if [ blank? ] trim ;
|
||||
get-char quote? [ read-quote ] [ read-token ] if
|
||||
[ blank? ] trim ;
|
||||
|
||||
: read-comment ( -- )
|
||||
"-->" take-string* make-comment-tag push-tag ;
|
||||
|
@ -97,14 +92,14 @@ SYMBOL: tagstack
|
|||
[ get-char CHAR: > = get-char CHAR: < = or ] take-until
|
||||
get-char CHAR: < = [ next* ] unless ;
|
||||
|
||||
: read-< ( -- str )
|
||||
: read-< ( -- string )
|
||||
next* get-char CHAR: ! = [
|
||||
read-bang f
|
||||
] [
|
||||
read-tag
|
||||
] if ;
|
||||
|
||||
: read-until-< ( -- str )
|
||||
: read-until-< ( -- string )
|
||||
[ get-char CHAR: < = ] take-until ;
|
||||
|
||||
: parse-text ( -- )
|
||||
|
@ -131,11 +126,9 @@ SYMBOL: tagstack
|
|||
] string-parse ;
|
||||
|
||||
: parse-tag ( -- )
|
||||
read-< dup empty? [
|
||||
drop
|
||||
] [
|
||||
read-< [
|
||||
(parse-tag) make-tag push-tag
|
||||
] if ;
|
||||
] unless-empty ;
|
||||
|
||||
: (parse-html) ( -- )
|
||||
get-next [
|
||||
|
@ -145,13 +138,7 @@ SYMBOL: tagstack
|
|||
] when ;
|
||||
|
||||
: tag-parse ( quot -- vector )
|
||||
[
|
||||
V{ } clone tagstack set
|
||||
string-parse
|
||||
] with-scope ;
|
||||
V{ } clone tagstack [ string-parse ] with-variable ;
|
||||
|
||||
: parse-html ( string -- vector )
|
||||
[
|
||||
(parse-html)
|
||||
tagstack get
|
||||
] tag-parse ;
|
||||
[ (parse-html) tagstack get ] tag-parse ;
|
||||
|
|
|
@ -1,123 +1,89 @@
|
|||
USING: assocs html.parser html.parser.utils combinators
|
||||
USING: accessors assocs html.parser html.parser.utils combinators
|
||||
continuations hashtables
|
||||
hashtables.private io kernel math
|
||||
namespaces prettyprint quotations sequences splitting
|
||||
strings ;
|
||||
IN: html.parser.printer
|
||||
|
||||
SYMBOL: no-section
|
||||
SYMBOL: html
|
||||
SYMBOL: head
|
||||
SYMBOL: body
|
||||
TUPLE: state section ;
|
||||
SYMBOL: printer
|
||||
|
||||
! TUPLE: text bold? underline? strikethrough? ;
|
||||
TUPLE: html-printer ;
|
||||
TUPLE: text-printer < html-printer ;
|
||||
TUPLE: src-printer < html-printer ;
|
||||
TUPLE: html-prettyprinter < html-printer ;
|
||||
|
||||
TUPLE: text-printer ;
|
||||
TUPLE: ui-printer ;
|
||||
TUPLE: src-printer ;
|
||||
TUPLE: html-prettyprinter ;
|
||||
UNION: printer text-printer ui-printer src-printer html-prettyprinter ;
|
||||
HOOK: print-tag printer ( tag -- )
|
||||
HOOK: print-text-tag printer ( tag -- )
|
||||
HOOK: print-comment-tag printer ( tag -- )
|
||||
HOOK: print-dtd-tag printer ( tag -- )
|
||||
HOOK: print-opening-named-tag printer ( tag -- )
|
||||
HOOK: print-closing-named-tag printer ( tag -- )
|
||||
HOOK: print-text-tag html-printer ( tag -- )
|
||||
HOOK: print-comment-tag html-printer ( tag -- )
|
||||
HOOK: print-dtd-tag html-printer ( tag -- )
|
||||
HOOK: print-opening-tag html-printer ( tag -- )
|
||||
HOOK: print-closing-tag html-printer ( tag -- )
|
||||
|
||||
: print-tags ( vector -- )
|
||||
[ print-tag ] each ;
|
||||
ERROR: unknown-tag-error tag ;
|
||||
|
||||
: print-tag ( tag -- )
|
||||
{
|
||||
{ [ dup name>> text = ] [ print-text-tag ] }
|
||||
{ [ dup name>> comment = ] [ print-comment-tag ] }
|
||||
{ [ dup name>> dtd = ] [ print-dtd-tag ] }
|
||||
{ [ dup [ name>> string? ] [ closing?>> ] bi and ]
|
||||
[ print-closing-tag ] }
|
||||
{ [ dup name>> string? ]
|
||||
[ print-opening-tag ] }
|
||||
[ unknown-tag-error ]
|
||||
} cond ;
|
||||
|
||||
: print-tags ( vector -- ) [ print-tag ] each ;
|
||||
|
||||
: html-text. ( vector -- )
|
||||
[
|
||||
T{ text-printer } printer set
|
||||
print-tags
|
||||
] with-scope ;
|
||||
T{ text-printer } html-printer [ print-tags ] with-variable ;
|
||||
|
||||
: html-src. ( vector -- )
|
||||
[
|
||||
T{ src-printer } printer set
|
||||
print-tags
|
||||
] with-scope ;
|
||||
T{ src-printer } html-printer [ print-tags ] with-variable ;
|
||||
|
||||
M: printer print-text-tag ( tag -- )
|
||||
tag-text write ;
|
||||
M: html-printer print-text-tag ( tag -- ) text>> write ;
|
||||
|
||||
M: printer print-comment-tag ( tag -- )
|
||||
"<!--" write
|
||||
tag-text write
|
||||
"-->" write ;
|
||||
M: html-printer print-comment-tag ( tag -- )
|
||||
"<!--" write text>> write "-->" write ;
|
||||
|
||||
M: printer print-dtd-tag ( tag -- )
|
||||
"<!" write
|
||||
tag-text write
|
||||
">" write ;
|
||||
|
||||
M: printer print-opening-named-tag ( tag -- )
|
||||
dup tag-name {
|
||||
{ "html" [ drop ] }
|
||||
{ "head" [ drop ] }
|
||||
{ "body" [ drop ] }
|
||||
{ "title" [ "Title: " write tag-text print ] }
|
||||
} case ;
|
||||
|
||||
M: printer print-closing-named-tag ( tag -- )
|
||||
drop ;
|
||||
M: html-printer print-dtd-tag ( tag -- )
|
||||
"<!" write text>> write ">" write ;
|
||||
|
||||
: print-attributes ( hashtable -- )
|
||||
[
|
||||
swap bl write "=" write ?quote write
|
||||
] assoc-each ;
|
||||
[ [ bl write "=" write ] [ ?quote write ] bi* ] assoc-each ;
|
||||
|
||||
M: src-printer print-opening-named-tag ( tag -- )
|
||||
M: src-printer print-opening-tag ( tag -- )
|
||||
"<" write
|
||||
[ tag-name write ]
|
||||
[ tag-attributes dup assoc-empty? [ drop ] [ print-attributes ] if ] bi
|
||||
[ name>> write ]
|
||||
[ attributes>> dup assoc-empty? [ drop ] [ print-attributes ] if ] bi
|
||||
">" write ;
|
||||
|
||||
M: src-printer print-closing-named-tag ( tag -- )
|
||||
M: src-printer print-closing-tag ( tag -- )
|
||||
"</" write
|
||||
tag-name write
|
||||
name>> write
|
||||
">" write ;
|
||||
|
||||
SYMBOL: tab-width
|
||||
SYMBOL: #indentations
|
||||
SYMBOL: tagstack
|
||||
|
||||
: prettyprint-html ( vector -- )
|
||||
[
|
||||
T{ html-prettyprinter } printer set
|
||||
V{ } clone tagstack set
|
||||
2 tab-width set
|
||||
0 #indentations set
|
||||
print-tags
|
||||
] with-scope ;
|
||||
|
||||
: print-tabs ( -- )
|
||||
tab-width get #indentations get * CHAR: \s <repetition> write ;
|
||||
|
||||
M: html-prettyprinter print-opening-named-tag ( tag -- )
|
||||
M: html-prettyprinter print-opening-tag ( tag -- )
|
||||
print-tabs "<" write
|
||||
tag-name write
|
||||
name>> write
|
||||
">\n" write ;
|
||||
|
||||
M: html-prettyprinter print-closing-named-tag ( tag -- )
|
||||
M: html-prettyprinter print-closing-tag ( tag -- )
|
||||
"</" write
|
||||
tag-name write
|
||||
name>> write
|
||||
">" write ;
|
||||
|
||||
ERROR: unknown-tag-error tag ;
|
||||
|
||||
M: printer print-tag ( tag -- )
|
||||
{
|
||||
{ [ dup tag-name text = ] [ print-text-tag ] }
|
||||
{ [ dup tag-name comment = ] [ print-comment-tag ] }
|
||||
{ [ dup tag-name dtd = ] [ print-dtd-tag ] }
|
||||
{ [ dup tag-name string? over tag-closing? and ]
|
||||
[ print-closing-named-tag ] }
|
||||
{ [ dup tag-name string? ]
|
||||
[ print-opening-named-tag ] }
|
||||
[ unknown-tag-error ]
|
||||
} cond ;
|
||||
|
||||
! SYMBOL: tablestack
|
||||
! : with-html-printer ( vector quot -- )
|
||||
! [ V{ } clone tablestack set ] with-scope ;
|
||||
|
||||
! { { 1 2 } { 3 4 } }
|
||||
! H{ { table-gap { 10 10 } } } [
|
||||
! [ [ [ [ . ] with-cell ] each ] with-row ] each
|
||||
! ] tabular-output
|
||||
|
||||
! : html-pp ( vector -- )
|
||||
! [ 0 #indentations set 2 tab-width set ] with-scope ;
|
||||
|
|
|
@ -4,8 +4,7 @@ namespaces prettyprint quotations sequences splitting
|
|||
state-parser strings sequences.lib ;
|
||||
IN: html.parser.utils
|
||||
|
||||
: string-parse-end? ( -- ? )
|
||||
get-next not ;
|
||||
: string-parse-end? ( -- ? ) get-next not ;
|
||||
|
||||
: take-string* ( match -- string )
|
||||
dup length <circular-string>
|
||||
|
@ -16,17 +15,18 @@ IN: html.parser.utils
|
|||
[ ?head drop ] [ ?tail drop ] bi ;
|
||||
|
||||
: single-quote ( str -- newstr )
|
||||
>r "'" r> "'" 3append ;
|
||||
"'" swap "'" 3append ;
|
||||
|
||||
: double-quote ( str -- newstr )
|
||||
>r "\"" r> "\"" 3append ;
|
||||
"\"" swap "\"" 3append ;
|
||||
|
||||
: quote ( str -- newstr )
|
||||
CHAR: ' over member?
|
||||
[ double-quote ] [ single-quote ] if ;
|
||||
|
||||
: quoted? ( str -- ? )
|
||||
[ [ first ] [ peek ] bi [ = ] keep "'\"" member? and ] [ f ] if-seq ;
|
||||
[ f ]
|
||||
[ [ first ] [ peek ] bi [ = ] keep "'\"" member? and ] if-empty ;
|
||||
|
||||
: ?quote ( str -- newstr )
|
||||
dup quoted? [ quote ] unless ;
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Doug Coleman
|
|
@ -0,0 +1,23 @@
|
|||
! Copyright (C) 2008 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors hashtables kernel math state-tables vars vectors ;
|
||||
IN: regexp2.backend
|
||||
|
||||
TUPLE: regexp
|
||||
raw
|
||||
{ stack vector }
|
||||
parse-tree
|
||||
nfa-table
|
||||
dfa-table
|
||||
minimized-table
|
||||
{ state integer }
|
||||
{ new-states vector }
|
||||
{ visited-states hashtable } ;
|
||||
|
||||
: reset-regexp ( regexp -- regexp )
|
||||
0 >>state
|
||||
V{ } clone >>stack
|
||||
V{ } clone >>new-states
|
||||
H{ } clone >>visited-states ;
|
||||
|
||||
SYMBOL: current-regexp
|
|
@ -0,0 +1,49 @@
|
|||
! Copyright (C) 2008 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors kernel math math.order symbols regexp2.parser
|
||||
words regexp2.utils unicode.categories combinators.short-circuit ;
|
||||
IN: regexp2.classes
|
||||
|
||||
GENERIC: class-member? ( obj class -- ? )
|
||||
|
||||
M: word class-member? ( obj class -- ? ) 2drop f ;
|
||||
M: integer class-member? ( obj class -- ? ) 2drop f ;
|
||||
|
||||
M: character-class-range class-member? ( obj class -- ? )
|
||||
[ from>> ] [ to>> ] bi between? ;
|
||||
|
||||
M: any-char class-member? ( obj class -- ? )
|
||||
2drop t ;
|
||||
|
||||
M: letter-class class-member? ( obj class -- ? )
|
||||
drop letter? ;
|
||||
|
||||
M: LETTER-class class-member? ( obj class -- ? )
|
||||
drop LETTER? ;
|
||||
|
||||
M: ascii-class class-member? ( obj class -- ? )
|
||||
drop ascii? ;
|
||||
|
||||
M: digit-class class-member? ( obj class -- ? )
|
||||
drop digit? ;
|
||||
|
||||
M: alpha-class class-member? ( obj class -- ? )
|
||||
drop alpha? ;
|
||||
|
||||
M: punctuation-class class-member? ( obj class -- ? )
|
||||
drop punct? ;
|
||||
|
||||
M: java-printable-class class-member? ( obj class -- ? )
|
||||
drop java-printable? ;
|
||||
|
||||
M: non-newline-blank-class class-member? ( obj class -- ? )
|
||||
drop { [ blank? ] [ CHAR: \n = not ] } 1&& ;
|
||||
|
||||
M: control-character-class class-member? ( obj class -- ? )
|
||||
drop control-char? ;
|
||||
|
||||
M: hex-digit-class class-member? ( obj class -- ? )
|
||||
drop hex-digit? ;
|
||||
|
||||
M: java-blank-class class-member? ( obj class -- ? )
|
||||
drop java-blank? ;
|
|
@ -0,0 +1,70 @@
|
|||
! Copyright (C) 2008 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors arrays assocs combinators fry kernel locals
|
||||
math math.order regexp2.nfa regexp2.transition-tables sequences
|
||||
sets sorting vectors regexp2.utils sequences.lib ;
|
||||
USING: io prettyprint threads ;
|
||||
IN: regexp2.dfa
|
||||
|
||||
: find-delta ( states transition regexp -- new-states )
|
||||
nfa-table>> transitions>>
|
||||
rot [ swap at at ] with with map sift concat prune ;
|
||||
|
||||
: (find-epsilon-closure) ( states regexp -- new-states )
|
||||
eps swap find-delta ;
|
||||
|
||||
: find-epsilon-closure ( states regexp -- new-states )
|
||||
'[ dup , (find-epsilon-closure) union ] [ length ] while-changes
|
||||
natural-sort ;
|
||||
|
||||
: find-closure ( states transition regexp -- new-states )
|
||||
[ find-delta ] 2keep nip find-epsilon-closure ;
|
||||
|
||||
: find-start-state ( regexp -- state )
|
||||
[ nfa-table>> start-state>> 1vector ] keep find-epsilon-closure ;
|
||||
|
||||
: find-transitions ( seq1 regexp -- seq2 )
|
||||
nfa-table>> transitions>>
|
||||
[ at keys ] curry map concat eps swap remove ;
|
||||
|
||||
: add-todo-state ( state regexp -- )
|
||||
2dup visited-states>> key? [
|
||||
2drop
|
||||
] [
|
||||
[ visited-states>> conjoin ]
|
||||
[ new-states>> push ] 2bi
|
||||
] if ;
|
||||
|
||||
: new-transitions ( regexp -- )
|
||||
dup new-states>> [
|
||||
drop
|
||||
] [
|
||||
dupd pop dup pick find-transitions rot
|
||||
[
|
||||
[ [ find-closure ] 2keep nip dupd add-todo-state ] 3keep
|
||||
>r swapd transition boa r> dfa-table>> add-transition
|
||||
] curry with each
|
||||
new-transitions
|
||||
] if-empty ;
|
||||
|
||||
: states ( hashtable -- array )
|
||||
[ keys ]
|
||||
[ values [ values concat ] map concat append ] bi ;
|
||||
|
||||
: set-final-states ( regexp -- )
|
||||
dup
|
||||
[ nfa-table>> final-states>> keys ]
|
||||
[ dfa-table>> transitions>> states ] bi
|
||||
[ intersect empty? not ] with filter
|
||||
|
||||
swap dfa-table>> final-states>>
|
||||
[ conjoin ] curry each ;
|
||||
|
||||
: set-initial-state ( regexp -- )
|
||||
dup
|
||||
[ dfa-table>> ] [ find-start-state ] bi
|
||||
[ >>start-state drop ] keep
|
||||
1vector >>new-states drop ;
|
||||
|
||||
: construct-dfa ( regexp -- )
|
||||
[ set-initial-state ] [ new-transitions ] [ set-final-states ] tri ;
|
|
@ -0,0 +1,126 @@
|
|||
! Copyright (C) 2008 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors arrays assocs grouping kernel regexp2.backend
|
||||
locals math namespaces regexp2.parser sequences state-tables fry
|
||||
quotations math.order math.ranges vectors unicode.categories
|
||||
regexp2.utils regexp2.transition-tables words sequences.lib ;
|
||||
IN: regexp2.nfa
|
||||
|
||||
SYMBOL: negation-mode
|
||||
: negated? ( -- ? ) negation-mode get 0 or odd? ;
|
||||
|
||||
SINGLETON: eps
|
||||
|
||||
: next-state ( regexp -- state )
|
||||
[ state>> ] [ [ 1+ ] change-state drop ] bi ;
|
||||
|
||||
: set-start-state ( regexp -- )
|
||||
dup stack>> [
|
||||
drop
|
||||
] [
|
||||
[ nfa-table>> ] [ pop first ] bi* >>start-state drop
|
||||
] if-empty ;
|
||||
|
||||
GENERIC: nfa-node ( node -- )
|
||||
|
||||
:: add-simple-entry ( obj class -- )
|
||||
[let* | regexp [ current-regexp get ]
|
||||
s0 [ regexp next-state ]
|
||||
s1 [ regexp next-state ]
|
||||
stack [ regexp stack>> ]
|
||||
table [ regexp nfa-table>> ] |
|
||||
negated? [
|
||||
s0 f obj class boa table add-transition
|
||||
s0 s1 <default-transition> table add-transition
|
||||
] [
|
||||
s0 s1 obj class boa table add-transition
|
||||
] if
|
||||
s0 s1 2array stack push
|
||||
t s1 table final-states>> set-at ] ;
|
||||
|
||||
:: concatenate-nodes ( -- )
|
||||
[let* | regexp [ current-regexp get ]
|
||||
stack [ regexp stack>> ]
|
||||
table [ regexp nfa-table>> ]
|
||||
s2 [ stack peek first ]
|
||||
s3 [ stack pop second ]
|
||||
s0 [ stack peek first ]
|
||||
s1 [ stack pop second ] |
|
||||
s1 s2 eps <literal-transition> table add-transition
|
||||
s1 table final-states>> delete-at
|
||||
s0 s3 2array stack push ] ;
|
||||
|
||||
:: alternate-nodes ( -- )
|
||||
[let* | regexp [ current-regexp get ]
|
||||
stack [ regexp stack>> ]
|
||||
table [ regexp nfa-table>> ]
|
||||
s2 [ stack peek first ]
|
||||
s3 [ stack pop second ]
|
||||
s0 [ stack peek first ]
|
||||
s1 [ stack pop second ]
|
||||
s4 [ regexp next-state ]
|
||||
s5 [ regexp next-state ] |
|
||||
s4 s0 eps <literal-transition> table add-transition
|
||||
s4 s2 eps <literal-transition> table add-transition
|
||||
s1 s5 eps <literal-transition> table add-transition
|
||||
s3 s5 eps <literal-transition> table add-transition
|
||||
s1 table final-states>> delete-at
|
||||
s3 table final-states>> delete-at
|
||||
t s5 table final-states>> set-at
|
||||
s4 s5 2array stack push ] ;
|
||||
|
||||
M: kleene-star nfa-node ( node -- )
|
||||
term>> nfa-node
|
||||
[let* | regexp [ current-regexp get ]
|
||||
stack [ regexp stack>> ]
|
||||
s0 [ stack peek first ]
|
||||
s1 [ stack pop second ]
|
||||
s2 [ regexp next-state ]
|
||||
s3 [ regexp next-state ]
|
||||
table [ regexp nfa-table>> ] |
|
||||
s1 table final-states>> delete-at
|
||||
t s3 table final-states>> set-at
|
||||
s1 s0 eps <literal-transition> table add-transition
|
||||
s2 s0 eps <literal-transition> table add-transition
|
||||
s2 s3 eps <literal-transition> table add-transition
|
||||
s1 s3 eps <literal-transition> table add-transition
|
||||
s2 s3 2array stack push ] ;
|
||||
|
||||
M: concatenation nfa-node ( node -- )
|
||||
seq>>
|
||||
[ [ nfa-node ] each ]
|
||||
[ length 1- [ concatenate-nodes ] times ] bi ;
|
||||
|
||||
M: alternation nfa-node ( node -- )
|
||||
seq>>
|
||||
[ [ nfa-node ] each ]
|
||||
[ length 1- [ alternate-nodes ] times ] bi ;
|
||||
|
||||
M: constant nfa-node ( node -- )
|
||||
char>> literal-transition add-simple-entry ;
|
||||
|
||||
M: epsilon nfa-node ( node -- )
|
||||
drop eps literal-transition add-simple-entry ;
|
||||
|
||||
M: word nfa-node ( node -- )
|
||||
class-transition add-simple-entry ;
|
||||
|
||||
M: character-class-range nfa-node ( node -- )
|
||||
class-transition add-simple-entry ;
|
||||
|
||||
M: capture-group nfa-node ( node -- )
|
||||
term>> nfa-node ;
|
||||
|
||||
M: negation nfa-node ( node -- )
|
||||
negation-mode inc
|
||||
term>> nfa-node
|
||||
negation-mode dec ;
|
||||
|
||||
: construct-nfa ( regexp -- )
|
||||
[
|
||||
reset-regexp
|
||||
negation-mode off
|
||||
[ current-regexp set ]
|
||||
[ parse-tree>> nfa-node ]
|
||||
[ set-start-state ] tri
|
||||
] with-scope ;
|
|
@ -0,0 +1,33 @@
|
|||
USING: kernel tools.test regexp2.backend regexp2 ;
|
||||
IN: regexp2.parser
|
||||
|
||||
: test-regexp ( string -- )
|
||||
default-regexp parse-regexp ;
|
||||
|
||||
: test-regexp2 ( string -- regexp )
|
||||
default-regexp dup parse-regexp ;
|
||||
|
||||
[ "(" ] [ unmatched-parentheses? ] must-fail-with
|
||||
|
||||
[ ] [ "a|b" test-regexp ] unit-test
|
||||
[ ] [ "a.b" test-regexp ] unit-test
|
||||
[ ] [ "a|b|c" test-regexp ] unit-test
|
||||
[ ] [ "abc|b" test-regexp ] unit-test
|
||||
[ ] [ "a|bcd" test-regexp ] unit-test
|
||||
[ ] [ "a|(b)" test-regexp ] unit-test
|
||||
[ ] [ "(a)|b" test-regexp ] unit-test
|
||||
[ ] [ "(a|b)" test-regexp ] unit-test
|
||||
[ ] [ "((a)|(b))" test-regexp ] unit-test
|
||||
|
||||
[ ] [ "(?:a)" test-regexp ] unit-test
|
||||
[ ] [ "(?i:a)" test-regexp ] unit-test
|
||||
[ ] [ "(?-i:a)" test-regexp ] unit-test
|
||||
[ "(?z:a)" test-regexp ] [ bad-option? ] must-fail-with
|
||||
[ "(?-z:a)" test-regexp ] [ bad-option? ] must-fail-with
|
||||
|
||||
[ ] [ "(?=a)" test-regexp ] unit-test
|
||||
|
||||
[ ] [ "[abc]" test-regexp ] unit-test
|
||||
[ ] [ "[a-c]" test-regexp ] unit-test
|
||||
[ ] [ "[^a-c]" test-regexp ] unit-test
|
||||
[ "[^]" test-regexp ] must-fail
|
|
@ -0,0 +1,362 @@
|
|||
! Copyright (C) 2008 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors arrays assocs combinators io io.streams.string
|
||||
kernel math math.parser multi-methods namespaces qualified
|
||||
quotations sequences sequences.lib splitting symbols vectors
|
||||
dlists math.order combinators.lib unicode.categories
|
||||
sequences.lib regexp2.backend regexp2.utils ;
|
||||
IN: regexp2.parser
|
||||
|
||||
FROM: math.ranges => [a,b] ;
|
||||
|
||||
MIXIN: node
|
||||
TUPLE: concatenation seq ; INSTANCE: concatenation node
|
||||
TUPLE: alternation seq ; INSTANCE: alternation node
|
||||
TUPLE: kleene-star term ; INSTANCE: kleene-star node
|
||||
TUPLE: question term ; INSTANCE: question node
|
||||
TUPLE: negation term ; INSTANCE: negation node
|
||||
TUPLE: constant char ; INSTANCE: constant node
|
||||
TUPLE: range from to ; INSTANCE: range node
|
||||
TUPLE: lookahead term ; INSTANCE: lookahead node
|
||||
TUPLE: lookbehind term ; INSTANCE: lookbehind node
|
||||
TUPLE: capture-group term ; INSTANCE: capture-group node
|
||||
TUPLE: non-capture-group term ; INSTANCE: non-capture-group node
|
||||
TUPLE: independent-group term ; INSTANCE: independent-group node
|
||||
TUPLE: character-class-range from to ; INSTANCE: character-class-range node
|
||||
SINGLETON: epsilon INSTANCE: epsilon node
|
||||
SINGLETON: any-char INSTANCE: any-char node
|
||||
SINGLETON: front-anchor INSTANCE: front-anchor node
|
||||
SINGLETON: back-anchor INSTANCE: back-anchor node
|
||||
|
||||
TUPLE: option-on option ; INSTANCE: option-on node
|
||||
TUPLE: option-off option ; INSTANCE: option-off node
|
||||
SINGLETONS: unix-lines dotall multiline comments case-insensitive unicode-case ;
|
||||
MIXIN: regexp-option
|
||||
INSTANCE: unix-lines regexp-option
|
||||
INSTANCE: dotall regexp-option
|
||||
INSTANCE: multiline regexp-option
|
||||
INSTANCE: comments regexp-option
|
||||
INSTANCE: case-insensitive regexp-option
|
||||
INSTANCE: unicode-case regexp-option
|
||||
|
||||
SINGLETONS: letter-class LETTER-class Letter-class digit-class
|
||||
alpha-class non-newline-blank-class
|
||||
ascii-class punctuation-class java-printable-class blank-class
|
||||
control-character-class hex-digit-class java-blank-class c-identifier-class ;
|
||||
|
||||
SINGLETONS: beginning-of-group end-of-group
|
||||
beginning-of-character-class end-of-character-class
|
||||
left-parenthesis pipe caret dash ;
|
||||
|
||||
: <constant> ( obj -- constant ) constant boa ;
|
||||
: <negation> ( obj -- negation ) negation boa ;
|
||||
: <concatenation> ( seq -- concatenation ) >vector concatenation boa ;
|
||||
: <alternation> ( seq -- alternation ) >vector alternation boa ;
|
||||
: <capture-group> ( obj -- capture-group ) capture-group boa ;
|
||||
: <kleene-star> ( obj -- kleene-star ) kleene-star boa ;
|
||||
|
||||
: first|concatenation ( seq -- first/concatenation )
|
||||
dup length 1 = [ first ] [ <concatenation> ] if ;
|
||||
|
||||
: first|alternation ( seq -- first/alternation )
|
||||
dup length 1 = [ first ] [ <alternation> ] if ;
|
||||
|
||||
ERROR: unmatched-parentheses ;
|
||||
|
||||
: make-positive-lookahead ( string -- )
|
||||
lookahead boa push-stack ;
|
||||
|
||||
: make-negative-lookahead ( string -- )
|
||||
<negation> lookahead boa push-stack ;
|
||||
|
||||
: make-independent-group ( string -- )
|
||||
#! no backtracking
|
||||
independent-group boa push-stack ;
|
||||
|
||||
: make-positive-lookbehind ( string -- )
|
||||
lookbehind boa push-stack ;
|
||||
|
||||
: make-negative-lookbehind ( string -- )
|
||||
<negation> lookbehind boa push-stack ;
|
||||
|
||||
DEFER: nested-parse-regexp
|
||||
: make-non-capturing-group ( string -- )
|
||||
non-capture-group boa push-stack ;
|
||||
|
||||
ERROR: bad-option ch ;
|
||||
|
||||
: option ( ch -- singleton )
|
||||
{
|
||||
{ CHAR: i [ case-insensitive ] }
|
||||
{ CHAR: d [ unix-lines ] }
|
||||
{ CHAR: m [ multiline ] }
|
||||
{ CHAR: s [ dotall ] }
|
||||
{ CHAR: u [ unicode-case ] }
|
||||
{ CHAR: x [ comments ] }
|
||||
[ bad-option ]
|
||||
} case ;
|
||||
|
||||
: option-on ( ch -- ) option \ option-on boa push-stack ;
|
||||
: option-off ( ch -- ) option \ option-off boa push-stack ;
|
||||
: toggle-option ( ch ? -- ) [ option-on ] [ option-off ] if ;
|
||||
: (parse-options) ( string ? -- ) [ toggle-option ] curry each ;
|
||||
|
||||
: parse-options ( string -- )
|
||||
"-" split1 [ t (parse-options) ] [ f (parse-options) ] bi* ;
|
||||
|
||||
DEFER: (parse-regexp)
|
||||
: parse-special-group-options ( options -- )
|
||||
beginning-of-group push-stack
|
||||
parse-options (parse-regexp) pop-stack make-non-capturing-group ;
|
||||
|
||||
ERROR: bad-special-group string ;
|
||||
|
||||
: (parse-special-group) ( -- )
|
||||
read1 {
|
||||
{ [ dup CHAR: : = ]
|
||||
[ drop nested-parse-regexp pop-stack make-non-capturing-group ] }
|
||||
{ [ dup CHAR: = = ]
|
||||
[ drop nested-parse-regexp pop-stack make-positive-lookahead ] }
|
||||
{ [ dup CHAR: = = ]
|
||||
[ drop nested-parse-regexp pop-stack make-negative-lookahead ] }
|
||||
{ [ dup CHAR: > = ]
|
||||
[ drop nested-parse-regexp pop-stack make-independent-group ] }
|
||||
{ [ dup CHAR: < = peek1 CHAR: = = and ]
|
||||
[ drop read1 drop nested-parse-regexp pop-stack make-positive-lookbehind ] }
|
||||
{ [ dup CHAR: < = peek1 CHAR: ! = and ]
|
||||
[ drop read1 drop nested-parse-regexp pop-stack make-negative-lookbehind ] }
|
||||
[
|
||||
":" read-until [ bad-special-group ] unless
|
||||
swap prefix parse-special-group-options
|
||||
]
|
||||
} cond ;
|
||||
|
||||
: handle-left-parenthesis ( -- )
|
||||
peek1 CHAR: ? =
|
||||
[ read1 drop (parse-special-group) ]
|
||||
[ nested-parse-regexp ] if ;
|
||||
|
||||
: handle-dot ( -- ) any-char push-stack ;
|
||||
: handle-pipe ( -- ) pipe push-stack ;
|
||||
: handle-star ( -- ) stack pop <kleene-star> push-stack ;
|
||||
: handle-question ( -- )
|
||||
stack pop epsilon 2array <alternation> push-stack ;
|
||||
: handle-plus ( -- )
|
||||
stack pop dup <kleene-star> 2array <concatenation> push-stack ;
|
||||
|
||||
ERROR: unmatched-brace ;
|
||||
: parse-repetition ( -- start finish ? )
|
||||
"}" read-until [ unmatched-brace ] unless
|
||||
[ "," split1 [ string>number ] bi@ ]
|
||||
[ CHAR: , swap index >boolean ] bi ;
|
||||
|
||||
: replicate/concatenate ( n obj -- obj' )
|
||||
over zero? [ 2drop epsilon ]
|
||||
[ <repetition> first|concatenation ] if ;
|
||||
|
||||
: exactly-n ( n -- )
|
||||
stack pop replicate/concatenate push-stack ;
|
||||
|
||||
: at-least-n ( n -- )
|
||||
stack pop
|
||||
[ replicate/concatenate ] keep
|
||||
<kleene-star> 2array <concatenation> push-stack ;
|
||||
|
||||
: at-most-n ( n -- )
|
||||
1+
|
||||
stack pop
|
||||
[ replicate/concatenate ] curry map <alternation> push-stack ;
|
||||
|
||||
: from-m-to-n ( m n -- )
|
||||
[a,b]
|
||||
stack pop
|
||||
[ replicate/concatenate ] curry map
|
||||
<alternation> push-stack ;
|
||||
|
||||
ERROR: invalid-range a b ;
|
||||
|
||||
: handle-left-brace ( -- )
|
||||
parse-repetition
|
||||
>r 2dup [ [ 0 < [ invalid-range ] when ] when* ] bi@ r>
|
||||
[
|
||||
2dup and [ from-m-to-n ]
|
||||
[ [ nip at-most-n ] [ at-least-n ] if* ] if
|
||||
] [ drop 0 max exactly-n ] if ;
|
||||
|
||||
: handle-front-anchor ( -- ) front-anchor push-stack ;
|
||||
: handle-back-anchor ( -- ) back-anchor push-stack ;
|
||||
|
||||
ERROR: bad-character-class obj ;
|
||||
ERROR: expected-posix-class ;
|
||||
|
||||
: parse-posix-class ( -- obj )
|
||||
read1 CHAR: { = [ expected-posix-class ] unless
|
||||
"}" read-until [ bad-character-class ] unless
|
||||
{
|
||||
{ "Lower" [ letter-class ] }
|
||||
{ "Upper" [ LETTER-class ] }
|
||||
{ "ASCII" [ ascii-class ] }
|
||||
{ "Alpha" [ Letter-class ] }
|
||||
{ "Digit" [ digit-class ] }
|
||||
{ "Alnum" [ alpha-class ] }
|
||||
{ "Punct" [ punctuation-class ] }
|
||||
{ "Graph" [ java-printable-class ] }
|
||||
{ "Print" [ java-printable-class ] }
|
||||
{ "Blank" [ non-newline-blank-class ] }
|
||||
{ "Cntrl" [ control-character-class ] }
|
||||
{ "XDigit" [ hex-digit-class ] }
|
||||
{ "Space" [ java-blank-class ] }
|
||||
! TODO: unicode-character-class, fallthrough in unicode is bad-char-clss
|
||||
[ bad-character-class ]
|
||||
} case ;
|
||||
|
||||
: parse-octal ( -- n ) 3 read oct> check-octal ;
|
||||
: parse-short-hex ( -- n ) 2 read hex> check-hex ;
|
||||
: parse-long-hex ( -- n ) 6 read hex> check-hex ;
|
||||
: parse-control-character ( -- n ) read1 ;
|
||||
|
||||
ERROR: bad-escaped-literals seq ;
|
||||
: parse-escaped-literals ( -- obj )
|
||||
"\\E" read-until [ bad-escaped-literals ] unless
|
||||
read1 drop
|
||||
[ epsilon ] [
|
||||
[ <constant> ] V{ } map-as
|
||||
first|concatenation
|
||||
] if-empty ;
|
||||
|
||||
: parse-escaped ( -- obj )
|
||||
read1
|
||||
{
|
||||
{ CHAR: \ [ CHAR: \ <constant> ] }
|
||||
{ CHAR: . [ CHAR: . <constant> ] }
|
||||
{ CHAR: t [ CHAR: \t <constant> ] }
|
||||
{ CHAR: n [ CHAR: \n <constant> ] }
|
||||
{ CHAR: r [ CHAR: \r <constant> ] }
|
||||
{ CHAR: f [ HEX: c <constant> ] }
|
||||
{ CHAR: a [ HEX: 7 <constant> ] }
|
||||
{ CHAR: e [ HEX: 1b <constant> ] }
|
||||
|
||||
{ CHAR: d [ digit-class ] }
|
||||
{ CHAR: D [ digit-class <negation> ] }
|
||||
{ CHAR: s [ java-blank-class ] }
|
||||
{ CHAR: S [ java-blank-class <negation> ] }
|
||||
{ CHAR: w [ c-identifier-class ] }
|
||||
{ CHAR: W [ c-identifier-class <negation> ] }
|
||||
|
||||
{ CHAR: p [ parse-posix-class ] }
|
||||
{ CHAR: P [ parse-posix-class <negation> ] }
|
||||
{ CHAR: x [ parse-short-hex <constant> ] }
|
||||
{ CHAR: u [ parse-long-hex <constant> ] }
|
||||
{ CHAR: 0 [ parse-octal <constant> ] }
|
||||
{ CHAR: c [ parse-control-character ] }
|
||||
|
||||
{ CHAR: Q [ parse-escaped-literals ] }
|
||||
} case ;
|
||||
|
||||
: handle-escape ( -- ) parse-escaped push-stack ;
|
||||
|
||||
: handle-dash ( vector -- vector' )
|
||||
H{ { dash CHAR: - } } substitute ;
|
||||
|
||||
: character-class>alternation ( seq -- alternation )
|
||||
[ dup number? [ <constant> ] when ] map first|alternation ;
|
||||
|
||||
: handle-caret ( vector -- vector' )
|
||||
dup [ length 2 >= ] [ first caret eq? ] bi and [
|
||||
rest-slice character-class>alternation <negation>
|
||||
] [
|
||||
character-class>alternation
|
||||
] if ;
|
||||
|
||||
: make-character-class ( -- character-class )
|
||||
[ beginning-of-character-class swap cut-stack ] change-whole-stack
|
||||
handle-dash handle-caret ;
|
||||
|
||||
: apply-dash ( -- )
|
||||
stack [ pop3 nip character-class-range boa ] keep push ;
|
||||
|
||||
: apply-dash? ( -- ? )
|
||||
stack dup length 3 >=
|
||||
[ [ length 2 - ] keep nth dash eq? ] [ drop f ] if ;
|
||||
|
||||
ERROR: empty-negated-character-class ;
|
||||
DEFER: handle-left-bracket
|
||||
: (parse-character-class) ( -- )
|
||||
read1 [ empty-negated-character-class ] unless* {
|
||||
{ CHAR: [ [ handle-left-bracket t ] }
|
||||
{ CHAR: ] [ make-character-class push-stack f ] }
|
||||
{ CHAR: - [ dash push-stack t ] }
|
||||
{ CHAR: \ [ parse-escaped push-stack t ] }
|
||||
[ push-stack apply-dash? [ apply-dash ] when t ]
|
||||
} case
|
||||
[ (parse-character-class) ] when ;
|
||||
|
||||
: parse-character-class-second ( -- )
|
||||
read1 {
|
||||
{ CHAR: [ [ CHAR: [ <constant> push-stack ] }
|
||||
{ CHAR: ] [ CHAR: ] <constant> push-stack ] }
|
||||
{ CHAR: - [ CHAR: - <constant> push-stack ] }
|
||||
[ push1 ]
|
||||
} case ;
|
||||
|
||||
: parse-character-class-first ( -- )
|
||||
read1 {
|
||||
{ CHAR: ^ [ caret push-stack parse-character-class-second ] }
|
||||
{ CHAR: [ [ CHAR: [ <constant> push-stack ] }
|
||||
{ CHAR: ] [ CHAR: ] <constant> push-stack ] }
|
||||
{ CHAR: - [ CHAR: - <constant> push-stack ] }
|
||||
[ push1 ]
|
||||
} case ;
|
||||
|
||||
: handle-left-bracket ( -- )
|
||||
beginning-of-character-class push-stack
|
||||
parse-character-class-first (parse-character-class) ;
|
||||
|
||||
ERROR: empty-regexp ;
|
||||
: finish-regexp-parse ( stack -- obj )
|
||||
dup length {
|
||||
{ 0 [ empty-regexp ] }
|
||||
{ 1 [ first ] }
|
||||
[
|
||||
drop { pipe } split
|
||||
[ first|concatenation ] map first|alternation
|
||||
]
|
||||
} case ;
|
||||
|
||||
: handle-right-parenthesis ( -- )
|
||||
stack beginning-of-group over last-index cut rest
|
||||
[ current-regexp get swap >>stack drop ]
|
||||
[ finish-regexp-parse <capture-group> push-stack ] bi* ;
|
||||
|
||||
: nested-parse-regexp ( -- )
|
||||
beginning-of-group push-stack (parse-regexp) ;
|
||||
|
||||
: ((parse-regexp)) ( token -- )
|
||||
{
|
||||
{ CHAR: . [ handle-dot ] }
|
||||
{ CHAR: ( [ handle-left-parenthesis ] }
|
||||
{ CHAR: ) [ handle-right-parenthesis ] }
|
||||
{ CHAR: | [ handle-pipe ] }
|
||||
{ CHAR: ? [ handle-question ] }
|
||||
{ CHAR: * [ handle-star ] }
|
||||
{ CHAR: + [ handle-plus ] }
|
||||
{ CHAR: { [ handle-left-brace ] }
|
||||
{ CHAR: [ [ handle-left-bracket ] }
|
||||
{ CHAR: ^ [ handle-front-anchor ] }
|
||||
{ CHAR: $ [ handle-back-anchor ] }
|
||||
{ CHAR: \ [ handle-escape ] }
|
||||
[ <constant> push-stack ]
|
||||
} case ;
|
||||
|
||||
: (parse-regexp) ( -- )
|
||||
read1 [ ((parse-regexp)) (parse-regexp) ] when* ;
|
||||
|
||||
: parse-regexp ( regexp -- )
|
||||
dup current-regexp [
|
||||
raw>> [
|
||||
<string-reader> [ (parse-regexp) ] with-input-stream
|
||||
] unless-empty
|
||||
current-regexp get
|
||||
stack finish-regexp-parse
|
||||
>>parse-tree drop
|
||||
] with-variable ;
|
|
@ -0,0 +1,240 @@
|
|||
USING: regexp2 tools.test kernel regexp2.traversal ;
|
||||
IN: regexp2-tests
|
||||
|
||||
[ f ] [ "b" "a*" <regexp> matches? ] unit-test
|
||||
[ t ] [ "" "a*" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "a*" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aaaaaaa" "a*" <regexp> matches? ] unit-test
|
||||
[ f ] [ "ab" "a*" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "abc" "abc" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "a|b|c" <regexp> matches? ] unit-test
|
||||
[ t ] [ "b" "a|b|c" <regexp> matches? ] unit-test
|
||||
[ t ] [ "c" "a|b|c" <regexp> matches? ] unit-test
|
||||
[ f ] [ "c" "d|e|f" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "aa" "a|b|c" <regexp> matches? ] unit-test
|
||||
[ f ] [ "bb" "a|b|c" <regexp> matches? ] unit-test
|
||||
[ f ] [ "cc" "a|b|c" <regexp> matches? ] unit-test
|
||||
[ f ] [ "cc" "d|e|f" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "" "a+" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "a+" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aa" "a+" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "" "a?" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "a?" <regexp> matches? ] unit-test
|
||||
[ f ] [ "aa" "a?" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "" "." <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "." <regexp> matches? ] unit-test
|
||||
[ t ] [ "." "." <regexp> matches? ] unit-test
|
||||
! [ f ] [ "\n" "." <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "" ".+" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" ".+" <regexp> matches? ] unit-test
|
||||
[ t ] [ "ab" ".+" <regexp> matches? ] unit-test
|
||||
|
||||
|
||||
[ t ] [ "" "a|b*|c+|d?" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "a|b*|c+|d?" <regexp> matches? ] unit-test
|
||||
[ t ] [ "c" "a|b*|c+|d?" <regexp> matches? ] unit-test
|
||||
[ t ] [ "cc" "a|b*|c+|d?" <regexp> matches? ] unit-test
|
||||
[ f ] [ "ccd" "a|b*|c+|d?" <regexp> matches? ] unit-test
|
||||
[ t ] [ "d" "a|b*|c+|d?" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "foo" "foo|bar" <regexp> matches? ] unit-test
|
||||
[ t ] [ "bar" "foo|bar" <regexp> matches? ] unit-test
|
||||
[ f ] [ "foobar" "foo|bar" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "" "(a)" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "(a)" <regexp> matches? ] unit-test
|
||||
[ f ] [ "aa" "(a)" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aa" "(a*)" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "aababaaabbac" "(a|b)+" <regexp> matches? ] unit-test
|
||||
[ t ] [ "ababaaabba" "(a|b)+" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "" "a{1}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "a{1}" <regexp> matches? ] unit-test
|
||||
[ f ] [ "aa" "a{1}" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "a" "a{2,}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aaa" "a{2,}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aaaa" "a{2,}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aaaaa" "a{2,}" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "" "a{,2}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "a{,2}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aa" "a{,2}" <regexp> matches? ] unit-test
|
||||
[ f ] [ "aaa" "a{,2}" <regexp> matches? ] unit-test
|
||||
[ f ] [ "aaaa" "a{,2}" <regexp> matches? ] unit-test
|
||||
[ f ] [ "aaaaa" "a{,2}" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "" "a{1,3}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "a{1,3}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aa" "a{1,3}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aaa" "a{1,3}" <regexp> matches? ] unit-test
|
||||
[ f ] [ "aaaa" "a{1,3}" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "" "[a]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[a]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[abc]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "b" "[a]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "d" "[abc]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "ab" "[abc]{1,2}" <regexp> matches? ] unit-test
|
||||
[ f ] [ "abc" "[abc]{1,2}" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "" "[^a]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "a" "[^a]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "a" "[^abc]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "b" "[^a]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "d" "[^abc]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "ab" "[^abc]{1,2}" <regexp> matches? ] unit-test
|
||||
[ f ] [ "abc" "[^abc]{1,2}" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "]" "[]]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "]" "[^]]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[^]]" <regexp> matches? ] unit-test
|
||||
|
||||
[ "^" "[^]" <regexp> matches? ] must-fail
|
||||
[ t ] [ "^" "[]^]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "]" "[]^]" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "[" "[[]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "^" "[^^]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[^^]" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "-" "[-]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "a" "[-]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "-" "[^-]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[^-]" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "-" "[-a]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[-a]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "-" "[a-]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[a-]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "b" "[a-]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "-" "[^-]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[^-]" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "-" "[a-c]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "-" "[^a-c]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "b" "[a-c]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "b" "[^a-c]" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "-" "[a-c-]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "-" "[^a-c-]" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "\\" "[\\\\]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "a" "[\\\\]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "\\" "[^\\\\]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[^\\\\]" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "0" "[\\d]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "a" "[\\d]" <regexp> matches? ] unit-test
|
||||
[ f ] [ "0" "[^\\d]" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[^\\d]" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "a" "[a-z]{1,}|[A-Z]{2,4}|b*|c|(f|g)*" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[a-z]{1,2}|[A-Z]{3,3}|b*|c|(f|g)*" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "[a-z]{1,2}|[A-Z]{3,3}" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "1000" "\\d{4,6}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "1000" "[0-9]{4,6}" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "abc" "\\p{Lower}{3}" <regexp> matches? ] unit-test
|
||||
[ f ] [ "ABC" "\\p{Lower}{3}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "ABC" "\\p{Upper}{3}" <regexp> matches? ] unit-test
|
||||
[ f ] [ "abc" "\\p{Upper}{3}" <regexp> matches? ] unit-test
|
||||
!
|
||||
[ f ] [ "abc" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
|
||||
[ t ] [ "ABC" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "" "\\Q\\E" <regexp> matches? ] unit-test
|
||||
[ f ] [ "a" "\\Q\\E" <regexp> matches? ] unit-test
|
||||
[ t ] [ "|*+" "\\Q|*+\\E" <regexp> matches? ] unit-test
|
||||
[ f ] [ "abc" "\\Q|*+\\E" <regexp> matches? ] unit-test
|
||||
[ t ] [ "s" "\\Qs\\E" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "S" "\\0123" <regexp> matches? ] unit-test
|
||||
[ t ] [ "SXY" "\\0123XY" <regexp> matches? ] unit-test
|
||||
[ t ] [ "x" "\\x78" <regexp> matches? ] unit-test
|
||||
[ f ] [ "y" "\\x78" <regexp> matches? ] unit-test
|
||||
[ t ] [ "x" "\\u000078" <regexp> matches? ] unit-test
|
||||
[ f ] [ "y" "\\u000078" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "ab" "a+b" <regexp> matches? ] unit-test
|
||||
[ f ] [ "b" "a+b" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aab" "a+b" <regexp> matches? ] unit-test
|
||||
[ f ] [ "abb" "a+b" <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "abbbb" "ab*" <regexp> matches? ] unit-test
|
||||
[ t ] [ "a" "ab*" <regexp> matches? ] unit-test
|
||||
[ f ] [ "abab" "ab*" <regexp> matches? ] unit-test
|
||||
|
||||
[ f ] [ "x" "\\." <regexp> matches? ] unit-test
|
||||
[ t ] [ "." "\\." <regexp> matches? ] unit-test
|
||||
|
||||
[ t ] [ "aaaab" "a+ab" <regexp> matches? ] unit-test
|
||||
[ f ] [ "aaaxb" "a+ab" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aaacb" "a+cb" <regexp> matches? ] unit-test
|
||||
[ f ] [ "aaaab" "a++ab" <regexp> matches? ] unit-test
|
||||
[ t ] [ "aaacb" "a++cb" <regexp> matches? ] unit-test
|
||||
|
||||
[ 3 ] [ "aaacb" "a*" <regexp> match-head ] unit-test
|
||||
[ 1 ] [ "aaacb" "a+?" <regexp> match-head ] unit-test
|
||||
[ 2 ] [ "aaacb" "aa?" <regexp> match-head ] unit-test
|
||||
[ 1 ] [ "aaacb" "aa??" <regexp> match-head ] unit-test
|
||||
[ 3 ] [ "aacb" "aa?c" <regexp> match-head ] unit-test
|
||||
[ 3 ] [ "aacb" "aa??c" <regexp> match-head ] unit-test
|
||||
|
||||
! [ t ] [ "aaa" "AAA" t <regexp> matches? ] unit-test
|
||||
! [ f ] [ "aax" "AAA" t <regexp> matches? ] unit-test
|
||||
! [ t ] [ "aaa" "A*" t <regexp> matches? ] unit-test
|
||||
! [ f ] [ "aaba" "A*" t <regexp> matches? ] unit-test
|
||||
! [ t ] [ "b" "[AB]" t <regexp> matches? ] unit-test
|
||||
! [ f ] [ "c" "[AB]" t <regexp> matches? ] unit-test
|
||||
! [ t ] [ "c" "[A-Z]" t <regexp> matches? ] unit-test
|
||||
! [ f ] [ "3" "[A-Z]" t <regexp> matches? ] unit-test
|
||||
|
||||
[ ] [
|
||||
"(0[lL]?|[1-9]\\d{0,9}(\\d{0,9}[lL])?|0[xX]\\p{XDigit}{1,8}(\\p{XDigit}{0,8}[lL])?|0[0-7]{1,11}([0-7]{0,11}[lL])?|([0-9]+\\.[0-9]*|\\.[0-9]+)([eE][+-]?[0-9]+)?[fFdD]?|[0-9]+([eE][+-]?[0-9]+[fFdD]?|([eE][+-]?[0-9]+)?[fFdD]))"
|
||||
<regexp> drop
|
||||
] unit-test
|
||||
|
||||
[ t ] [ "fxxbar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
|
||||
[ f ] [ "foobar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
|
||||
|
||||
! [ 3 ] [ "foobar" "foo(?=bar)" <regexp> match-head ] unit-test
|
||||
! [ f ] [ "foobxr" "foo(?=bar)" <regexp> match-head ] unit-test
|
||||
|
||||
! [ f ] [ "foobxr" "foo\\z" <regexp> match-head ] unit-test
|
||||
! [ 3 ] [ "foo" "foo\\z" <regexp> match-head ] unit-test
|
||||
|
||||
! [ 3 ] [ "foo bar" "foo\\b" <regexp> match-head ] unit-test
|
||||
! [ f ] [ "fooxbar" "foo\\b" <regexp> matches? ] unit-test
|
||||
! [ t ] [ "foo" "foo\\b" <regexp> matches? ] unit-test
|
||||
! [ t ] [ "foo bar" "foo\\b bar" <regexp> matches? ] unit-test
|
||||
! [ f ] [ "fooxbar" "foo\\bxbar" <regexp> matches? ] unit-test
|
||||
! [ f ] [ "foo" "foo\\bbar" <regexp> matches? ] unit-test
|
||||
|
||||
! [ f ] [ "foo bar" "foo\\B" <regexp> matches? ] unit-test
|
||||
! [ 3 ] [ "fooxbar" "foo\\B" <regexp> match-head ] unit-test
|
||||
! [ t ] [ "foo" "foo\\B" <regexp> matches? ] unit-test
|
||||
! [ f ] [ "foo bar" "foo\\B bar" <regexp> matches? ] unit-test
|
||||
! [ t ] [ "fooxbar" "foo\\Bxbar" <regexp> matches? ] unit-test
|
||||
! [ f ] [ "foo" "foo\\Bbar" <regexp> matches? ] unit-test
|
||||
|
||||
! [ t ] [ "s@f" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
|
||||
! [ f ] [ "a" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
|
||||
! [ t ] [ ".o" "\\.[a-z]" <regexp> matches? ] unit-test
|
||||
|
||||
! Bug in parsing word
|
||||
! [ t ] [ "a" R' a' matches? ] unit-test
|
||||
|
||||
! ((A)(B(C)))
|
||||
! 1. ((A)(B(C)))
|
||||
! 2. (A)
|
||||
! 3. (B(C))
|
||||
! 4. (C)
|
|
@ -0,0 +1,35 @@
|
|||
! Copyright (C) 2008 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors combinators kernel regexp2.backend regexp2.utils
|
||||
regexp2.parser regexp2.nfa regexp2.dfa regexp2.traversal state-tables
|
||||
regexp2.transition-tables ;
|
||||
IN: regexp2
|
||||
|
||||
: default-regexp ( string -- regexp )
|
||||
regexp new
|
||||
swap >>raw
|
||||
<transition-table> >>nfa-table
|
||||
<transition-table> >>dfa-table
|
||||
<transition-table> >>minimized-table
|
||||
reset-regexp ;
|
||||
|
||||
: <regexp> ( string -- regexp )
|
||||
default-regexp
|
||||
{
|
||||
[ parse-regexp ]
|
||||
[ construct-nfa ]
|
||||
[ construct-dfa ]
|
||||
[ ]
|
||||
} cleave ;
|
||||
|
||||
: R! CHAR: ! <regexp> ; parsing
|
||||
: R" CHAR: " <regexp> ; parsing
|
||||
: R# CHAR: # <regexp> ; parsing
|
||||
: R' CHAR: ' <regexp> ; parsing
|
||||
: R( CHAR: ) <regexp> ; parsing
|
||||
: R/ CHAR: / <regexp> ; parsing
|
||||
: R@ CHAR: @ <regexp> ; parsing
|
||||
: R[ CHAR: ] <regexp> ; parsing
|
||||
: R` CHAR: ` <regexp> ; parsing
|
||||
: R{ CHAR: } <regexp> ; parsing
|
||||
: R| CHAR: | <regexp> ; parsing
|
|
@ -0,0 +1 @@
|
|||
Regular expressions
|
|
@ -0,0 +1,2 @@
|
|||
parsing
|
||||
text
|
|
@ -0,0 +1,44 @@
|
|||
! Copyright (C) 2008 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors arrays assocs fry hashtables kernel sequences
|
||||
vectors ;
|
||||
IN: regexp2.transition-tables
|
||||
|
||||
: insert-at ( value key hash -- )
|
||||
2dup at* [
|
||||
2nip push
|
||||
] [
|
||||
drop >r >r dup vector? [ 1vector ] unless r> r> set-at
|
||||
] if ;
|
||||
|
||||
: ?insert-at ( value key hash/f -- hash )
|
||||
[ H{ } clone ] unless* [ insert-at ] keep ;
|
||||
|
||||
TUPLE: transition from to obj ;
|
||||
TUPLE: literal-transition < transition ;
|
||||
TUPLE: class-transition < transition ;
|
||||
TUPLE: default-transition < transition ;
|
||||
|
||||
TUPLE: literal obj ;
|
||||
TUPLE: class obj ;
|
||||
TUPLE: default ;
|
||||
: <literal-transition> ( from to obj -- transition ) literal-transition boa ;
|
||||
: <class-transition> ( from to obj -- transition ) class-transition boa ;
|
||||
: <default-transition> ( from to -- transition ) t default-transition boa ;
|
||||
|
||||
TUPLE: transition-table transitions
|
||||
literals classes defaults
|
||||
start-state final-states ;
|
||||
|
||||
: <transition-table> ( -- transition-table )
|
||||
transition-table new
|
||||
H{ } clone >>transitions
|
||||
H{ } clone >>final-states ;
|
||||
|
||||
: set-transition ( transition hash -- )
|
||||
>r [ to>> ] [ obj>> ] [ from>> ] tri r>
|
||||
2dup at* [ 2nip insert-at ]
|
||||
[ drop >r >r H{ } clone [ insert-at ] keep r> r> set-at ] if ;
|
||||
|
||||
: add-transition ( transition transition-table -- )
|
||||
transitions>> set-transition ;
|
|
@ -0,0 +1,88 @@
|
|||
! Copyright (C) 2008 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors assocs combinators combinators.lib kernel
|
||||
math math.ranges quotations sequences regexp2.parser
|
||||
regexp2.classes combinators.short-circuit assocs.lib
|
||||
sequences.lib ;
|
||||
IN: regexp2.traversal
|
||||
|
||||
TUPLE: dfa-traverser
|
||||
dfa-table
|
||||
last-state current-state
|
||||
text
|
||||
start-index current-index
|
||||
matches ;
|
||||
|
||||
: <dfa-traverser> ( text regexp -- match )
|
||||
dfa-table>>
|
||||
dfa-traverser new
|
||||
swap [ start-state>> >>current-state ] keep
|
||||
>>dfa-table
|
||||
swap >>text
|
||||
0 >>start-index
|
||||
0 >>current-index
|
||||
V{ } clone >>matches ;
|
||||
|
||||
: final-state? ( dfa-traverser -- ? )
|
||||
[ current-state>> ] [ dfa-table>> final-states>> ] bi
|
||||
key? ;
|
||||
|
||||
: text-finished? ( dfa-traverser -- ? )
|
||||
[ current-index>> ] [ text>> length ] bi >= ;
|
||||
|
||||
: save-final-state ( dfa-straverser -- )
|
||||
[ current-index>> ] [ matches>> ] bi push ;
|
||||
|
||||
: match-done? ( dfa-traverser -- ? )
|
||||
dup final-state? [
|
||||
dup save-final-state
|
||||
] when text-finished? ;
|
||||
|
||||
: increment-state ( dfa-traverser state -- dfa-traverser )
|
||||
>r [ 1+ ] change-current-index
|
||||
dup current-state>> >>last-state r>
|
||||
first >>current-state ;
|
||||
|
||||
: match-failed ( dfa-traverser -- dfa-traverser )
|
||||
V{ } clone >>matches ;
|
||||
|
||||
: match-literal ( transition from-state table -- to-state/f )
|
||||
transitions>> [ at ] [ 2drop f ] if-at ;
|
||||
|
||||
: assoc-with ( param assoc quot -- assoc curry )
|
||||
swapd [ [ -rot ] dip call ] 2curry ; inline
|
||||
|
||||
: match-class ( transition from-state table -- to-state/f )
|
||||
transitions>> at* [
|
||||
[ drop class-member? ] assoc-with assoc-find [ nip ] [ drop ] if
|
||||
] [ drop ] if ;
|
||||
|
||||
: match-default ( transition from-state table -- to-state/f )
|
||||
[ nip ] dip transitions>>
|
||||
[ t swap [ drop f ] unless-at ] [ drop f ] if-at ;
|
||||
|
||||
: match-transition ( obj from-state dfa -- to-state/f )
|
||||
{ [ match-literal ] [ match-class ] [ match-default ] } 3|| ;
|
||||
|
||||
: setup-match ( match -- obj state dfa-table )
|
||||
{ current-index>> text>> current-state>> dfa-table>> } get-slots
|
||||
[ nth ] 2dip ;
|
||||
|
||||
: do-match ( dfa-traverser -- dfa-traverser )
|
||||
dup match-done? [
|
||||
dup setup-match match-transition
|
||||
[ increment-state do-match ] when*
|
||||
] unless ;
|
||||
|
||||
: return-match ( dfa-traverser -- interval/f )
|
||||
dup matches>>
|
||||
[ drop f ]
|
||||
[ [ start-index>> ] [ peek ] bi* 1 <range> ] if-empty ;
|
||||
|
||||
: match ( string regexp -- pair )
|
||||
<dfa-traverser> do-match return-match ;
|
||||
|
||||
: matches? ( string regexp -- ? )
|
||||
dupd match [ [ length ] [ range-length 1- ] bi* = ] [ drop f ] if* ;
|
||||
|
||||
: match-head ( string regexp -- end ) match length>> 1- ;
|
|
@ -0,0 +1,69 @@
|
|||
! Copyright (C) 2008 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors arrays assocs combinators.lib io kernel
|
||||
math math.order namespaces regexp2.backend sequences
|
||||
sequences.lib unicode.categories math.ranges fry
|
||||
combinators.short-circuit ;
|
||||
IN: regexp2.utils
|
||||
|
||||
: (while-changes) ( obj quot pred pred-ret -- obj )
|
||||
! quot: ( obj -- obj' )
|
||||
! pred: ( obj -- <=> )
|
||||
>r >r dup slip r> pick over call r> dupd =
|
||||
[ 3drop ] [ (while-changes) ] if ; inline
|
||||
|
||||
: while-changes ( obj quot pred -- obj' )
|
||||
pick over call (while-changes) ; inline
|
||||
|
||||
: last-state ( regexp -- range ) stack>> peek first2 [a,b] ;
|
||||
: push1 ( obj -- ) input-stream get stream>> push ;
|
||||
: peek1 ( -- obj ) input-stream get stream>> [ f ] [ peek ] if-empty ;
|
||||
: pop3 ( seq -- obj1 obj2 obj3 ) [ pop ] [ pop ] [ pop ] tri spin ;
|
||||
|
||||
: stack ( -- obj ) current-regexp get stack>> ;
|
||||
: change-whole-stack ( quot -- )
|
||||
current-regexp get
|
||||
[ stack>> swap call ] keep (>>stack) ; inline
|
||||
: push-stack ( obj -- ) stack push ;
|
||||
: pop-stack ( -- obj ) stack pop ;
|
||||
: cut-out ( vector n -- vector' vector ) cut rest ;
|
||||
ERROR: cut-stack-error ;
|
||||
: cut-stack ( obj vector -- vector' vector )
|
||||
tuck last-index [ cut-stack-error ] unless* cut-out swap ;
|
||||
|
||||
ERROR: bad-octal number ;
|
||||
ERROR: bad-hex number ;
|
||||
: check-octal ( octal -- octal ) dup 255 > [ bad-octal ] when ;
|
||||
: check-hex ( hex -- hex ) dup number? [ bad-hex ] unless ;
|
||||
|
||||
: ascii? ( n -- ? ) 0 HEX: 7f between? ;
|
||||
: octal-digit? ( n -- ? ) CHAR: 0 CHAR: 7 between? ;
|
||||
: decimal-digit? ( n -- ? ) CHAR: 0 CHAR: 9 between? ;
|
||||
|
||||
: hex-digit? ( n -- ? )
|
||||
[
|
||||
[ decimal-digit? ]
|
||||
[ CHAR: a CHAR: f between? ]
|
||||
[ CHAR: A CHAR: F between? ]
|
||||
] 1|| ;
|
||||
|
||||
: control-char? ( n -- ? )
|
||||
[
|
||||
[ 0 HEX: 1f between? ]
|
||||
[ HEX: 7f = ]
|
||||
] 1|| ;
|
||||
|
||||
: punct? ( n -- ? )
|
||||
"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" member? ;
|
||||
|
||||
: c-identifier-char? ( ch -- ? )
|
||||
[ [ alpha? ] [ CHAR: _ = ] ] 1|| ;
|
||||
|
||||
: java-blank? ( n -- ? )
|
||||
{
|
||||
CHAR: \s CHAR: \t CHAR: \n
|
||||
HEX: b HEX: 7 CHAR: \r
|
||||
} member? ;
|
||||
|
||||
: java-printable? ( n -- ? )
|
||||
[ [ alpha? ] [ punct? ] ] 1|| ;
|
|
@ -1 +1 @@
|
|||
|
||||
taxes
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
USING: arrays assocs kernel math math.intervals namespaces
|
||||
sequences combinators.lib money math.order ;
|
||||
! Copyright (C) 2008 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors arrays assocs kernel math math.intervals
|
||||
namespaces sequences combinators.lib money math.order ;
|
||||
IN: taxes
|
||||
|
||||
: monthly ( x -- y ) 12 / ;
|
||||
|
@ -14,22 +16,21 @@ C: <w4> w4
|
|||
|
||||
: allowance ( -- x ) 3500 ; inline
|
||||
|
||||
: calculate-w4-allowances ( w4 -- x )
|
||||
w4-allowances allowance * ;
|
||||
: calculate-w4-allowances ( w4 -- x ) allowances>> allowance * ;
|
||||
|
||||
! Withhold: FICA, Medicare, Federal (FICA is social security)
|
||||
: fica-tax-rate ( -- x ) DECIMAL: .062 ; inline
|
||||
|
||||
! Base rate -- income over this rate is not taxed
|
||||
TUPLE: fica-base-unknown ;
|
||||
ERROR: fica-base-unknown ;
|
||||
: fica-base-rate ( year -- x )
|
||||
H{
|
||||
{ 2008 102000 }
|
||||
{ 2007 97500 }
|
||||
} at* [ T{ fica-base-unknown } throw ] unless ;
|
||||
} at* [ fica-base-unknown ] unless ;
|
||||
|
||||
: fica-tax ( salary w4 -- x )
|
||||
w4-year fica-base-rate min fica-tax-rate * ;
|
||||
year>> fica-base-rate min fica-tax-rate * ;
|
||||
|
||||
! Employer tax only, not withheld
|
||||
: futa-tax-rate ( -- x ) DECIMAL: .062 ; inline
|
||||
|
@ -64,8 +65,7 @@ TUPLE: tax-table single married ;
|
|||
0 -rot [ tax-bracket ] each drop ;
|
||||
|
||||
: marriage-table ( w4 tax-table -- triples )
|
||||
swap w4-married?
|
||||
[ tax-table-married ] [ tax-table-single ] if ;
|
||||
swap married?>> [ married>> ] [ single>> ] if ;
|
||||
|
||||
: federal-tax ( salary w4 tax-table -- n )
|
||||
[ adjust-allowances ] 2keep marriage-table tax ;
|
||||
|
|
Loading…
Reference in New Issue