Merge branch 'master' of git://factorcode.org/git/factor

db4
Doug Coleman 2008-08-21 12:46:29 -05:00
commit e0091012e6
37 changed files with 1457 additions and 294 deletions

View File

@ -4,7 +4,7 @@ USING: math kernel io sequences io.buffers io.timeouts generic
byte-vectors system io.encodings math.order io.backend byte-vectors system io.encodings math.order io.backend
continuations debugger classes byte-arrays namespaces splitting continuations debugger classes byte-arrays namespaces splitting
grouping dlists assocs io.encodings.binary summary accessors grouping dlists assocs io.encodings.binary summary accessors
destructors ; destructors combinators ;
IN: io.ports IN: io.ports
SYMBOL: default-buffer-size SYMBOL: default-buffer-size
@ -133,10 +133,12 @@ M: output-port stream-flush ( port -- )
M: output-port dispose* M: output-port dispose*
[ [
{
[ handle>> &dispose drop ] [ handle>> &dispose drop ]
[ buffer>> &dispose drop ]
[ port-flush ] [ port-flush ]
[ handle>> shutdown ] [ handle>> shutdown ]
tri } cleave
] with-destructors ; ] with-destructors ;
M: buffered-port dispose* M: buffered-port dispose*

View File

@ -3,12 +3,6 @@ smtp.server kernel sequences namespaces logging accessors
assocs sorting ; assocs sorting ;
IN: smtp.tests IN: smtp.tests
[ t ] [
<email>
dup clone "a" "b" set-header drop
headers>> assoc-empty?
] unit-test
{ 0 0 } [ [ ] with-smtp-connection ] must-infer-as { 0 0 } [ [ ] with-smtp-connection ] must-infer-as
[ "hello\nworld" validate-address ] must-fail [ "hello\nworld" validate-address ] must-fail
@ -60,12 +54,13 @@ IN: smtp.tests
"Ed <dharmatech@factorcode.org>" "Ed <dharmatech@factorcode.org>"
} >>to } >>to
"Doug <erg@factorcode.org>" >>from "Doug <erg@factorcode.org>" >>from
prepare [
dup headers>> >alist sort-keys [ email>headers sort-keys [
drop { "Date" "Message-Id" } member? not drop { "Date" "Message-Id" } member? not
] assoc-filter ] assoc-filter
over to>> ]
rot from>> [ to>> [ extract-email ] map ]
[ from>> extract-email ] tri
] unit-test ] unit-test
[ ] [ [ 4321 mock-smtp-server ] "SMTP server" spawn drop ] unit-test [ ] [ [ 4321 mock-smtp-server ] "SMTP server" spawn drop ] unit-test

View File

@ -1,10 +1,10 @@
! Copyright (C) 2007, 2008 Elie CHAFTARI, Dirk Vleugels, ! Copyright (C) 2007, 2008 Elie CHAFTARI, Dirk Vleugels,
! Slava Pestov. ! Slava Pestov.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: namespaces io io.timeouts kernel logging io.sockets USING: arrays namespaces io io.timeouts kernel logging io.sockets
sequences combinators sequences.lib splitting assocs strings sequences combinators sequences.lib splitting assocs strings
math.parser random system calendar io.encodings.ascii math.parser random system calendar io.encodings.ascii summary
calendar.format accessors sets ; calendar.format accessors sets hashtables ;
IN: smtp IN: smtp
SYMBOL: smtp-domain SYMBOL: smtp-domain
@ -23,6 +23,16 @@ LOG: log-smtp-connection NOTICE ( addrspec -- )
call call
] with-client ; inline ] with-client ; inline
TUPLE: email
{ from string }
{ to array }
{ cc array }
{ bcc array }
{ subject string }
{ body string } ;
: <email> ( -- email ) email new ;
: crlf ( -- ) "\r\n" write ; : crlf ( -- ) "\r\n" write ;
: command ( string -- ) write crlf flush ; : command ( string -- ) write crlf flush ;
@ -30,10 +40,12 @@ LOG: log-smtp-connection NOTICE ( addrspec -- )
: helo ( -- ) : helo ( -- )
esmtp get "EHLO " "HELO " ? host-name append command ; esmtp get "EHLO " "HELO " ? host-name append command ;
ERROR: bad-email-address email ;
: validate-address ( string -- string' ) : validate-address ( string -- string' )
#! Make sure we send funky stuff to the server by accident. #! Make sure we send funky stuff to the server by accident.
dup "\r\n>" intersect empty? dup "\r\n>" intersect empty?
[ "Bad e-mail address: " prepend throw ] unless ; [ bad-email-address ] unless ;
: mail-from ( fromaddr -- ) : mail-from ( fromaddr -- )
"MAIL FROM:<" swap validate-address ">" 3append command ; "MAIL FROM:<" swap validate-address ">" 3append command ;
@ -44,8 +56,15 @@ LOG: log-smtp-connection NOTICE ( addrspec -- )
: data ( -- ) : data ( -- )
"DATA" command ; "DATA" command ;
ERROR: message-contains-dot message ;
M: message-contains-dot summary ( obj -- string )
drop
"Message cannot contain . on a line by itself" ;
: validate-message ( msg -- msg' ) : validate-message ( msg -- msg' )
"." over member? [ "Message cannot contain . on a line by itself" throw ] when ; "." over member?
[ message-contains-dot ] when ;
: send-body ( body -- ) : send-body ( body -- )
string-lines string-lines
@ -58,19 +77,37 @@ LOG: log-smtp-connection NOTICE ( addrspec -- )
LOG: smtp-response DEBUG LOG: smtp-response DEBUG
ERROR: smtp-error message ;
ERROR: smtp-server-busy < smtp-error ;
ERROR: smtp-syntax-error < smtp-error ;
ERROR: smtp-command-not-implemented < smtp-error ;
ERROR: smtp-bad-authentication < smtp-error ;
ERROR: smtp-mailbox-unavailable < smtp-error ;
ERROR: smtp-user-not-local < smtp-error ;
ERROR: smtp-exceeded-storage-allocation < smtp-error ;
ERROR: smtp-bad-mailbox-name < smtp-error ;
ERROR: smtp-transaction-failed < smtp-error ;
: check-response ( response -- ) : check-response ( response -- )
dup smtp-response
{ {
{ [ dup "220" head? ] [ smtp-response ] } { [ dup "bye" head? ] [ drop ] }
{ [ dup "235" swap subseq? ] [ smtp-response ] } { [ dup "220" head? ] [ drop ] }
{ [ dup "250" head? ] [ smtp-response ] } { [ dup "235" swap subseq? ] [ drop ] }
{ [ dup "221" head? ] [ smtp-response ] } { [ dup "250" head? ] [ drop ] }
{ [ dup "bye" head? ] [ smtp-response ] } { [ dup "221" head? ] [ drop ] }
{ [ dup "4" head? ] [ "server busy" throw ] } { [ dup "354" head? ] [ drop ] }
{ [ dup "354" head? ] [ smtp-response ] } { [ dup "4" head? ] [ smtp-server-busy ] }
{ [ dup "50" head? ] [ smtp-response "syntax error" throw ] } { [ dup "500" head? ] [ smtp-syntax-error ] }
{ [ dup "53" head? ] [ smtp-response "invalid authentication data" throw ] } { [ dup "501" head? ] [ smtp-command-not-implemented ] }
{ [ dup "55" head? ] [ smtp-response "fatal error" throw ] } { [ dup "50" head? ] [ smtp-syntax-error ] }
[ "unknown error" throw ] { [ dup "53" head? ] [ smtp-bad-authentication ] }
{ [ dup "550" head? ] [ smtp-mailbox-unavailable ] }
{ [ dup "551" head? ] [ smtp-user-not-local ] }
{ [ dup "552" head? ] [ smtp-exceeded-storage-allocation ] }
{ [ dup "553" head? ] [ smtp-bad-mailbox-name ] }
{ [ dup "554" head? ] [ smtp-transaction-failed ] }
[ smtp-error ]
} cond ; } cond ;
: multiline? ( response -- boolean ) : multiline? ( response -- boolean )
@ -89,41 +126,19 @@ LOG: smtp-response DEBUG
: get-ok ( -- ) receive-response check-response ; : get-ok ( -- ) receive-response check-response ;
ERROR: invalid-header-string string ;
: validate-header ( string -- string' ) : validate-header ( string -- string' )
dup "\r\n" intersect empty? dup "\r\n" intersect empty?
[ "Invalid header string: " prepend throw ] unless ; [ invalid-header-string ] unless ;
: write-header ( key value -- ) : write-header ( key value -- )
swap [ validate-header write ]
validate-header write [ ": " write validate-header write ] bi* crlf ;
": " write
validate-header write
crlf ;
: write-headers ( assoc -- ) : write-headers ( assoc -- )
[ write-header ] assoc-each ; [ write-header ] assoc-each ;
TUPLE: email from to subject headers body ;
M: email clone
call-next-method [ clone ] change-headers ;
: (send) ( email -- )
[
helo get-ok
dup from>> mail-from get-ok
dup to>> [ rcpt-to get-ok ] each
data get-ok
dup headers>> write-headers
crlf
body>> send-body get-ok
quit get-ok
] with-smtp-connection ;
: extract-email ( recepient -- email )
#! This could be much smarter.
" " last-split1 swap or "<" ?head drop ">" ?tail drop ;
: message-id ( -- string ) : message-id ( -- string )
[ [
"<" % "<" %
@ -135,25 +150,38 @@ M: email clone
">" % ">" %
] "" make ; ] "" make ;
: set-header ( email value key -- email ) : extract-email ( recepient -- email )
pick headers>> set-at ; #! This could be much smarter.
" " last-split1 swap or "<" ?head drop ">" ?tail drop ;
: prepare ( email -- email ) : email>headers ( email -- hashtable )
clone [
dup from>> "From" set-header {
[ extract-email ] change-from [ from>> "From" set ]
dup to>> ", " join "To" set-header [ to>> ", " join "To" set ]
[ [ extract-email ] map ] change-to [ cc>> ", " join [ "Cc" set ] unless-empty ]
dup subject>> "Subject" set-header [ subject>> "Subject" set ]
now timestamp>rfc822 "Date" set-header } cleave
message-id "Message-Id" set-header ; now timestamp>rfc822 "Date" set
message-id "Message-Id" set
] { } make-assoc ;
: <email> ( -- email ) : (send-email) ( headers email -- )
email new [
H{ } clone >>headers ; helo get-ok
dup from>> extract-email mail-from get-ok
dup to>> [ extract-email rcpt-to get-ok ] each
dup cc>> [ extract-email rcpt-to get-ok ] each
dup bcc>> [ extract-email rcpt-to get-ok ] each
data get-ok
swap write-headers
crlf
body>> send-body get-ok
quit get-ok
] with-smtp-connection ;
: send-email ( email -- ) : send-email ( email -- )
prepare (send) ; [ email>headers ] keep (send-email) ;
! Dirk's old AUTH CRAM-MD5 code. I don't know anything about ! Dirk's old AUTH CRAM-MD5 code. I don't know anything about
! CRAM MD5, and the old code didn't work properly either, so here ! CRAM MD5, and the old code didn't work properly either, so here

View File

@ -697,3 +697,7 @@ DEFER: error-y
<string-reader> "forget-subclass-test" parse-stream <string-reader> "forget-subclass-test" parse-stream
drop drop
] unit-test ] unit-test
[ ] [
"IN: sequences TUPLE: reversed { seq read-only } ;" eval
] unit-test

View File

@ -104,8 +104,7 @@ ERROR: bad-superclass class ;
[ tuple-instance? ] 2curry define-predicate ; [ tuple-instance? ] 2curry define-predicate ;
: superclass-size ( class -- n ) : superclass-size ( class -- n )
superclasses but-last-slice superclasses but-last [ "slots" word-prop length ] sigma ;
[ "slots" word-prop length ] sigma ;
: (instance-check-quot) ( class -- quot ) : (instance-check-quot) ( class -- quot )
[ [
@ -203,11 +202,11 @@ ERROR: bad-superclass class ;
M: tuple-class update-class M: tuple-class update-class
{ {
[ define-boa-check ]
[ define-tuple-layout ] [ define-tuple-layout ]
[ define-tuple-slots ] [ define-tuple-slots ]
[ define-tuple-predicate ] [ define-tuple-predicate ]
[ define-tuple-prototype ] [ define-tuple-prototype ]
[ define-boa-check ]
} cleave ; } cleave ;
: define-new-tuple-class ( class superclass slots -- ) : define-new-tuple-class ( class superclass slots -- )
@ -280,11 +279,8 @@ M: tuple-class reset-class
] with each ] with each
] [ ] [
[ call-next-method ] [ call-next-method ]
[ [ { "layout" "slots" "boa-check" "prototype" } reset-props ]
{ bi
"layout" "slots" "boa-check" "prototype"
} reset-props
] bi
] bi ; ] bi ;
M: tuple-class rank-class drop 0 ; M: tuple-class rank-class drop 0 ;

View File

@ -117,10 +117,10 @@ ERROR: no-case ;
] [ drop f ] if ; ] [ drop f ] if ;
: dispatch-case ( value from to default array -- ) : dispatch-case ( value from to default array -- )
>r >r 3dup between? [ >r >r 3dup between? r> r> rot [
drop - >fixnum r> drop r> dispatch >r 2drop - >fixnum r> dispatch
] [ ] [
2drop r> call r> drop drop 2nip call
] if ; inline ] if ; inline
: dispatch-case-quot ( default assoc -- quot ) : dispatch-case-quot ( default assoc -- quot )

View File

@ -34,10 +34,10 @@ GENERIC: engine>quot ( engine -- quot )
[ [ nip class<= ] curry assoc-filter ] 2bi ; [ [ nip class<= ] curry assoc-filter ] 2bi ;
: convert-methods ( assoc class word -- assoc' ) : convert-methods ( assoc class word -- assoc' )
over >r >r split-methods dup assoc-empty? [ over [ split-methods ] 2dip pick assoc-empty? [
r> r> 3drop 3drop
] [ ] [
r> execute r> pick set-at [ execute ] dip pick set-at
] if ; inline ] if ; inline
: (picker) ( n -- quot ) : (picker) ( n -- quot )

View File

@ -61,8 +61,8 @@ M: decoder stream-read1
: (read) ( n quot -- n string ) : (read) ( n quot -- n string )
over 0 <string> [ over 0 <string> [
[ [
>r call dup slip over
[ swap r> set-nth-unsafe f ] [ r> 3drop t ] if [ swapd set-nth-unsafe f ] [ 3drop t ] if
] 2curry find-integer ] 2curry find-integer
] keep ; inline ] keep ; inline

View File

@ -24,7 +24,7 @@ SINGLETON: utf8
: triple ( stream byte -- stream char ) : triple ( stream byte -- stream char )
BIN: 1111 bitand append-nums append-nums ; inline BIN: 1111 bitand append-nums append-nums ; inline
: quad ( stream byte -- stream char ) : quadruple ( stream byte -- stream char )
BIN: 111 bitand append-nums append-nums append-nums ; inline BIN: 111 bitand append-nums append-nums append-nums ; inline
: begin-utf8 ( stream byte -- stream char ) : begin-utf8 ( stream byte -- stream char )
@ -32,7 +32,7 @@ SINGLETON: utf8
{ [ dup -7 shift zero? ] [ ] } { [ dup -7 shift zero? ] [ ] }
{ [ dup -5 shift BIN: 110 number= ] [ double ] } { [ dup -5 shift BIN: 110 number= ] [ double ] }
{ [ dup -4 shift BIN: 1110 number= ] [ triple ] } { [ dup -4 shift BIN: 1110 number= ] [ triple ] }
{ [ dup -3 shift BIN: 11110 number= ] [ quad ] } { [ dup -3 shift BIN: 11110 number= ] [ quadruple ] }
[ drop replacement-char ] [ drop replacement-char ]
} cond ; inline } cond ; inline

View File

@ -96,8 +96,8 @@ PRIVATE>
: integer, ( num radix -- ) : integer, ( num radix -- )
dup 1 <= [ "Invalid radix" throw ] when dup 1 <= [ "Invalid radix" throw ] when
dup >r /mod >digit , dup 0 > [ /mod >digit , ] keep over 0 >
[ r> integer, ] [ r> 2drop ] if ; [ integer, ] [ 2drop ] if ;
PRIVATE> PRIVATE>

View File

@ -173,8 +173,6 @@ M: reversed length seq>> length ;
INSTANCE: reversed virtual-sequence INSTANCE: reversed virtual-sequence
: reverse ( seq -- newseq ) [ <reversed> ] [ like ] bi ;
! A slice of another sequence. ! A slice of another sequence.
TUPLE: slice TUPLE: slice
{ from read-only } { from read-only }
@ -336,11 +334,10 @@ M: immutable-sequence clone-like like ;
pick >r >r (each) r> call r> finish-find ; inline pick >r >r (each) r> call r> finish-find ; inline
: (find-from) ( n seq quot quot' -- i elt ) : (find-from) ( n seq quot quot' -- i elt )
>r >r 2dup bounds-check? [ [ 2dup bounds-check? ] 2dip
r> r> (find) [ (find) ] 2curry
] [ [ 2drop f f ]
r> r> 2drop 2drop f f if ; inline
] if ; inline
: (monotonic) ( seq quot -- ? ) : (monotonic) ( seq quot -- ? )
[ 2dup nth-unsafe rot 1+ rot nth-unsafe ] [ 2dup nth-unsafe rot 1+ rot nth-unsafe ]
@ -601,6 +598,13 @@ M: slice equal? over slice? [ sequence= ] [ 2drop f ] if ;
tuck - 1- rot exchange-unsafe tuck - 1- rot exchange-unsafe
] each 2drop ; ] each 2drop ;
: reverse ( seq -- newseq )
[
dup [ length ] keep new-sequence
[ 0 swap copy ] keep
[ reverse-here ] keep
] keep like ;
: sum-lengths ( seq -- n ) : sum-lengths ( seq -- n )
0 [ length + ] reduce ; 0 [ length + ] reduce ;
@ -624,8 +628,10 @@ M: slice equal? over slice? [ sequence= ] [ 2drop f ] if ;
] keep like ; ] keep like ;
: padding ( seq n elt quot -- newseq ) : padding ( seq n elt quot -- newseq )
>r >r over length [-] dup zero? [
[ r> r> 3drop ] [ r> <repetition> r> call ] if ; inline [ over length [-] dup zero? [ drop ] ] dip
[ <repetition> ] curry
] dip compose if ; inline
: pad-left ( seq n elt -- padded ) : pad-left ( seq n elt -- padded )
[ swap dup (append) ] padding ; [ swap dup (append) ] padding ;
@ -730,9 +736,11 @@ PRIVATE>
[ left-trim ] [ right-trim ] bi ; inline [ left-trim ] [ right-trim ] bi ; inline
: sum ( seq -- n ) 0 [ + ] binary-reduce ; : sum ( seq -- n ) 0 [ + ] binary-reduce ;
: product ( seq -- n ) 1 [ * ] binary-reduce ; : product ( seq -- n ) 1 [ * ] binary-reduce ;
: infimum ( seq -- n ) dup first [ min ] reduce ; : infimum ( seq -- n ) dup first [ min ] reduce ;
: supremum ( seq -- n ) dup first [ max ] reduce ; : supremum ( seq -- n ) dup first [ max ] reduce ;
: flip ( matrix -- newmatrix ) : flip ( matrix -- newmatrix )
@ -744,4 +752,3 @@ PRIVATE>
: sigma ( seq quot -- n ) [ + ] compose 0 swap reduce ; inline : sigma ( seq quot -- n ) [ + ] compose 0 swap reduce ; inline
: count ( seq quot -- n ) [ 1 0 ? ] compose sigma ; inline : count ( seq quot -- n ) [ 1 0 ? ] compose sigma ; inline

View File

@ -25,19 +25,19 @@ TUPLE: merge
: dump ( from to seq accum -- ) : dump ( from to seq accum -- )
#! Optimize common case where to - from = 1, 2, or 3. #! Optimize common case where to - from = 1, 2, or 3.
>r >r 2dup swap - dup 1 = >r >r 2dup swap - r> r> pick 1 =
[ 2drop r> nth-unsafe r> push ] [ [ >r >r 2drop r> nth-unsafe r> push ] [
dup 2 = [ pick 2 = [
2drop dup 1+ >r >r 2drop dup 1+
r> [ nth-unsafe ] curry bi@ r> [ nth-unsafe ] curry bi@
r> [ push ] curry bi@ r> [ push ] curry bi@
] [ ] [
dup 3 = [ pick 3 = [
2drop dup 1+ dup 1+ >r >r 2drop dup 1+ dup 1+
r> [ nth-unsafe ] curry tri@ r> [ nth-unsafe ] curry tri@
r> [ push ] curry tri@ r> [ push ] curry tri@
] [ ] [
drop r> subseq r> push-all >r nip subseq r> push-all
] if ] if
] if ] if
] if ; inline ] if ; inline
@ -120,11 +120,13 @@ TUPLE: merge
[ [ 1 shift dup 1+ ] dip ] prepose curry each-integer ; inline [ [ 1 shift dup 1+ ] dip ] prepose curry each-integer ; inline
: (sort-pairs) ( i1 i2 seq quot accum -- ) : (sort-pairs) ( i1 i2 seq quot accum -- )
>r >r 2dup length = [ [ 2dup length = ] 2dip rot [
nip nth r> drop r> push [ drop nip nth ] dip push
] [ ] [
tuck [ nth-unsafe ] 2bi@ 2dup r> call +gt+ eq? [
[ swap ] when r> tuck [ push ] 2bi@ [ tuck [ nth-unsafe ] 2bi@ 2dup ] dip call +gt+ eq?
[ swap ] when
] dip tuck [ push ] 2bi@
] if ; inline ] if ; inline
: sort-pairs ( merge quot -- ) : sort-pairs ( merge quot -- )

View File

@ -1,4 +1,17 @@
USING: kernel tools.test sequences vectors assocs.lib ;
IN: assocs.lib.tests IN: assocs.lib.tests
USING: assocs.lib tools.test vectors ;
{ 1 1 } [ [ ?push ] histogram ] must-infer-as { 1 1 } [ [ ?push ] histogram ] must-infer-as
! substitute
[ { 2 } ] [ { 1 } H{ { 1 2 } } [ ?at drop ] curry map ] unit-test
[ { 3 } ] [ { 3 } H{ { 1 2 } } [ ?at drop ] curry map ] unit-test
[ 2 ] [ 1 H{ { 1 2 } } [ ] [ ] if-at ] unit-test
[ 3 ] [ 3 H{ { 1 2 } } [ ] [ ] if-at ] unit-test
[ "hi" ] [ 1 H{ { 1 2 } } [ drop "hi" ] when-at ] unit-test
[ 3 ] [ 3 H{ { 1 2 } } [ drop "hi" ] when-at ] unit-test
[ 2 ] [ 1 H{ { 1 2 } } [ drop "hi" ] unless-at ] unit-test
[ "hi" ] [ 3 H{ { 1 2 } } [ drop "hi" ] unless-at ] unit-test

View File

@ -37,3 +37,13 @@ IN: assocs.lib
H{ } clone [ H{ } clone [
swap [ change-at ] 2curry assoc-each swap [ change-at ] 2curry assoc-each
] keep ; inline ] keep ; inline
: ?at ( obj assoc -- value/obj ? )
dupd at* [ [ nip ] [ drop ] if ] keep ;
: if-at ( obj assoc quot1 quot2 -- )
[ ?at ] 2dip if ; inline
: when-at ( obj assoc quot -- ) [ ] if-at ; inline
: unless-at ( obj assoc quot -- ) [ ] swap if-at ; inline

View File

@ -11,3 +11,12 @@ HELP: generate
"[ 20 random-prime ] [ 4 mod 3 = ] generate ." "[ 20 random-prime ] [ 4 mod 3 = ] generate ."
"526367" "526367"
} ; } ;
HELP: %chance
{ $values { "quot" quotation } { "n" integer } }
{ $description "Calls the quotation " { $snippet "n" } " percent of the time." }
{ $unchecked-example
"USING: io ;"
"[ \"hello, world! maybe.\" print ] 50 %chance"
""
} ;

View File

@ -4,7 +4,7 @@
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: kernel combinators fry namespaces quotations hashtables USING: kernel combinators fry namespaces quotations hashtables
sequences assocs arrays inference effects math math.ranges sequences assocs arrays inference effects math math.ranges
generalizations macros continuations locals ; generalizations macros continuations random locals ;
IN: combinators.lib IN: combinators.lib
@ -31,6 +31,8 @@ IN: combinators.lib
! Generalized versions of core combinators ! Generalized versions of core combinators
! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
: quad ( x p q r s -- ) >r >r >r keep r> keep r> keep r> call ; inline
: 4slip ( quot a b c d -- a b c d ) 4 nslip ; inline : 4slip ( quot a b c d -- a b c d ) 4 nslip ; inline
: 4keep ( w x y z quot -- w x y z ) 4 nkeep ; inline : 4keep ( w x y z quot -- w x y z ) 4 nkeep ; inline
@ -137,7 +139,7 @@ MACRO: multikeep ( word out-indexes -- ... )
[ drop ] rot compose attempt-all ; inline [ drop ] rot compose attempt-all ; inline
: do-while ( pred body tail -- ) : do-while ( pred body tail -- )
>r tuck 2slip r> while ; >r tuck 2slip r> while ; inline
: generate ( generator predicate -- obj ) : generate ( generator predicate -- obj )
[ dup ] swap [ dup [ nip ] unless not ] 3compose [ dup ] swap [ dup [ nip ] unless not ] 3compose
@ -147,3 +149,5 @@ MACRO: predicates ( seq -- quot/f )
dup [ 1quotation [ drop ] prepend ] map dup [ 1quotation [ drop ] prepend ] map
>r [ [ dup ] prepend ] map r> zip [ drop f ] suffix >r [ [ dup ] prepend ] map r> zip [ drop f ] suffix
[ cond ] curry ; [ cond ] curry ;
: %chance ( quot integer -- ) 100 random > swap when ; inline

View File

@ -56,8 +56,7 @@ TUPLE: link attributes clickable ;
: trim-text ( vector -- vector' ) : trim-text ( vector -- vector' )
[ [
dup name>> text = [ dup name>> text = [
[ text>> [ blank? ] trim ] keep [ [ blank? ] trim ] change-text
[ set-tag-text ] keep
] when ] when
] map ; ] map ;
@ -173,8 +172,7 @@ TUPLE: link attributes clickable ;
[ [
{ {
{ [ dup name>> "form" = ] { [ dup name>> "form" = ]
[ "form action: " write attributes>> "action" swap at print [ "form action: " write attributes>> "action" swap at print ] }
] }
{ [ dup name>> "input" = ] [ input. ] } { [ dup name>> "input" = ] [ input. ] }
[ drop ] [ drop ]
} cond } cond

View File

@ -2,19 +2,19 @@ USING: html.parser kernel tools.test ;
IN: html.parser.tests IN: html.parser.tests
[ [
V{ T{ tag f "html" H{ } f f f } } V{ T{ tag f "html" H{ } f f } }
] [ "<html>" parse-html ] unit-test ] [ "<html>" parse-html ] unit-test
[ [
V{ T{ tag f "html" H{ } f f t } } V{ T{ tag f "html" H{ } f t } }
] [ "</html>" parse-html ] unit-test ] [ "</html>" parse-html ] unit-test
[ [
V{ T{ tag f "a" H{ { "href" "http://factorcode.org/" } } f f f } } V{ T{ tag f "a" H{ { "href" "http://factorcode.org/" } } f f } }
] [ "<a href=\"http://factorcode.org/\">" parse-html ] unit-test ] [ "<a href=\"http://factorcode.org/\">" parse-html ] unit-test
[ [
V{ T{ tag f "a" H{ { "href" "http://factorcode.org/" } } f f f } } V{ T{ tag f "a" H{ { "href" "http://factorcode.org/" } } f f } }
] [ "<a href = \"http://factorcode.org/\" >" parse-html ] unit-test ] [ "<a href = \"http://factorcode.org/\" >" parse-html ] unit-test
[ [
@ -26,7 +26,6 @@ V{
H{ { "baz" "\"quux\"" } { "foo" "bar's" } } H{ { "baz" "\"quux\"" } { "foo" "bar's" } }
f f
f f
f
} }
} }
] [ "<a foo=\"bar's\" baz='\"quux\"' >" parse-html ] unit-test ] [ "<a foo=\"bar's\" baz='\"quux\"' >" parse-html ] unit-test
@ -39,25 +38,25 @@ V{
{ "foo" "bar" } { "foo" "bar" }
{ "href" "http://factorcode.org/" } { "href" "http://factorcode.org/" }
{ "baz" "quux" } { "baz" "quux" }
} f f f } } f f }
} }
] [ "<a href = \"http://factorcode.org/\" foo = bar baz='quux'a=pirsqd >" parse-html ] unit-test ] [ "<a href = \"http://factorcode.org/\" foo = bar baz='quux'a=pirsqd >" parse-html ] unit-test
[ [
V{ V{
T{ tag f "html" H{ } f f f } T{ tag f "html" H{ } f f }
T{ tag f "head" H{ } f f f } T{ tag f "head" H{ } f f }
T{ tag f "head" H{ } f f t } T{ tag f "head" H{ } f t }
T{ tag f "html" H{ } f f t } T{ tag f "html" H{ } f t }
} }
] [ "<html<head</head</html" parse-html ] unit-test ] [ "<html<head</head</html" parse-html ] unit-test
[ [
V{ V{
T{ tag f "head" H{ } f f f } T{ tag f "head" H{ } f f }
T{ tag f "title" H{ } f f f } T{ tag f "title" H{ } f f }
T{ tag f text f "Spagna" f f } T{ tag f text f "Spagna" f }
T{ tag f "title" H{ } f f t } T{ tag f "title" H{ } f t }
T{ tag f "head" H{ } f f t } T{ tag f "head" H{ } f t }
} }
] [ "<head<title>Spagna</title></head" parse-html ] unit-test ] [ "<head<title>Spagna</title></head" parse-html ] unit-test

View File

@ -1,26 +1,22 @@
USING: accessors arrays html.parser.utils hashtables io kernel USING: accessors arrays html.parser.utils hashtables io kernel
namespaces prettyprint quotations namespaces prettyprint quotations
sequences splitting state-parser strings unicode.categories unicode.case ; sequences splitting state-parser strings unicode.categories unicode.case
sequences.lib ;
IN: html.parser IN: html.parser
TUPLE: tag name attributes text matched? closing? ; TUPLE: tag name attributes text closing? ;
SYMBOL: text SINGLETON: text
SYMBOL: dtd SINGLETON: dtd
SYMBOL: comment SINGLETON: comment
SYMBOL: javascript
SYMBOL: tagstack SYMBOL: tagstack
: push-tag ( tag -- ) : push-tag ( tag -- )
tagstack get push ; tagstack get push ;
: closing-tag? ( string -- ? ) : closing-tag? ( string -- ? )
dup empty? [ [ f ]
drop f [ [ first ] [ peek ] bi [ CHAR: / = ] bi@ or ] if-empty ;
] [
dup first CHAR: / =
swap peek CHAR: / = or
] if ;
: <tag> ( name attributes closing? -- tag ) : <tag> ( name attributes closing? -- tag )
tag new tag new
@ -28,56 +24,55 @@ SYMBOL: tagstack
swap >>attributes swap >>attributes
swap >>name ; swap >>name ;
: make-tag ( str attribs -- tag ) : make-tag ( string attribs -- tag )
>r [ closing-tag? ] keep "/" trim1 r> rot <tag> ; >r [ closing-tag? ] keep "/" trim1 r> rot <tag> ;
: make-text-tag ( str -- tag ) : make-text-tag ( string -- tag )
T{ tag f text } clone [ set-tag-text ] keep ; tag new
text >>name
swap >>text ;
: make-comment-tag ( str -- tag ) : make-comment-tag ( string -- tag )
T{ tag f comment } clone [ set-tag-text ] keep ; tag new
comment >>name
swap >>text ;
: make-dtd-tag ( str -- tag ) : make-dtd-tag ( string -- tag )
T{ tag f dtd } clone [ set-tag-text ] keep ; tag new
dtd >>name
swap >>text ;
: read-whitespace ( -- str ) : read-whitespace ( -- string )
[ get-char blank? not ] take-until ; [ get-char blank? not ] take-until ;
: read-whitespace* ( -- ) : read-whitespace* ( -- ) read-whitespace drop ;
read-whitespace drop ;
: read-token ( -- str ) : read-token ( -- string )
read-whitespace* read-whitespace*
[ get-char blank? ] take-until ; [ get-char blank? ] take-until ;
: read-single-quote ( -- str ) : read-single-quote ( -- string )
[ get-char CHAR: ' = ] take-until ; [ get-char CHAR: ' = ] take-until ;
: read-double-quote ( -- str ) : read-double-quote ( -- string )
[ get-char CHAR: " = ] take-until ; [ get-char CHAR: " = ] take-until ;
: read-quote ( -- str ) : read-quote ( -- string )
get-char next* CHAR: ' = [ get-char next* CHAR: ' =
read-single-quote [ read-single-quote ] [ read-double-quote ] if next* ;
] [
read-double-quote
] if next* ;
: read-key ( -- str ) : read-key ( -- string )
read-whitespace* read-whitespace*
[ get-char CHAR: = = get-char blank? or ] take-until ; [ get-char [ CHAR: = = ] [ blank? ] bi or ] take-until ;
: read-= ( -- ) : read-= ( -- )
read-whitespace* read-whitespace*
[ get-char CHAR: = = ] take-until drop next* ; [ get-char CHAR: = = ] take-until drop next* ;
: read-value ( -- str ) : read-value ( -- string )
read-whitespace* read-whitespace*
get-char quote? [ get-char quote? [ read-quote ] [ read-token ] if
read-quote [ blank? ] trim ;
] [
read-token
] if [ blank? ] trim ;
: read-comment ( -- ) : read-comment ( -- )
"-->" take-string* make-comment-tag push-tag ; "-->" take-string* make-comment-tag push-tag ;
@ -97,14 +92,14 @@ SYMBOL: tagstack
[ get-char CHAR: > = get-char CHAR: < = or ] take-until [ get-char CHAR: > = get-char CHAR: < = or ] take-until
get-char CHAR: < = [ next* ] unless ; get-char CHAR: < = [ next* ] unless ;
: read-< ( -- str ) : read-< ( -- string )
next* get-char CHAR: ! = [ next* get-char CHAR: ! = [
read-bang f read-bang f
] [ ] [
read-tag read-tag
] if ; ] if ;
: read-until-< ( -- str ) : read-until-< ( -- string )
[ get-char CHAR: < = ] take-until ; [ get-char CHAR: < = ] take-until ;
: parse-text ( -- ) : parse-text ( -- )
@ -131,11 +126,9 @@ SYMBOL: tagstack
] string-parse ; ] string-parse ;
: parse-tag ( -- ) : parse-tag ( -- )
read-< dup empty? [ read-< [
drop
] [
(parse-tag) make-tag push-tag (parse-tag) make-tag push-tag
] if ; ] unless-empty ;
: (parse-html) ( -- ) : (parse-html) ( -- )
get-next [ get-next [
@ -145,13 +138,7 @@ SYMBOL: tagstack
] when ; ] when ;
: tag-parse ( quot -- vector ) : tag-parse ( quot -- vector )
[ V{ } clone tagstack [ string-parse ] with-variable ;
V{ } clone tagstack set
string-parse
] with-scope ;
: parse-html ( string -- vector ) : parse-html ( string -- vector )
[ [ (parse-html) tagstack get ] tag-parse ;
(parse-html)
tagstack get
] tag-parse ;

View File

@ -1,123 +1,89 @@
USING: assocs html.parser html.parser.utils combinators USING: accessors assocs html.parser html.parser.utils combinators
continuations hashtables continuations hashtables
hashtables.private io kernel math hashtables.private io kernel math
namespaces prettyprint quotations sequences splitting namespaces prettyprint quotations sequences splitting
strings ; strings ;
IN: html.parser.printer IN: html.parser.printer
SYMBOL: no-section SYMBOL: printer
SYMBOL: html
SYMBOL: head
SYMBOL: body
TUPLE: state section ;
! TUPLE: text bold? underline? strikethrough? ; TUPLE: html-printer ;
TUPLE: text-printer < html-printer ;
TUPLE: src-printer < html-printer ;
TUPLE: html-prettyprinter < html-printer ;
TUPLE: text-printer ; HOOK: print-text-tag html-printer ( tag -- )
TUPLE: ui-printer ; HOOK: print-comment-tag html-printer ( tag -- )
TUPLE: src-printer ; HOOK: print-dtd-tag html-printer ( tag -- )
TUPLE: html-prettyprinter ; HOOK: print-opening-tag html-printer ( tag -- )
UNION: printer text-printer ui-printer src-printer html-prettyprinter ; HOOK: print-closing-tag html-printer ( tag -- )
HOOK: print-tag printer ( tag -- )
HOOK: print-text-tag printer ( tag -- )
HOOK: print-comment-tag printer ( tag -- )
HOOK: print-dtd-tag printer ( tag -- )
HOOK: print-opening-named-tag printer ( tag -- )
HOOK: print-closing-named-tag printer ( tag -- )
: print-tags ( vector -- ) ERROR: unknown-tag-error tag ;
[ print-tag ] each ;
: print-tag ( tag -- )
{
{ [ dup name>> text = ] [ print-text-tag ] }
{ [ dup name>> comment = ] [ print-comment-tag ] }
{ [ dup name>> dtd = ] [ print-dtd-tag ] }
{ [ dup [ name>> string? ] [ closing?>> ] bi and ]
[ print-closing-tag ] }
{ [ dup name>> string? ]
[ print-opening-tag ] }
[ unknown-tag-error ]
} cond ;
: print-tags ( vector -- ) [ print-tag ] each ;
: html-text. ( vector -- ) : html-text. ( vector -- )
[ T{ text-printer } html-printer [ print-tags ] with-variable ;
T{ text-printer } printer set
print-tags
] with-scope ;
: html-src. ( vector -- ) : html-src. ( vector -- )
[ T{ src-printer } html-printer [ print-tags ] with-variable ;
T{ src-printer } printer set
print-tags
] with-scope ;
M: printer print-text-tag ( tag -- ) M: html-printer print-text-tag ( tag -- ) text>> write ;
tag-text write ;
M: printer print-comment-tag ( tag -- ) M: html-printer print-comment-tag ( tag -- )
"<!--" write "<!--" write text>> write "-->" write ;
tag-text write
"-->" write ;
M: printer print-dtd-tag ( tag -- ) M: html-printer print-dtd-tag ( tag -- )
"<!" write "<!" write text>> write ">" write ;
tag-text write
">" write ;
M: printer print-opening-named-tag ( tag -- )
dup tag-name {
{ "html" [ drop ] }
{ "head" [ drop ] }
{ "body" [ drop ] }
{ "title" [ "Title: " write tag-text print ] }
} case ;
M: printer print-closing-named-tag ( tag -- )
drop ;
: print-attributes ( hashtable -- ) : print-attributes ( hashtable -- )
[ [ [ bl write "=" write ] [ ?quote write ] bi* ] assoc-each ;
swap bl write "=" write ?quote write
] assoc-each ;
M: src-printer print-opening-named-tag ( tag -- ) M: src-printer print-opening-tag ( tag -- )
"<" write "<" write
[ tag-name write ] [ name>> write ]
[ tag-attributes dup assoc-empty? [ drop ] [ print-attributes ] if ] bi [ attributes>> dup assoc-empty? [ drop ] [ print-attributes ] if ] bi
">" write ; ">" write ;
M: src-printer print-closing-named-tag ( tag -- ) M: src-printer print-closing-tag ( tag -- )
"</" write "</" write
tag-name write name>> write
">" write ; ">" write ;
SYMBOL: tab-width SYMBOL: tab-width
SYMBOL: #indentations SYMBOL: #indentations
SYMBOL: tagstack
: prettyprint-html ( vector -- )
[
T{ html-prettyprinter } printer set
V{ } clone tagstack set
2 tab-width set
0 #indentations set
print-tags
] with-scope ;
: print-tabs ( -- ) : print-tabs ( -- )
tab-width get #indentations get * CHAR: \s <repetition> write ; tab-width get #indentations get * CHAR: \s <repetition> write ;
M: html-prettyprinter print-opening-named-tag ( tag -- ) M: html-prettyprinter print-opening-tag ( tag -- )
print-tabs "<" write print-tabs "<" write
tag-name write name>> write
">\n" write ; ">\n" write ;
M: html-prettyprinter print-closing-named-tag ( tag -- ) M: html-prettyprinter print-closing-tag ( tag -- )
"</" write "</" write
tag-name write name>> write
">" write ; ">" write ;
ERROR: unknown-tag-error tag ;
M: printer print-tag ( tag -- )
{
{ [ dup tag-name text = ] [ print-text-tag ] }
{ [ dup tag-name comment = ] [ print-comment-tag ] }
{ [ dup tag-name dtd = ] [ print-dtd-tag ] }
{ [ dup tag-name string? over tag-closing? and ]
[ print-closing-named-tag ] }
{ [ dup tag-name string? ]
[ print-opening-named-tag ] }
[ unknown-tag-error ]
} cond ;
! SYMBOL: tablestack
! : with-html-printer ( vector quot -- )
! [ V{ } clone tablestack set ] with-scope ;
! { { 1 2 } { 3 4 } }
! H{ { table-gap { 10 10 } } } [
! [ [ [ [ . ] with-cell ] each ] with-row ] each
! ] tabular-output
! : html-pp ( vector -- )
! [ 0 #indentations set 2 tab-width set ] with-scope ;

View File

@ -4,8 +4,7 @@ namespaces prettyprint quotations sequences splitting
state-parser strings sequences.lib ; state-parser strings sequences.lib ;
IN: html.parser.utils IN: html.parser.utils
: string-parse-end? ( -- ? ) : string-parse-end? ( -- ? ) get-next not ;
get-next not ;
: take-string* ( match -- string ) : take-string* ( match -- string )
dup length <circular-string> dup length <circular-string>
@ -16,17 +15,18 @@ IN: html.parser.utils
[ ?head drop ] [ ?tail drop ] bi ; [ ?head drop ] [ ?tail drop ] bi ;
: single-quote ( str -- newstr ) : single-quote ( str -- newstr )
>r "'" r> "'" 3append ; "'" swap "'" 3append ;
: double-quote ( str -- newstr ) : double-quote ( str -- newstr )
>r "\"" r> "\"" 3append ; "\"" swap "\"" 3append ;
: quote ( str -- newstr ) : quote ( str -- newstr )
CHAR: ' over member? CHAR: ' over member?
[ double-quote ] [ single-quote ] if ; [ double-quote ] [ single-quote ] if ;
: quoted? ( str -- ? ) : quoted? ( str -- ? )
[ [ first ] [ peek ] bi [ = ] keep "'\"" member? and ] [ f ] if-seq ; [ f ]
[ [ first ] [ peek ] bi [ = ] keep "'\"" member? and ] if-empty ;
: ?quote ( str -- newstr ) : ?quote ( str -- newstr )
dup quoted? [ quote ] unless ; dup quoted? [ quote ] unless ;

View File

@ -0,0 +1 @@
Doug Coleman

View File

@ -0,0 +1,23 @@
! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors hashtables kernel math state-tables vars vectors ;
IN: regexp2.backend
TUPLE: regexp
raw
{ stack vector }
parse-tree
nfa-table
dfa-table
minimized-table
{ state integer }
{ new-states vector }
{ visited-states hashtable } ;
: reset-regexp ( regexp -- regexp )
0 >>state
V{ } clone >>stack
V{ } clone >>new-states
H{ } clone >>visited-states ;
SYMBOL: current-regexp

View File

@ -0,0 +1,49 @@
! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors kernel math math.order symbols regexp2.parser
words regexp2.utils unicode.categories combinators.short-circuit ;
IN: regexp2.classes
GENERIC: class-member? ( obj class -- ? )
M: word class-member? ( obj class -- ? ) 2drop f ;
M: integer class-member? ( obj class -- ? ) 2drop f ;
M: character-class-range class-member? ( obj class -- ? )
[ from>> ] [ to>> ] bi between? ;
M: any-char class-member? ( obj class -- ? )
2drop t ;
M: letter-class class-member? ( obj class -- ? )
drop letter? ;
M: LETTER-class class-member? ( obj class -- ? )
drop LETTER? ;
M: ascii-class class-member? ( obj class -- ? )
drop ascii? ;
M: digit-class class-member? ( obj class -- ? )
drop digit? ;
M: alpha-class class-member? ( obj class -- ? )
drop alpha? ;
M: punctuation-class class-member? ( obj class -- ? )
drop punct? ;
M: java-printable-class class-member? ( obj class -- ? )
drop java-printable? ;
M: non-newline-blank-class class-member? ( obj class -- ? )
drop { [ blank? ] [ CHAR: \n = not ] } 1&& ;
M: control-character-class class-member? ( obj class -- ? )
drop control-char? ;
M: hex-digit-class class-member? ( obj class -- ? )
drop hex-digit? ;
M: java-blank-class class-member? ( obj class -- ? )
drop java-blank? ;

View File

@ -0,0 +1,70 @@
! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays assocs combinators fry kernel locals
math math.order regexp2.nfa regexp2.transition-tables sequences
sets sorting vectors regexp2.utils sequences.lib ;
USING: io prettyprint threads ;
IN: regexp2.dfa
: find-delta ( states transition regexp -- new-states )
nfa-table>> transitions>>
rot [ swap at at ] with with map sift concat prune ;
: (find-epsilon-closure) ( states regexp -- new-states )
eps swap find-delta ;
: find-epsilon-closure ( states regexp -- new-states )
'[ dup , (find-epsilon-closure) union ] [ length ] while-changes
natural-sort ;
: find-closure ( states transition regexp -- new-states )
[ find-delta ] 2keep nip find-epsilon-closure ;
: find-start-state ( regexp -- state )
[ nfa-table>> start-state>> 1vector ] keep find-epsilon-closure ;
: find-transitions ( seq1 regexp -- seq2 )
nfa-table>> transitions>>
[ at keys ] curry map concat eps swap remove ;
: add-todo-state ( state regexp -- )
2dup visited-states>> key? [
2drop
] [
[ visited-states>> conjoin ]
[ new-states>> push ] 2bi
] if ;
: new-transitions ( regexp -- )
dup new-states>> [
drop
] [
dupd pop dup pick find-transitions rot
[
[ [ find-closure ] 2keep nip dupd add-todo-state ] 3keep
>r swapd transition boa r> dfa-table>> add-transition
] curry with each
new-transitions
] if-empty ;
: states ( hashtable -- array )
[ keys ]
[ values [ values concat ] map concat append ] bi ;
: set-final-states ( regexp -- )
dup
[ nfa-table>> final-states>> keys ]
[ dfa-table>> transitions>> states ] bi
[ intersect empty? not ] with filter
swap dfa-table>> final-states>>
[ conjoin ] curry each ;
: set-initial-state ( regexp -- )
dup
[ dfa-table>> ] [ find-start-state ] bi
[ >>start-state drop ] keep
1vector >>new-states drop ;
: construct-dfa ( regexp -- )
[ set-initial-state ] [ new-transitions ] [ set-final-states ] tri ;

View File

@ -0,0 +1,126 @@
! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays assocs grouping kernel regexp2.backend
locals math namespaces regexp2.parser sequences state-tables fry
quotations math.order math.ranges vectors unicode.categories
regexp2.utils regexp2.transition-tables words sequences.lib ;
IN: regexp2.nfa
SYMBOL: negation-mode
: negated? ( -- ? ) negation-mode get 0 or odd? ;
SINGLETON: eps
: next-state ( regexp -- state )
[ state>> ] [ [ 1+ ] change-state drop ] bi ;
: set-start-state ( regexp -- )
dup stack>> [
drop
] [
[ nfa-table>> ] [ pop first ] bi* >>start-state drop
] if-empty ;
GENERIC: nfa-node ( node -- )
:: add-simple-entry ( obj class -- )
[let* | regexp [ current-regexp get ]
s0 [ regexp next-state ]
s1 [ regexp next-state ]
stack [ regexp stack>> ]
table [ regexp nfa-table>> ] |
negated? [
s0 f obj class boa table add-transition
s0 s1 <default-transition> table add-transition
] [
s0 s1 obj class boa table add-transition
] if
s0 s1 2array stack push
t s1 table final-states>> set-at ] ;
:: concatenate-nodes ( -- )
[let* | regexp [ current-regexp get ]
stack [ regexp stack>> ]
table [ regexp nfa-table>> ]
s2 [ stack peek first ]
s3 [ stack pop second ]
s0 [ stack peek first ]
s1 [ stack pop second ] |
s1 s2 eps <literal-transition> table add-transition
s1 table final-states>> delete-at
s0 s3 2array stack push ] ;
:: alternate-nodes ( -- )
[let* | regexp [ current-regexp get ]
stack [ regexp stack>> ]
table [ regexp nfa-table>> ]
s2 [ stack peek first ]
s3 [ stack pop second ]
s0 [ stack peek first ]
s1 [ stack pop second ]
s4 [ regexp next-state ]
s5 [ regexp next-state ] |
s4 s0 eps <literal-transition> table add-transition
s4 s2 eps <literal-transition> table add-transition
s1 s5 eps <literal-transition> table add-transition
s3 s5 eps <literal-transition> table add-transition
s1 table final-states>> delete-at
s3 table final-states>> delete-at
t s5 table final-states>> set-at
s4 s5 2array stack push ] ;
M: kleene-star nfa-node ( node -- )
term>> nfa-node
[let* | regexp [ current-regexp get ]
stack [ regexp stack>> ]
s0 [ stack peek first ]
s1 [ stack pop second ]
s2 [ regexp next-state ]
s3 [ regexp next-state ]
table [ regexp nfa-table>> ] |
s1 table final-states>> delete-at
t s3 table final-states>> set-at
s1 s0 eps <literal-transition> table add-transition
s2 s0 eps <literal-transition> table add-transition
s2 s3 eps <literal-transition> table add-transition
s1 s3 eps <literal-transition> table add-transition
s2 s3 2array stack push ] ;
M: concatenation nfa-node ( node -- )
seq>>
[ [ nfa-node ] each ]
[ length 1- [ concatenate-nodes ] times ] bi ;
M: alternation nfa-node ( node -- )
seq>>
[ [ nfa-node ] each ]
[ length 1- [ alternate-nodes ] times ] bi ;
M: constant nfa-node ( node -- )
char>> literal-transition add-simple-entry ;
M: epsilon nfa-node ( node -- )
drop eps literal-transition add-simple-entry ;
M: word nfa-node ( node -- )
class-transition add-simple-entry ;
M: character-class-range nfa-node ( node -- )
class-transition add-simple-entry ;
M: capture-group nfa-node ( node -- )
term>> nfa-node ;
M: negation nfa-node ( node -- )
negation-mode inc
term>> nfa-node
negation-mode dec ;
: construct-nfa ( regexp -- )
[
reset-regexp
negation-mode off
[ current-regexp set ]
[ parse-tree>> nfa-node ]
[ set-start-state ] tri
] with-scope ;

View File

@ -0,0 +1,33 @@
USING: kernel tools.test regexp2.backend regexp2 ;
IN: regexp2.parser
: test-regexp ( string -- )
default-regexp parse-regexp ;
: test-regexp2 ( string -- regexp )
default-regexp dup parse-regexp ;
[ "(" ] [ unmatched-parentheses? ] must-fail-with
[ ] [ "a|b" test-regexp ] unit-test
[ ] [ "a.b" test-regexp ] unit-test
[ ] [ "a|b|c" test-regexp ] unit-test
[ ] [ "abc|b" test-regexp ] unit-test
[ ] [ "a|bcd" test-regexp ] unit-test
[ ] [ "a|(b)" test-regexp ] unit-test
[ ] [ "(a)|b" test-regexp ] unit-test
[ ] [ "(a|b)" test-regexp ] unit-test
[ ] [ "((a)|(b))" test-regexp ] unit-test
[ ] [ "(?:a)" test-regexp ] unit-test
[ ] [ "(?i:a)" test-regexp ] unit-test
[ ] [ "(?-i:a)" test-regexp ] unit-test
[ "(?z:a)" test-regexp ] [ bad-option? ] must-fail-with
[ "(?-z:a)" test-regexp ] [ bad-option? ] must-fail-with
[ ] [ "(?=a)" test-regexp ] unit-test
[ ] [ "[abc]" test-regexp ] unit-test
[ ] [ "[a-c]" test-regexp ] unit-test
[ ] [ "[^a-c]" test-regexp ] unit-test
[ "[^]" test-regexp ] must-fail

View File

@ -0,0 +1,362 @@
! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays assocs combinators io io.streams.string
kernel math math.parser multi-methods namespaces qualified
quotations sequences sequences.lib splitting symbols vectors
dlists math.order combinators.lib unicode.categories
sequences.lib regexp2.backend regexp2.utils ;
IN: regexp2.parser
FROM: math.ranges => [a,b] ;
MIXIN: node
TUPLE: concatenation seq ; INSTANCE: concatenation node
TUPLE: alternation seq ; INSTANCE: alternation node
TUPLE: kleene-star term ; INSTANCE: kleene-star node
TUPLE: question term ; INSTANCE: question node
TUPLE: negation term ; INSTANCE: negation node
TUPLE: constant char ; INSTANCE: constant node
TUPLE: range from to ; INSTANCE: range node
TUPLE: lookahead term ; INSTANCE: lookahead node
TUPLE: lookbehind term ; INSTANCE: lookbehind node
TUPLE: capture-group term ; INSTANCE: capture-group node
TUPLE: non-capture-group term ; INSTANCE: non-capture-group node
TUPLE: independent-group term ; INSTANCE: independent-group node
TUPLE: character-class-range from to ; INSTANCE: character-class-range node
SINGLETON: epsilon INSTANCE: epsilon node
SINGLETON: any-char INSTANCE: any-char node
SINGLETON: front-anchor INSTANCE: front-anchor node
SINGLETON: back-anchor INSTANCE: back-anchor node
TUPLE: option-on option ; INSTANCE: option-on node
TUPLE: option-off option ; INSTANCE: option-off node
SINGLETONS: unix-lines dotall multiline comments case-insensitive unicode-case ;
MIXIN: regexp-option
INSTANCE: unix-lines regexp-option
INSTANCE: dotall regexp-option
INSTANCE: multiline regexp-option
INSTANCE: comments regexp-option
INSTANCE: case-insensitive regexp-option
INSTANCE: unicode-case regexp-option
SINGLETONS: letter-class LETTER-class Letter-class digit-class
alpha-class non-newline-blank-class
ascii-class punctuation-class java-printable-class blank-class
control-character-class hex-digit-class java-blank-class c-identifier-class ;
SINGLETONS: beginning-of-group end-of-group
beginning-of-character-class end-of-character-class
left-parenthesis pipe caret dash ;
: <constant> ( obj -- constant ) constant boa ;
: <negation> ( obj -- negation ) negation boa ;
: <concatenation> ( seq -- concatenation ) >vector concatenation boa ;
: <alternation> ( seq -- alternation ) >vector alternation boa ;
: <capture-group> ( obj -- capture-group ) capture-group boa ;
: <kleene-star> ( obj -- kleene-star ) kleene-star boa ;
: first|concatenation ( seq -- first/concatenation )
dup length 1 = [ first ] [ <concatenation> ] if ;
: first|alternation ( seq -- first/alternation )
dup length 1 = [ first ] [ <alternation> ] if ;
ERROR: unmatched-parentheses ;
: make-positive-lookahead ( string -- )
lookahead boa push-stack ;
: make-negative-lookahead ( string -- )
<negation> lookahead boa push-stack ;
: make-independent-group ( string -- )
#! no backtracking
independent-group boa push-stack ;
: make-positive-lookbehind ( string -- )
lookbehind boa push-stack ;
: make-negative-lookbehind ( string -- )
<negation> lookbehind boa push-stack ;
DEFER: nested-parse-regexp
: make-non-capturing-group ( string -- )
non-capture-group boa push-stack ;
ERROR: bad-option ch ;
: option ( ch -- singleton )
{
{ CHAR: i [ case-insensitive ] }
{ CHAR: d [ unix-lines ] }
{ CHAR: m [ multiline ] }
{ CHAR: s [ dotall ] }
{ CHAR: u [ unicode-case ] }
{ CHAR: x [ comments ] }
[ bad-option ]
} case ;
: option-on ( ch -- ) option \ option-on boa push-stack ;
: option-off ( ch -- ) option \ option-off boa push-stack ;
: toggle-option ( ch ? -- ) [ option-on ] [ option-off ] if ;
: (parse-options) ( string ? -- ) [ toggle-option ] curry each ;
: parse-options ( string -- )
"-" split1 [ t (parse-options) ] [ f (parse-options) ] bi* ;
DEFER: (parse-regexp)
: parse-special-group-options ( options -- )
beginning-of-group push-stack
parse-options (parse-regexp) pop-stack make-non-capturing-group ;
ERROR: bad-special-group string ;
: (parse-special-group) ( -- )
read1 {
{ [ dup CHAR: : = ]
[ drop nested-parse-regexp pop-stack make-non-capturing-group ] }
{ [ dup CHAR: = = ]
[ drop nested-parse-regexp pop-stack make-positive-lookahead ] }
{ [ dup CHAR: = = ]
[ drop nested-parse-regexp pop-stack make-negative-lookahead ] }
{ [ dup CHAR: > = ]
[ drop nested-parse-regexp pop-stack make-independent-group ] }
{ [ dup CHAR: < = peek1 CHAR: = = and ]
[ drop read1 drop nested-parse-regexp pop-stack make-positive-lookbehind ] }
{ [ dup CHAR: < = peek1 CHAR: ! = and ]
[ drop read1 drop nested-parse-regexp pop-stack make-negative-lookbehind ] }
[
":" read-until [ bad-special-group ] unless
swap prefix parse-special-group-options
]
} cond ;
: handle-left-parenthesis ( -- )
peek1 CHAR: ? =
[ read1 drop (parse-special-group) ]
[ nested-parse-regexp ] if ;
: handle-dot ( -- ) any-char push-stack ;
: handle-pipe ( -- ) pipe push-stack ;
: handle-star ( -- ) stack pop <kleene-star> push-stack ;
: handle-question ( -- )
stack pop epsilon 2array <alternation> push-stack ;
: handle-plus ( -- )
stack pop dup <kleene-star> 2array <concatenation> push-stack ;
ERROR: unmatched-brace ;
: parse-repetition ( -- start finish ? )
"}" read-until [ unmatched-brace ] unless
[ "," split1 [ string>number ] bi@ ]
[ CHAR: , swap index >boolean ] bi ;
: replicate/concatenate ( n obj -- obj' )
over zero? [ 2drop epsilon ]
[ <repetition> first|concatenation ] if ;
: exactly-n ( n -- )
stack pop replicate/concatenate push-stack ;
: at-least-n ( n -- )
stack pop
[ replicate/concatenate ] keep
<kleene-star> 2array <concatenation> push-stack ;
: at-most-n ( n -- )
1+
stack pop
[ replicate/concatenate ] curry map <alternation> push-stack ;
: from-m-to-n ( m n -- )
[a,b]
stack pop
[ replicate/concatenate ] curry map
<alternation> push-stack ;
ERROR: invalid-range a b ;
: handle-left-brace ( -- )
parse-repetition
>r 2dup [ [ 0 < [ invalid-range ] when ] when* ] bi@ r>
[
2dup and [ from-m-to-n ]
[ [ nip at-most-n ] [ at-least-n ] if* ] if
] [ drop 0 max exactly-n ] if ;
: handle-front-anchor ( -- ) front-anchor push-stack ;
: handle-back-anchor ( -- ) back-anchor push-stack ;
ERROR: bad-character-class obj ;
ERROR: expected-posix-class ;
: parse-posix-class ( -- obj )
read1 CHAR: { = [ expected-posix-class ] unless
"}" read-until [ bad-character-class ] unless
{
{ "Lower" [ letter-class ] }
{ "Upper" [ LETTER-class ] }
{ "ASCII" [ ascii-class ] }
{ "Alpha" [ Letter-class ] }
{ "Digit" [ digit-class ] }
{ "Alnum" [ alpha-class ] }
{ "Punct" [ punctuation-class ] }
{ "Graph" [ java-printable-class ] }
{ "Print" [ java-printable-class ] }
{ "Blank" [ non-newline-blank-class ] }
{ "Cntrl" [ control-character-class ] }
{ "XDigit" [ hex-digit-class ] }
{ "Space" [ java-blank-class ] }
! TODO: unicode-character-class, fallthrough in unicode is bad-char-clss
[ bad-character-class ]
} case ;
: parse-octal ( -- n ) 3 read oct> check-octal ;
: parse-short-hex ( -- n ) 2 read hex> check-hex ;
: parse-long-hex ( -- n ) 6 read hex> check-hex ;
: parse-control-character ( -- n ) read1 ;
ERROR: bad-escaped-literals seq ;
: parse-escaped-literals ( -- obj )
"\\E" read-until [ bad-escaped-literals ] unless
read1 drop
[ epsilon ] [
[ <constant> ] V{ } map-as
first|concatenation
] if-empty ;
: parse-escaped ( -- obj )
read1
{
{ CHAR: \ [ CHAR: \ <constant> ] }
{ CHAR: . [ CHAR: . <constant> ] }
{ CHAR: t [ CHAR: \t <constant> ] }
{ CHAR: n [ CHAR: \n <constant> ] }
{ CHAR: r [ CHAR: \r <constant> ] }
{ CHAR: f [ HEX: c <constant> ] }
{ CHAR: a [ HEX: 7 <constant> ] }
{ CHAR: e [ HEX: 1b <constant> ] }
{ CHAR: d [ digit-class ] }
{ CHAR: D [ digit-class <negation> ] }
{ CHAR: s [ java-blank-class ] }
{ CHAR: S [ java-blank-class <negation> ] }
{ CHAR: w [ c-identifier-class ] }
{ CHAR: W [ c-identifier-class <negation> ] }
{ CHAR: p [ parse-posix-class ] }
{ CHAR: P [ parse-posix-class <negation> ] }
{ CHAR: x [ parse-short-hex <constant> ] }
{ CHAR: u [ parse-long-hex <constant> ] }
{ CHAR: 0 [ parse-octal <constant> ] }
{ CHAR: c [ parse-control-character ] }
{ CHAR: Q [ parse-escaped-literals ] }
} case ;
: handle-escape ( -- ) parse-escaped push-stack ;
: handle-dash ( vector -- vector' )
H{ { dash CHAR: - } } substitute ;
: character-class>alternation ( seq -- alternation )
[ dup number? [ <constant> ] when ] map first|alternation ;
: handle-caret ( vector -- vector' )
dup [ length 2 >= ] [ first caret eq? ] bi and [
rest-slice character-class>alternation <negation>
] [
character-class>alternation
] if ;
: make-character-class ( -- character-class )
[ beginning-of-character-class swap cut-stack ] change-whole-stack
handle-dash handle-caret ;
: apply-dash ( -- )
stack [ pop3 nip character-class-range boa ] keep push ;
: apply-dash? ( -- ? )
stack dup length 3 >=
[ [ length 2 - ] keep nth dash eq? ] [ drop f ] if ;
ERROR: empty-negated-character-class ;
DEFER: handle-left-bracket
: (parse-character-class) ( -- )
read1 [ empty-negated-character-class ] unless* {
{ CHAR: [ [ handle-left-bracket t ] }
{ CHAR: ] [ make-character-class push-stack f ] }
{ CHAR: - [ dash push-stack t ] }
{ CHAR: \ [ parse-escaped push-stack t ] }
[ push-stack apply-dash? [ apply-dash ] when t ]
} case
[ (parse-character-class) ] when ;
: parse-character-class-second ( -- )
read1 {
{ CHAR: [ [ CHAR: [ <constant> push-stack ] }
{ CHAR: ] [ CHAR: ] <constant> push-stack ] }
{ CHAR: - [ CHAR: - <constant> push-stack ] }
[ push1 ]
} case ;
: parse-character-class-first ( -- )
read1 {
{ CHAR: ^ [ caret push-stack parse-character-class-second ] }
{ CHAR: [ [ CHAR: [ <constant> push-stack ] }
{ CHAR: ] [ CHAR: ] <constant> push-stack ] }
{ CHAR: - [ CHAR: - <constant> push-stack ] }
[ push1 ]
} case ;
: handle-left-bracket ( -- )
beginning-of-character-class push-stack
parse-character-class-first (parse-character-class) ;
ERROR: empty-regexp ;
: finish-regexp-parse ( stack -- obj )
dup length {
{ 0 [ empty-regexp ] }
{ 1 [ first ] }
[
drop { pipe } split
[ first|concatenation ] map first|alternation
]
} case ;
: handle-right-parenthesis ( -- )
stack beginning-of-group over last-index cut rest
[ current-regexp get swap >>stack drop ]
[ finish-regexp-parse <capture-group> push-stack ] bi* ;
: nested-parse-regexp ( -- )
beginning-of-group push-stack (parse-regexp) ;
: ((parse-regexp)) ( token -- )
{
{ CHAR: . [ handle-dot ] }
{ CHAR: ( [ handle-left-parenthesis ] }
{ CHAR: ) [ handle-right-parenthesis ] }
{ CHAR: | [ handle-pipe ] }
{ CHAR: ? [ handle-question ] }
{ CHAR: * [ handle-star ] }
{ CHAR: + [ handle-plus ] }
{ CHAR: { [ handle-left-brace ] }
{ CHAR: [ [ handle-left-bracket ] }
{ CHAR: ^ [ handle-front-anchor ] }
{ CHAR: $ [ handle-back-anchor ] }
{ CHAR: \ [ handle-escape ] }
[ <constant> push-stack ]
} case ;
: (parse-regexp) ( -- )
read1 [ ((parse-regexp)) (parse-regexp) ] when* ;
: parse-regexp ( regexp -- )
dup current-regexp [
raw>> [
<string-reader> [ (parse-regexp) ] with-input-stream
] unless-empty
current-regexp get
stack finish-regexp-parse
>>parse-tree drop
] with-variable ;

View File

@ -0,0 +1,240 @@
USING: regexp2 tools.test kernel regexp2.traversal ;
IN: regexp2-tests
[ f ] [ "b" "a*" <regexp> matches? ] unit-test
[ t ] [ "" "a*" <regexp> matches? ] unit-test
[ t ] [ "a" "a*" <regexp> matches? ] unit-test
[ t ] [ "aaaaaaa" "a*" <regexp> matches? ] unit-test
[ f ] [ "ab" "a*" <regexp> matches? ] unit-test
[ t ] [ "abc" "abc" <regexp> matches? ] unit-test
[ t ] [ "a" "a|b|c" <regexp> matches? ] unit-test
[ t ] [ "b" "a|b|c" <regexp> matches? ] unit-test
[ t ] [ "c" "a|b|c" <regexp> matches? ] unit-test
[ f ] [ "c" "d|e|f" <regexp> matches? ] unit-test
[ f ] [ "aa" "a|b|c" <regexp> matches? ] unit-test
[ f ] [ "bb" "a|b|c" <regexp> matches? ] unit-test
[ f ] [ "cc" "a|b|c" <regexp> matches? ] unit-test
[ f ] [ "cc" "d|e|f" <regexp> matches? ] unit-test
[ f ] [ "" "a+" <regexp> matches? ] unit-test
[ t ] [ "a" "a+" <regexp> matches? ] unit-test
[ t ] [ "aa" "a+" <regexp> matches? ] unit-test
[ t ] [ "" "a?" <regexp> matches? ] unit-test
[ t ] [ "a" "a?" <regexp> matches? ] unit-test
[ f ] [ "aa" "a?" <regexp> matches? ] unit-test
[ f ] [ "" "." <regexp> matches? ] unit-test
[ t ] [ "a" "." <regexp> matches? ] unit-test
[ t ] [ "." "." <regexp> matches? ] unit-test
! [ f ] [ "\n" "." <regexp> matches? ] unit-test
[ f ] [ "" ".+" <regexp> matches? ] unit-test
[ t ] [ "a" ".+" <regexp> matches? ] unit-test
[ t ] [ "ab" ".+" <regexp> matches? ] unit-test
[ t ] [ "" "a|b*|c+|d?" <regexp> matches? ] unit-test
[ t ] [ "a" "a|b*|c+|d?" <regexp> matches? ] unit-test
[ t ] [ "c" "a|b*|c+|d?" <regexp> matches? ] unit-test
[ t ] [ "cc" "a|b*|c+|d?" <regexp> matches? ] unit-test
[ f ] [ "ccd" "a|b*|c+|d?" <regexp> matches? ] unit-test
[ t ] [ "d" "a|b*|c+|d?" <regexp> matches? ] unit-test
[ t ] [ "foo" "foo|bar" <regexp> matches? ] unit-test
[ t ] [ "bar" "foo|bar" <regexp> matches? ] unit-test
[ f ] [ "foobar" "foo|bar" <regexp> matches? ] unit-test
[ f ] [ "" "(a)" <regexp> matches? ] unit-test
[ t ] [ "a" "(a)" <regexp> matches? ] unit-test
[ f ] [ "aa" "(a)" <regexp> matches? ] unit-test
[ t ] [ "aa" "(a*)" <regexp> matches? ] unit-test
[ f ] [ "aababaaabbac" "(a|b)+" <regexp> matches? ] unit-test
[ t ] [ "ababaaabba" "(a|b)+" <regexp> matches? ] unit-test
[ f ] [ "" "a{1}" <regexp> matches? ] unit-test
[ t ] [ "a" "a{1}" <regexp> matches? ] unit-test
[ f ] [ "aa" "a{1}" <regexp> matches? ] unit-test
[ f ] [ "a" "a{2,}" <regexp> matches? ] unit-test
[ t ] [ "aaa" "a{2,}" <regexp> matches? ] unit-test
[ t ] [ "aaaa" "a{2,}" <regexp> matches? ] unit-test
[ t ] [ "aaaaa" "a{2,}" <regexp> matches? ] unit-test
[ t ] [ "" "a{,2}" <regexp> matches? ] unit-test
[ t ] [ "a" "a{,2}" <regexp> matches? ] unit-test
[ t ] [ "aa" "a{,2}" <regexp> matches? ] unit-test
[ f ] [ "aaa" "a{,2}" <regexp> matches? ] unit-test
[ f ] [ "aaaa" "a{,2}" <regexp> matches? ] unit-test
[ f ] [ "aaaaa" "a{,2}" <regexp> matches? ] unit-test
[ f ] [ "" "a{1,3}" <regexp> matches? ] unit-test
[ t ] [ "a" "a{1,3}" <regexp> matches? ] unit-test
[ t ] [ "aa" "a{1,3}" <regexp> matches? ] unit-test
[ t ] [ "aaa" "a{1,3}" <regexp> matches? ] unit-test
[ f ] [ "aaaa" "a{1,3}" <regexp> matches? ] unit-test
[ f ] [ "" "[a]" <regexp> matches? ] unit-test
[ t ] [ "a" "[a]" <regexp> matches? ] unit-test
[ t ] [ "a" "[abc]" <regexp> matches? ] unit-test
[ f ] [ "b" "[a]" <regexp> matches? ] unit-test
[ f ] [ "d" "[abc]" <regexp> matches? ] unit-test
[ t ] [ "ab" "[abc]{1,2}" <regexp> matches? ] unit-test
[ f ] [ "abc" "[abc]{1,2}" <regexp> matches? ] unit-test
[ f ] [ "" "[^a]" <regexp> matches? ] unit-test
[ f ] [ "a" "[^a]" <regexp> matches? ] unit-test
[ f ] [ "a" "[^abc]" <regexp> matches? ] unit-test
[ t ] [ "b" "[^a]" <regexp> matches? ] unit-test
[ t ] [ "d" "[^abc]" <regexp> matches? ] unit-test
[ f ] [ "ab" "[^abc]{1,2}" <regexp> matches? ] unit-test
[ f ] [ "abc" "[^abc]{1,2}" <regexp> matches? ] unit-test
[ t ] [ "]" "[]]" <regexp> matches? ] unit-test
[ f ] [ "]" "[^]]" <regexp> matches? ] unit-test
[ t ] [ "a" "[^]]" <regexp> matches? ] unit-test
[ "^" "[^]" <regexp> matches? ] must-fail
[ t ] [ "^" "[]^]" <regexp> matches? ] unit-test
[ t ] [ "]" "[]^]" <regexp> matches? ] unit-test
[ t ] [ "[" "[[]" <regexp> matches? ] unit-test
[ f ] [ "^" "[^^]" <regexp> matches? ] unit-test
[ t ] [ "a" "[^^]" <regexp> matches? ] unit-test
[ t ] [ "-" "[-]" <regexp> matches? ] unit-test
[ f ] [ "a" "[-]" <regexp> matches? ] unit-test
[ f ] [ "-" "[^-]" <regexp> matches? ] unit-test
[ t ] [ "a" "[^-]" <regexp> matches? ] unit-test
[ t ] [ "-" "[-a]" <regexp> matches? ] unit-test
[ t ] [ "a" "[-a]" <regexp> matches? ] unit-test
[ t ] [ "-" "[a-]" <regexp> matches? ] unit-test
[ t ] [ "a" "[a-]" <regexp> matches? ] unit-test
[ f ] [ "b" "[a-]" <regexp> matches? ] unit-test
[ f ] [ "-" "[^-]" <regexp> matches? ] unit-test
[ t ] [ "a" "[^-]" <regexp> matches? ] unit-test
[ f ] [ "-" "[a-c]" <regexp> matches? ] unit-test
[ t ] [ "-" "[^a-c]" <regexp> matches? ] unit-test
[ t ] [ "b" "[a-c]" <regexp> matches? ] unit-test
[ f ] [ "b" "[^a-c]" <regexp> matches? ] unit-test
[ t ] [ "-" "[a-c-]" <regexp> matches? ] unit-test
[ f ] [ "-" "[^a-c-]" <regexp> matches? ] unit-test
[ t ] [ "\\" "[\\\\]" <regexp> matches? ] unit-test
[ f ] [ "a" "[\\\\]" <regexp> matches? ] unit-test
[ f ] [ "\\" "[^\\\\]" <regexp> matches? ] unit-test
[ t ] [ "a" "[^\\\\]" <regexp> matches? ] unit-test
[ t ] [ "0" "[\\d]" <regexp> matches? ] unit-test
[ f ] [ "a" "[\\d]" <regexp> matches? ] unit-test
[ f ] [ "0" "[^\\d]" <regexp> matches? ] unit-test
[ t ] [ "a" "[^\\d]" <regexp> matches? ] unit-test
[ t ] [ "a" "[a-z]{1,}|[A-Z]{2,4}|b*|c|(f|g)*" <regexp> matches? ] unit-test
[ t ] [ "a" "[a-z]{1,2}|[A-Z]{3,3}|b*|c|(f|g)*" <regexp> matches? ] unit-test
[ t ] [ "a" "[a-z]{1,2}|[A-Z]{3,3}" <regexp> matches? ] unit-test
[ t ] [ "1000" "\\d{4,6}" <regexp> matches? ] unit-test
[ t ] [ "1000" "[0-9]{4,6}" <regexp> matches? ] unit-test
[ t ] [ "abc" "\\p{Lower}{3}" <regexp> matches? ] unit-test
[ f ] [ "ABC" "\\p{Lower}{3}" <regexp> matches? ] unit-test
[ t ] [ "ABC" "\\p{Upper}{3}" <regexp> matches? ] unit-test
[ f ] [ "abc" "\\p{Upper}{3}" <regexp> matches? ] unit-test
!
[ f ] [ "abc" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
[ t ] [ "ABC" "[\\p{Upper}]{3}" <regexp> matches? ] unit-test
[ t ] [ "" "\\Q\\E" <regexp> matches? ] unit-test
[ f ] [ "a" "\\Q\\E" <regexp> matches? ] unit-test
[ t ] [ "|*+" "\\Q|*+\\E" <regexp> matches? ] unit-test
[ f ] [ "abc" "\\Q|*+\\E" <regexp> matches? ] unit-test
[ t ] [ "s" "\\Qs\\E" <regexp> matches? ] unit-test
[ t ] [ "S" "\\0123" <regexp> matches? ] unit-test
[ t ] [ "SXY" "\\0123XY" <regexp> matches? ] unit-test
[ t ] [ "x" "\\x78" <regexp> matches? ] unit-test
[ f ] [ "y" "\\x78" <regexp> matches? ] unit-test
[ t ] [ "x" "\\u000078" <regexp> matches? ] unit-test
[ f ] [ "y" "\\u000078" <regexp> matches? ] unit-test
[ t ] [ "ab" "a+b" <regexp> matches? ] unit-test
[ f ] [ "b" "a+b" <regexp> matches? ] unit-test
[ t ] [ "aab" "a+b" <regexp> matches? ] unit-test
[ f ] [ "abb" "a+b" <regexp> matches? ] unit-test
[ t ] [ "abbbb" "ab*" <regexp> matches? ] unit-test
[ t ] [ "a" "ab*" <regexp> matches? ] unit-test
[ f ] [ "abab" "ab*" <regexp> matches? ] unit-test
[ f ] [ "x" "\\." <regexp> matches? ] unit-test
[ t ] [ "." "\\." <regexp> matches? ] unit-test
[ t ] [ "aaaab" "a+ab" <regexp> matches? ] unit-test
[ f ] [ "aaaxb" "a+ab" <regexp> matches? ] unit-test
[ t ] [ "aaacb" "a+cb" <regexp> matches? ] unit-test
[ f ] [ "aaaab" "a++ab" <regexp> matches? ] unit-test
[ t ] [ "aaacb" "a++cb" <regexp> matches? ] unit-test
[ 3 ] [ "aaacb" "a*" <regexp> match-head ] unit-test
[ 1 ] [ "aaacb" "a+?" <regexp> match-head ] unit-test
[ 2 ] [ "aaacb" "aa?" <regexp> match-head ] unit-test
[ 1 ] [ "aaacb" "aa??" <regexp> match-head ] unit-test
[ 3 ] [ "aacb" "aa?c" <regexp> match-head ] unit-test
[ 3 ] [ "aacb" "aa??c" <regexp> match-head ] unit-test
! [ t ] [ "aaa" "AAA" t <regexp> matches? ] unit-test
! [ f ] [ "aax" "AAA" t <regexp> matches? ] unit-test
! [ t ] [ "aaa" "A*" t <regexp> matches? ] unit-test
! [ f ] [ "aaba" "A*" t <regexp> matches? ] unit-test
! [ t ] [ "b" "[AB]" t <regexp> matches? ] unit-test
! [ f ] [ "c" "[AB]" t <regexp> matches? ] unit-test
! [ t ] [ "c" "[A-Z]" t <regexp> matches? ] unit-test
! [ f ] [ "3" "[A-Z]" t <regexp> matches? ] unit-test
[ ] [
"(0[lL]?|[1-9]\\d{0,9}(\\d{0,9}[lL])?|0[xX]\\p{XDigit}{1,8}(\\p{XDigit}{0,8}[lL])?|0[0-7]{1,11}([0-7]{0,11}[lL])?|([0-9]+\\.[0-9]*|\\.[0-9]+)([eE][+-]?[0-9]+)?[fFdD]?|[0-9]+([eE][+-]?[0-9]+[fFdD]?|([eE][+-]?[0-9]+)?[fFdD]))"
<regexp> drop
] unit-test
[ t ] [ "fxxbar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
[ f ] [ "foobar" "(?!foo).{3}bar" <regexp> matches? ] unit-test
! [ 3 ] [ "foobar" "foo(?=bar)" <regexp> match-head ] unit-test
! [ f ] [ "foobxr" "foo(?=bar)" <regexp> match-head ] unit-test
! [ f ] [ "foobxr" "foo\\z" <regexp> match-head ] unit-test
! [ 3 ] [ "foo" "foo\\z" <regexp> match-head ] unit-test
! [ 3 ] [ "foo bar" "foo\\b" <regexp> match-head ] unit-test
! [ f ] [ "fooxbar" "foo\\b" <regexp> matches? ] unit-test
! [ t ] [ "foo" "foo\\b" <regexp> matches? ] unit-test
! [ t ] [ "foo bar" "foo\\b bar" <regexp> matches? ] unit-test
! [ f ] [ "fooxbar" "foo\\bxbar" <regexp> matches? ] unit-test
! [ f ] [ "foo" "foo\\bbar" <regexp> matches? ] unit-test
! [ f ] [ "foo bar" "foo\\B" <regexp> matches? ] unit-test
! [ 3 ] [ "fooxbar" "foo\\B" <regexp> match-head ] unit-test
! [ t ] [ "foo" "foo\\B" <regexp> matches? ] unit-test
! [ f ] [ "foo bar" "foo\\B bar" <regexp> matches? ] unit-test
! [ t ] [ "fooxbar" "foo\\Bxbar" <regexp> matches? ] unit-test
! [ f ] [ "foo" "foo\\Bbar" <regexp> matches? ] unit-test
! [ t ] [ "s@f" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
! [ f ] [ "a" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
! [ t ] [ ".o" "\\.[a-z]" <regexp> matches? ] unit-test
! Bug in parsing word
! [ t ] [ "a" R' a' matches? ] unit-test
! ((A)(B(C)))
! 1. ((A)(B(C)))
! 2. (A)
! 3. (B(C))
! 4. (C)

View File

@ -0,0 +1,35 @@
! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors combinators kernel regexp2.backend regexp2.utils
regexp2.parser regexp2.nfa regexp2.dfa regexp2.traversal state-tables
regexp2.transition-tables ;
IN: regexp2
: default-regexp ( string -- regexp )
regexp new
swap >>raw
<transition-table> >>nfa-table
<transition-table> >>dfa-table
<transition-table> >>minimized-table
reset-regexp ;
: <regexp> ( string -- regexp )
default-regexp
{
[ parse-regexp ]
[ construct-nfa ]
[ construct-dfa ]
[ ]
} cleave ;
: R! CHAR: ! <regexp> ; parsing
: R" CHAR: " <regexp> ; parsing
: R# CHAR: # <regexp> ; parsing
: R' CHAR: ' <regexp> ; parsing
: R( CHAR: ) <regexp> ; parsing
: R/ CHAR: / <regexp> ; parsing
: R@ CHAR: @ <regexp> ; parsing
: R[ CHAR: ] <regexp> ; parsing
: R` CHAR: ` <regexp> ; parsing
: R{ CHAR: } <regexp> ; parsing
: R| CHAR: | <regexp> ; parsing

View File

@ -0,0 +1 @@
Regular expressions

2
extra/regexp2/tags.txt Normal file
View File

@ -0,0 +1,2 @@
parsing
text

View File

@ -0,0 +1,44 @@
! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays assocs fry hashtables kernel sequences
vectors ;
IN: regexp2.transition-tables
: insert-at ( value key hash -- )
2dup at* [
2nip push
] [
drop >r >r dup vector? [ 1vector ] unless r> r> set-at
] if ;
: ?insert-at ( value key hash/f -- hash )
[ H{ } clone ] unless* [ insert-at ] keep ;
TUPLE: transition from to obj ;
TUPLE: literal-transition < transition ;
TUPLE: class-transition < transition ;
TUPLE: default-transition < transition ;
TUPLE: literal obj ;
TUPLE: class obj ;
TUPLE: default ;
: <literal-transition> ( from to obj -- transition ) literal-transition boa ;
: <class-transition> ( from to obj -- transition ) class-transition boa ;
: <default-transition> ( from to -- transition ) t default-transition boa ;
TUPLE: transition-table transitions
literals classes defaults
start-state final-states ;
: <transition-table> ( -- transition-table )
transition-table new
H{ } clone >>transitions
H{ } clone >>final-states ;
: set-transition ( transition hash -- )
>r [ to>> ] [ obj>> ] [ from>> ] tri r>
2dup at* [ 2nip insert-at ]
[ drop >r >r H{ } clone [ insert-at ] keep r> r> set-at ] if ;
: add-transition ( transition transition-table -- )
transitions>> set-transition ;

View File

@ -0,0 +1,88 @@
! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors assocs combinators combinators.lib kernel
math math.ranges quotations sequences regexp2.parser
regexp2.classes combinators.short-circuit assocs.lib
sequences.lib ;
IN: regexp2.traversal
TUPLE: dfa-traverser
dfa-table
last-state current-state
text
start-index current-index
matches ;
: <dfa-traverser> ( text regexp -- match )
dfa-table>>
dfa-traverser new
swap [ start-state>> >>current-state ] keep
>>dfa-table
swap >>text
0 >>start-index
0 >>current-index
V{ } clone >>matches ;
: final-state? ( dfa-traverser -- ? )
[ current-state>> ] [ dfa-table>> final-states>> ] bi
key? ;
: text-finished? ( dfa-traverser -- ? )
[ current-index>> ] [ text>> length ] bi >= ;
: save-final-state ( dfa-straverser -- )
[ current-index>> ] [ matches>> ] bi push ;
: match-done? ( dfa-traverser -- ? )
dup final-state? [
dup save-final-state
] when text-finished? ;
: increment-state ( dfa-traverser state -- dfa-traverser )
>r [ 1+ ] change-current-index
dup current-state>> >>last-state r>
first >>current-state ;
: match-failed ( dfa-traverser -- dfa-traverser )
V{ } clone >>matches ;
: match-literal ( transition from-state table -- to-state/f )
transitions>> [ at ] [ 2drop f ] if-at ;
: assoc-with ( param assoc quot -- assoc curry )
swapd [ [ -rot ] dip call ] 2curry ; inline
: match-class ( transition from-state table -- to-state/f )
transitions>> at* [
[ drop class-member? ] assoc-with assoc-find [ nip ] [ drop ] if
] [ drop ] if ;
: match-default ( transition from-state table -- to-state/f )
[ nip ] dip transitions>>
[ t swap [ drop f ] unless-at ] [ drop f ] if-at ;
: match-transition ( obj from-state dfa -- to-state/f )
{ [ match-literal ] [ match-class ] [ match-default ] } 3|| ;
: setup-match ( match -- obj state dfa-table )
{ current-index>> text>> current-state>> dfa-table>> } get-slots
[ nth ] 2dip ;
: do-match ( dfa-traverser -- dfa-traverser )
dup match-done? [
dup setup-match match-transition
[ increment-state do-match ] when*
] unless ;
: return-match ( dfa-traverser -- interval/f )
dup matches>>
[ drop f ]
[ [ start-index>> ] [ peek ] bi* 1 <range> ] if-empty ;
: match ( string regexp -- pair )
<dfa-traverser> do-match return-match ;
: matches? ( string regexp -- ? )
dupd match [ [ length ] [ range-length 1- ] bi* = ] [ drop f ] if* ;
: match-head ( string regexp -- end ) match length>> 1- ;

View File

@ -0,0 +1,69 @@
! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays assocs combinators.lib io kernel
math math.order namespaces regexp2.backend sequences
sequences.lib unicode.categories math.ranges fry
combinators.short-circuit ;
IN: regexp2.utils
: (while-changes) ( obj quot pred pred-ret -- obj )
! quot: ( obj -- obj' )
! pred: ( obj -- <=> )
>r >r dup slip r> pick over call r> dupd =
[ 3drop ] [ (while-changes) ] if ; inline
: while-changes ( obj quot pred -- obj' )
pick over call (while-changes) ; inline
: last-state ( regexp -- range ) stack>> peek first2 [a,b] ;
: push1 ( obj -- ) input-stream get stream>> push ;
: peek1 ( -- obj ) input-stream get stream>> [ f ] [ peek ] if-empty ;
: pop3 ( seq -- obj1 obj2 obj3 ) [ pop ] [ pop ] [ pop ] tri spin ;
: stack ( -- obj ) current-regexp get stack>> ;
: change-whole-stack ( quot -- )
current-regexp get
[ stack>> swap call ] keep (>>stack) ; inline
: push-stack ( obj -- ) stack push ;
: pop-stack ( -- obj ) stack pop ;
: cut-out ( vector n -- vector' vector ) cut rest ;
ERROR: cut-stack-error ;
: cut-stack ( obj vector -- vector' vector )
tuck last-index [ cut-stack-error ] unless* cut-out swap ;
ERROR: bad-octal number ;
ERROR: bad-hex number ;
: check-octal ( octal -- octal ) dup 255 > [ bad-octal ] when ;
: check-hex ( hex -- hex ) dup number? [ bad-hex ] unless ;
: ascii? ( n -- ? ) 0 HEX: 7f between? ;
: octal-digit? ( n -- ? ) CHAR: 0 CHAR: 7 between? ;
: decimal-digit? ( n -- ? ) CHAR: 0 CHAR: 9 between? ;
: hex-digit? ( n -- ? )
[
[ decimal-digit? ]
[ CHAR: a CHAR: f between? ]
[ CHAR: A CHAR: F between? ]
] 1|| ;
: control-char? ( n -- ? )
[
[ 0 HEX: 1f between? ]
[ HEX: 7f = ]
] 1|| ;
: punct? ( n -- ? )
"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" member? ;
: c-identifier-char? ( ch -- ? )
[ [ alpha? ] [ CHAR: _ = ] ] 1|| ;
: java-blank? ( n -- ? )
{
CHAR: \s CHAR: \t CHAR: \n
HEX: b HEX: 7 CHAR: \r
} member? ;
: java-printable? ( n -- ? )
[ [ alpha? ] [ punct? ] ] 1|| ;

View File

@ -1 +1 @@
taxes

View File

@ -1,5 +1,7 @@
USING: arrays assocs kernel math math.intervals namespaces ! Copyright (C) 2008 Doug Coleman.
sequences combinators.lib money math.order ; ! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays assocs kernel math math.intervals
namespaces sequences combinators.lib money math.order ;
IN: taxes IN: taxes
: monthly ( x -- y ) 12 / ; : monthly ( x -- y ) 12 / ;
@ -14,22 +16,21 @@ C: <w4> w4
: allowance ( -- x ) 3500 ; inline : allowance ( -- x ) 3500 ; inline
: calculate-w4-allowances ( w4 -- x ) : calculate-w4-allowances ( w4 -- x ) allowances>> allowance * ;
w4-allowances allowance * ;
! Withhold: FICA, Medicare, Federal (FICA is social security) ! Withhold: FICA, Medicare, Federal (FICA is social security)
: fica-tax-rate ( -- x ) DECIMAL: .062 ; inline : fica-tax-rate ( -- x ) DECIMAL: .062 ; inline
! Base rate -- income over this rate is not taxed ! Base rate -- income over this rate is not taxed
TUPLE: fica-base-unknown ; ERROR: fica-base-unknown ;
: fica-base-rate ( year -- x ) : fica-base-rate ( year -- x )
H{ H{
{ 2008 102000 } { 2008 102000 }
{ 2007 97500 } { 2007 97500 }
} at* [ T{ fica-base-unknown } throw ] unless ; } at* [ fica-base-unknown ] unless ;
: fica-tax ( salary w4 -- x ) : fica-tax ( salary w4 -- x )
w4-year fica-base-rate min fica-tax-rate * ; year>> fica-base-rate min fica-tax-rate * ;
! Employer tax only, not withheld ! Employer tax only, not withheld
: futa-tax-rate ( -- x ) DECIMAL: .062 ; inline : futa-tax-rate ( -- x ) DECIMAL: .062 ; inline
@ -64,8 +65,7 @@ TUPLE: tax-table single married ;
0 -rot [ tax-bracket ] each drop ; 0 -rot [ tax-bracket ] each drop ;
: marriage-table ( w4 tax-table -- triples ) : marriage-table ( w4 tax-table -- triples )
swap w4-married? swap married?>> [ married>> ] [ single>> ] if ;
[ tax-table-married ] [ tax-table-single ] if ;
: federal-tax ( salary w4 tax-table -- n ) : federal-tax ( salary w4 tax-table -- n )
[ adjust-allowances ] 2keep marriage-table tax ; [ adjust-allowances ] 2keep marriage-table tax ;