Merge commit 'origin/master' into emacs

db4
Jose A. Ortega Ruiz 2009-01-22 09:40:33 +01:00
commit fc658411fa
36 changed files with 645 additions and 474 deletions

View File

@ -8,10 +8,6 @@ sequences system libc alien.strings io.encodings.utf8 ;
[ { "blah" 123 } ] [ { "blah" xyz } expand-constants ] unit-test
: foo ( -- n ) &: fdafd [ 123 ] unless* ;
[ 123 ] [ foo ] unit-test
[ -1 ] [ -1 <char> *char ] unit-test
[ -1 ] [ -1 <short> *short ] unit-test
[ -1 ] [ -1 <int> *int ] unit-test

View File

@ -4,7 +4,7 @@ USING: accessors arrays alien alien.c-types alien.structs
alien.arrays alien.strings kernel math namespaces parser
sequences words quotations math.parser splitting grouping
effects assocs combinators lexer strings.parser alien.parser
fry vocabs.parser ;
fry vocabs.parser words.constant ;
IN: alien.syntax
: DLL" lexer get skip-blank parse-string dlopen parsed ; parsing
@ -31,10 +31,11 @@ IN: alien.syntax
: C-ENUM:
";" parse-tokens
dup length
[ [ create-in ] dip 1quotation define ] 2each ;
[ [ create-in ] dip define-constant ] each-index ;
parsing
: address-of ( name library -- value )
load-library dlsym [ "No such symbol" throw ] unless* ;
: &:
scan "c-library" get
'[ _ _ load-library dlsym ] over push-all ; parsing
scan "c-library" get '[ _ _ address-of ] over push-all ; parsing

View File

@ -96,11 +96,7 @@ M: object modify-form drop ;
dup method>> {
{ "GET" [ url>> query>> ] }
{ "HEAD" [ url>> query>> ] }
{ "POST" [
post-data>>
dup content-type>> "application/x-www-form-urlencoded" =
[ content>> ] [ drop f ] if
] }
{ "POST" [ post-data>> params>> ] }
} case ;
: referrer ( -- referrer/f )

View File

@ -32,10 +32,8 @@ IN: heaps.tests
: random-alist ( n -- alist )
[
[
32 random-bits dup number>string swap set
] times
] H{ } make-assoc ;
drop 32 random-bits dup number>string
] H{ } map>assoc ;
: test-heap-sort ( n -- ? )
random-alist dup >alist sort-keys swap heap-sort = ;

View File

@ -25,7 +25,7 @@ IN: http.client
dup header>> >hashtable
over url>> host>> [ over url>> url-host "host" pick set-at ] when
over post-data>> [
[ raw>> length "content-length" pick set-at ]
[ data>> length "content-length" pick set-at ]
[ content-type>> "content-type" pick set-at ]
bi
] when*
@ -34,21 +34,39 @@ IN: http.client
GENERIC: >post-data ( object -- post-data )
M: post-data >post-data ;
M: string >post-data utf8 encode "application/octet-stream" <post-data> ;
M: byte-array >post-data "application/octet-stream" <post-data> ;
M: assoc >post-data assoc>query ascii encode "application/x-www-form-urlencoded" <post-data> ;
M: f >post-data ;
M: post-data >post-data ;
M: string >post-data
utf8 encode
"application/octet-stream" <post-data>
swap >>data ;
M: assoc >post-data
"application/x-www-form-urlencoded" <post-data>
swap >>params ;
M: object >post-data
"application/octet-stream" <post-data>
swap >>data ;
: normalize-post-data ( request -- request )
dup post-data>> [
dup params>> [
assoc>query ascii encode >>data
] when* drop
] when* ;
: unparse-post-data ( request -- request )
[ >post-data ] change-post-data ;
[ >post-data ] change-post-data
normalize-post-data ;
: write-post-data ( request -- request )
dup method>> [ "POST" = ] [ "PUT" = ] bi or [ dup post-data>> raw>> write ] when ;
dup method>> { "POST" "PUT" } member? [
dup post-data>> data>> dup sequence?
[ write ] [ output-stream get stream-copy ] if
] when ;
: write-request ( request -- )
unparse-post-data

View File

@ -35,7 +35,7 @@ blah
{ method "POST" }
{ version "1.1" }
{ header H{ { "some-header" "1; 2" } { "content-length" "4" } { "content-type" "application/octet-stream" } } }
{ post-data T{ post-data { content "blah" } { raw "blah" } { content-type "application/octet-stream" } } }
{ post-data T{ post-data { data "blah" } { content-type "application/octet-stream" } } }
{ cookies V{ } }
}
] [

View File

@ -213,14 +213,11 @@ body ;
raw-response new
"1.1" >>version ;
TUPLE: post-data raw content content-type form-variables uploaded-files ;
TUPLE: post-data data params content-type content-encoding ;
: <post-data> ( form-variables uploaded-files raw content-type -- post-data )
: <post-data> ( content-type -- post-data )
post-data new
swap >>content-type
swap >>raw
swap >>uploaded-files
swap >>form-variables ;
swap >>content-type ;
: parse-content-type-attributes ( string -- attributes )
" " split harvest [ "=" split1 [ >lower ] dip ] { } map>assoc ;

View File

@ -34,7 +34,7 @@ IN: http.server.cgi
request get "accept" header "HTTP_ACCEPT" set
post-request? [
request get post-data>> raw>>
request get post-data>> data>>
[ "CONTENT_TYPE" set ]
[ length number>string "CONTENT_LENGTH" set ]
bi
@ -54,7 +54,7 @@ IN: http.server.cgi
swap '[
binary encode-output
_ output-stream get swap <cgi-process> binary <process-stream> [
post-request? [ request get post-data>> raw>> write flush ] when
post-request? [ request get post-data>> data>> write flush ] when
input-stream get swap (stream-copy)
] with-stream
] >>body ;

View File

@ -55,18 +55,17 @@ ERROR: no-boundary ;
: read-content ( request -- bytes )
"content-length" header string>number read ;
: parse-content ( request content-type -- form-variables uploaded-files raw )
{
{ "multipart/form-data" [ read-multipart-data f ] }
{ "application/x-www-form-urlencoded" [ read-content [ f f ] dip ] }
[ drop read-content [ f f ] dip ]
: parse-content ( request content-type -- post-data )
[ <post-data> swap ] keep {
{ "multipart/form-data" [ read-multipart-data assoc-union >>params ] }
{ "application/x-www-form-urlencoded" [ read-content query>assoc >>params ] }
[ drop read-content >>data ]
} case ;
: read-post-data ( request -- request )
dup method>> "POST" = [
dup dup "content-type" header
[ ";" split1 drop parse-content ] keep
<post-data> >>post-data
";" split1 drop parse-content >>post-data
] when ;
: extract-host ( request -- request )

View File

@ -51,4 +51,13 @@ PRIVATE>
: <"
"\">" parse-multiline-string parsed ; parsing
: <'
"'>" parse-multiline-string parsed ; parsing
: {'
"'}" parse-multiline-string parsed ; parsing
: {"
"\"}" parse-multiline-string parsed ; parsing
: /* "*/" parse-multiline-string drop ; parsing

View File

@ -0,0 +1 @@
Daniel Ehrenberg

View File

@ -0,0 +1,64 @@
! Copyright (C) 2005, 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: kernel namespaces xml.name io.encodings.utf8 xml.elements
io.encodings.utf16 xml.tokenize xml.state math ascii sequences
io.encodings.string io.encodings combinators ;
IN: xml.autoencoding
: continue-make-tag ( str -- tag )
parse-name-starting middle-tag end-tag ;
: start-utf16le ( -- tag )
utf16le decode-input-if
CHAR: ? expect
0 expect check instruct ;
: 10xxxxxx? ( ch -- ? )
-6 shift 3 bitand 2 = ;
: start<name ( ch -- tag )
ascii?
[ utf8 decode-input-if next make-tag ] [
next
[ get-next 10xxxxxx? not ] take-until
get-char suffix utf8 decode
utf8 decode-input-if next
continue-make-tag
] if ;
: start< ( -- tag )
get-next {
{ 0 [ next next start-utf16le ] }
{ CHAR: ? [ check next next instruct ] } ! XML prolog parsing sets the encoding
{ CHAR: ! [ check utf8 decode-input next next direct ] }
[ check start<name ]
} case ;
: skip-utf8-bom ( -- tag )
"\u0000bb\u0000bf" expect utf8 decode-input
CHAR: < expect check make-tag ;
: decode-expecting ( encoding string -- tag )
[ decode-input-if next ] [ expect-string ] bi* check make-tag ;
: start-utf16be ( -- tag )
utf16be "<" decode-expecting ;
: skip-utf16le-bom ( -- tag )
utf16le "\u0000fe<" decode-expecting ;
: skip-utf16be-bom ( -- tag )
utf16be "\u0000ff<" decode-expecting ;
: start-document ( -- tag )
get-char {
{ CHAR: < [ start< ] }
{ 0 [ start-utf16be ] }
{ HEX: EF [ skip-utf8-bom ] }
{ HEX: FF [ skip-utf16le-bom ] }
{ HEX: FE [ skip-utf16be-bom ] }
{ f [ "" ] }
[ drop utf8 decode-input-if f ]
! Same problem as with <e`>, in the case of XML chunks?
} case check ;

View File

@ -45,7 +45,7 @@ C: <element-decl> element-decl
TUPLE: attlist-decl < directive name att-defs ;
C: <attlist-decl> attlist-decl
TUPLE: entity-decl < directive name def ;
TUPLE: entity-decl < directive name def pe? ;
C: <entity-decl> entity-decl
TUPLE: system-id system-literal ;

View File

@ -0,0 +1 @@
Daniel Ehrenberg

61
basis/xml/dtd/dtd.factor Normal file
View File

@ -0,0 +1,61 @@
! Copyright (C) 2005, 2009 Daniel Ehrenberg, Slava Pestov
! See http://factorcode.org/license.txt for BSD license.
USING: xml.tokenize xml.data xml.state kernel sequences ascii
fry xml.errors combinators hashtables namespaces xml.entities
strings ;
IN: xml.dtd
: take-word ( -- string )
[ get-char blank? ] take-until ;
: take-decl-contents ( -- first second )
pass-blank take-word pass-blank ">" take-string ;
: take-element-decl ( -- element-decl )
take-decl-contents <element-decl> ;
: take-attlist-decl ( -- attlist-decl )
take-decl-contents <attlist-decl> ;
: take-notation-decl ( -- notation-decl )
take-decl-contents <notation-decl> ;
: take-until-one-of ( seps -- str sep )
'[ get-char _ member? ] take-until get-char ;
: take-system-id ( -- system-id )
parse-quote <system-id> close ;
: take-public-id ( -- public-id )
parse-quote parse-quote <public-id> close ;
UNION: dtd-acceptable
directive comment instruction ;
: (take-external-id) ( token -- external-id )
pass-blank {
{ "SYSTEM" [ take-system-id ] }
{ "PUBLIC" [ take-public-id ] }
[ bad-external-id ]
} case ;
: take-external-id ( -- external-id )
take-word (take-external-id) ;
: only-blanks ( str -- )
[ blank? ] all? [ bad-decl ] unless ;
: take-entity-def ( var -- entity-name entity-def )
[
take-word pass-blank get-char {
{ CHAR: ' [ parse-quote ] }
{ CHAR: " [ parse-quote ] }
[ drop take-external-id ]
} case
] dip '[ swap _ [ ?set-at ] change ] 2keep ;
: take-entity-decl ( -- entity-decl )
pass-blank get-char {
{ CHAR: % [ next pass-blank pe-table take-entity-def t ] }
[ drop extra-entities take-entity-def f ]
} case
close <entity-decl> ;

View File

@ -0,0 +1 @@
Daniel Ehrenberg

View File

@ -0,0 +1,165 @@
! Copyright (C) 2005, 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: kernel namespaces xml.tokenize xml.state xml.name
xml.data accessors arrays make xml.char-classes fry assocs sequences
math xml.errors sets combinators io.encodings io.encodings.iana
unicode.case xml.dtd strings ;
IN: xml.elements
: parse-attr ( -- )
parse-name pass-blank CHAR: = expect pass-blank
t parse-quote* 2array , ;
: start-tag ( -- name ? )
#! Outputs the name and whether this is a closing tag
get-char CHAR: / = dup [ next ] when
parse-name swap ;
: (middle-tag) ( -- )
pass-blank version=1.0? get-char name-start?
[ parse-attr (middle-tag) ] when ;
: assure-no-duplicates ( attrs-alist -- attrs-alist )
H{ } clone 2dup '[ swap _ push-at ] assoc-each
[ nip length 2 >= ] assoc-filter >alist
[ first first2 duplicate-attr ] unless-empty ;
: middle-tag ( -- attrs-alist )
! f make will make a vector if it has any elements
[ (middle-tag) ] f make pass-blank
assure-no-duplicates ;
: end-tag ( name attrs-alist -- tag )
tag-ns pass-blank get-char CHAR: / =
[ pop-ns <contained> next CHAR: > expect ]
[ depth inc <opener> close ] if ;
: take-comment ( -- comment )
"--" expect-string
"--" take-string
<comment>
CHAR: > expect ;
: assure-no-extra ( seq -- )
[ first ] map {
T{ name f "" "version" f }
T{ name f "" "encoding" f }
T{ name f "" "standalone" f }
} diff
[ extra-attrs ] unless-empty ;
: good-version ( version -- version )
dup { "1.0" "1.1" } member? [ bad-version ] unless ;
: prolog-version ( alist -- version )
T{ name f "" "version" f } swap at
[ good-version ] [ versionless-prolog ] if* ;
: prolog-encoding ( alist -- encoding )
T{ name f "" "encoding" f } swap at "UTF-8" or ;
: yes/no>bool ( string -- t/f )
{
{ "yes" [ t ] }
{ "no" [ f ] }
[ not-yes/no ]
} case ;
: prolog-standalone ( alist -- version )
T{ name f "" "standalone" f } swap at
[ yes/no>bool ] [ f ] if* ;
: prolog-attrs ( alist -- prolog )
[ prolog-version ]
[ prolog-encoding ]
[ prolog-standalone ]
tri <prolog> ;
SYMBOL: string-input?
: decode-input-if ( encoding -- )
string-input? get [ drop ] [ decode-input ] if ;
: parse-prolog ( -- prolog )
pass-blank middle-tag "?>" expect-string
dup assure-no-extra prolog-attrs
dup encoding>> dup "UTF-16" =
[ drop ] [ name>encoding [ decode-input-if ] when* ] if
dup prolog-data set ;
: instruct ( -- instruction )
take-name {
{ [ dup "xml" = ] [ drop parse-prolog ] }
{ [ dup >lower "xml" = ] [ capitalized-prolog ] }
{ [ dup valid-name? not ] [ bad-name ] }
[ "?>" take-string append <instruction> ]
} cond ;
: take-cdata ( -- string )
depth get zero? [ bad-cdata ] when
"[CDATA[" expect-string "]]>" take-string ;
DEFER: make-tag ! Is this unavoidable?
: expand-pe ( -- ) ; ! Make this run the contents of the pe within a DOCTYPE
: (take-internal-subset) ( -- )
pass-blank get-char {
{ CHAR: ] [ next ] }
{ CHAR: % [ expand-pe ] }
{ CHAR: < [
next make-tag dup dtd-acceptable?
[ bad-doctype ] unless , (take-internal-subset)
] }
[ 1string bad-doctype ]
} case ;
: take-internal-subset ( -- seq )
[
H{ } pe-table set
t in-dtd? set
(take-internal-subset)
] { } make ;
: nontrivial-doctype ( -- external-id internal-subset )
pass-blank get-char CHAR: [ = [
next take-internal-subset f swap close
] [
" >" take-until-one-of {
{ CHAR: \s [ (take-external-id) ] }
{ CHAR: > [ only-blanks f ] }
} case f
] if ;
: take-doctype-decl ( -- doctype-decl )
pass-blank " >" take-until-one-of {
{ CHAR: \s [ nontrivial-doctype ] }
{ CHAR: > [ f f ] }
} case <doctype-decl> ;
: take-directive ( -- directive )
take-name {
{ "ELEMENT" [ take-element-decl ] }
{ "ATTLIST" [ take-attlist-decl ] }
{ "DOCTYPE" [ take-doctype-decl ] }
{ "ENTITY" [ take-entity-decl ] }
{ "NOTATION" [ take-notation-decl ] }
[ bad-directive ]
} case ;
: direct ( -- object )
get-char {
{ CHAR: - [ take-comment ] }
{ CHAR: [ [ take-cdata ] }
[ drop take-directive ]
} case ;
: make-tag ( -- tag )
{
{ [ get-char dup CHAR: ! = ] [ drop next direct ] }
{ [ CHAR: ? = ] [ next instruct ] }
[
start-tag [ dup add-ns pop-ns <closer> depth dec close ]
[ middle-tag end-tag ] if
]
} cond ;

View File

@ -34,3 +34,5 @@ T{ duplicate-attr f 1 21 T{ name { space "" } { main "this" } } V{ "a" "b" } } "
T{ bad-cdata f 1 3 } "<![CDATA[]]><x/>" xml-error-test
T{ bad-cdata f 1 7 } "<x/><![CDATA[]]>" xml-error-test
T{ pre/post-content f "&" t } "&32;<x/>" xml-error-test
T{ bad-doctype f 1 17 "a" } "<!DOCTYPE foo [ a ]><x/>" xml-error-test
T{ bad-doctype f 1 22 T{ opener { name T{ name f "" "foo" "" } } { attrs T{ attrs } } } } "<!DOCTYPE foo [ <foo> ]><x/>" xml-error-test

View File

@ -2,7 +2,7 @@
! See http://factorcode.org/license.txt for BSD license.
USING: xml.data xml.writer kernel generic io prettyprint math
debugger sequences xml.state accessors summary
namespaces io.streams.string xml.backend ;
namespaces io.streams.string xml.backend xml.writer.private ;
IN: xml.errors
TUPLE: parsing-error line column ;
@ -332,6 +332,12 @@ M: not-enough-characters summary ( obj -- str )
"Not enough characters" print
] with-string-writer ;
TUPLE: bad-doctype < parsing-error contents ;
: bad-doctype ( contents -- * )
\ bad-doctype parsing-error swap >>contents throw ;
M: bad-doctype summary
call-next-method "\nDTD contains invalid object" append ;
UNION: xml-parse-error
multitags notags extra-attrs nonexist-ns bad-decl
not-yes/no unclosed mismatched expected no-entity

View File

@ -0,0 +1,76 @@
! Copyright (C) 2005, 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: kernel namespaces accessors xml.tokenize xml.data assocs
xml.errors xml.char-classes combinators.short-circuit splitting
fry xml.state sequences ;
IN: xml.name
! XML namespace processing: ns = namespace
! A stack of hashtables
SYMBOL: ns-stack
: attrs>ns ( attrs-alist -- hash )
! this should check to make sure URIs are valid
[
[
swap dup space>> "xmlns" =
[ main>> set ]
[
T{ name f "" "xmlns" f } names-match?
[ "" set ] [ drop ] if
] if
] assoc-each
] { } make-assoc f like ;
: add-ns ( name -- )
dup space>> dup ns-stack get assoc-stack
[ nip ] [ nonexist-ns ] if* >>url drop ;
: push-ns ( hash -- )
ns-stack get push ;
: pop-ns ( -- )
ns-stack get pop* ;
: init-ns-stack ( -- )
V{ H{
{ "xml" "http://www.w3.org/XML/1998/namespace" }
{ "xmlns" "http://www.w3.org/2000/xmlns" }
{ "" "" }
} } clone
ns-stack set ;
: tag-ns ( name attrs-alist -- name attrs )
dup attrs>ns push-ns
[ dup add-ns ] dip dup [ drop add-ns ] assoc-each <attrs> ;
: valid-name? ( str -- ? )
[ f ] [
version=1.0? swap {
[ first name-start? ]
[ rest-slice [ name-char? ] with all? ]
} 2&&
] if-empty ;
: prefixed-name ( str -- name/f )
":" split dup length 2 = [
[ [ valid-name? ] all? ]
[ first2 f <name> ] bi and
] [ drop f ] if ;
: interpret-name ( str -- name )
dup prefixed-name [ ] [
dup valid-name?
[ <simple-name> ] [ bad-name ] if
] ?if ;
: take-name ( -- string )
version=1.0? '[ _ get-char name-char? not ] take-until ;
: parse-name ( -- name )
take-name interpret-name ;
: parse-name-starting ( string -- name )
take-name append interpret-name ;

View File

@ -0,0 +1 @@
Daniel Ehrenberg

View File

@ -1,6 +1,6 @@
! Copyright (C) 2005, 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: accessors kernel namespaces ;
USING: accessors kernel namespaces io ;
IN: xml.state
TUPLE: spot char line column next check ;

View File

@ -2,7 +2,7 @@ USING: tools.test xml.tokenize xml.state io.streams.string kernel io strings asc
IN: xml.test.state
: string-parse ( str quot -- )
[ <string-reader> ] dip state-parse ;
[ <string-reader> ] dip with-state ;
: take-rest ( -- string )
[ f ] take-until ;

View File

@ -3,11 +3,13 @@
IN: xml.tests
USING: kernel xml tools.test io namespaces make sequences
xml.errors xml.entities.html parser strings xml.data io.files
xml.writer xml.utilities continuations assocs
xml.utilities continuations assocs
sequences.deep accessors io.streams.string ;
! This is insufficient
\ read-xml must-infer
[ [ drop ] sax ] must-infer
\ string>xml must-infer
SYMBOL: xml-file
[ ] [ "resource:basis/xml/tests/test.xml"
@ -29,8 +31,6 @@ SYMBOL: xml-file
] unit-test
[ V{ "fa&g" } ] [ xml-file get "x" get-id children>> ] unit-test
[ "that" ] [ xml-file get "this" swap at ] unit-test
[ "<?xml version=\"1.0\" encoding=\"UTF-8\"?><a b=\"c\"/>" ]
[ "<a b='c'/>" string>xml xml>string ] unit-test
[ "abcd" ] [
"<main>a<sub>bc</sub>d<nothing/></main>" string>xml
[ [ dup string? [ % ] [ drop ] if ] deep-each ] "" make
@ -47,10 +47,6 @@ SYMBOL: xml-file
at swap "z" [ tuck ] dip swap set-at
T{ name f "blah" "z" f } swap at ] unit-test
[ "foo" ] [ "<boo><![CDATA[foo]]></boo>" string>xml children>string ] unit-test
[ "<?xml version=\"1.0\" encoding=\"UTF-8\"?><foo>bar baz</foo>" ]
[ "<foo>bar</foo>" string>xml [ " baz" append ] map xml>string ] unit-test
[ "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<foo>\n bar\n</foo>" ]
[ "<foo> bar </foo>" string>xml pprint-xml>string ] unit-test
[ "<!-- B+, B, or B--->" string>xml ] must-fail
[ ] [ "<?xml version='1.0'?><!-- declarations for <head> & <body> --><foo/>" string>xml drop ] unit-test
[ T{ element-decl f "br" "EMPTY" } ] [ "<!ELEMENT br EMPTY>" string>xml-chunk first ] unit-test
@ -61,8 +57,5 @@ SYMBOL: xml-file
[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo >" string>xml-chunk first ] unit-test
[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM 'blah.dtd'>" string>xml-chunk first ] unit-test
[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM \"blah.dtd\" >" string>xml-chunk first ] unit-test
[ t ] [ "<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.1//EN' 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd' >" dup string>xml-chunk [ write-xml-chunk ] with-string-writer = ] unit-test
[ "foo" ] [ "<!ENTITY bar 'foo'><x>&bar;</x>" string>xml children>string ] unit-test
[ V{ "hello" } ] [ "hello" string>xml-chunk ] unit-test
[ 958 ] [ [ "&xi;" string>xml-chunk ] with-html-entities first first ] unit-test
[ "x" "<" ] [ "<x value='&lt;'/>" string>xml [ name>> main>> ] [ "value" swap at ] bi ] unit-test

View File

@ -1,17 +1,15 @@
! Copyright (C) 2005, 2006 Daniel Ehrenberg
! Copyright (C) 2005, 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays ascii assocs combinators locals
combinators.short-circuit fry io.encodings io.encodings.iana
io.encodings.string io.encodings.utf16 io.encodings.utf8 kernel make
math math.parser namespaces sequences sets splitting xml.state
strings xml.char-classes xml.data xml.entities xml.errors hashtables
circular io sbufs ;
USING: namespaces xml.state kernel sequences accessors
xml.char-classes xml.errors math io sbufs fry strings ascii
circular xml.entities assocs make splitting math.parser
locals combinators arrays ;
IN: xml.tokenize
! Originally from state-parser
SYMBOL: prolog-data
SYMBOL: depth
: version=1.0? ( -- ? )
prolog-data get [ version>> "1.0" = ] [ t ] if* ;
@ -41,6 +39,14 @@ SYMBOL: prolog-data
#! Increment spot.
get-char [ unexpected-end ] unless (next) record ;
: init-parser ( -- )
0 1 0 f f <spot> spot set
read1 set-next next ;
: with-state ( stream quot -- )
! with-input-stream implicitly creates a new scope which we use
swap [ init-parser call ] with-input-stream ; inline
: skip-until ( quot: ( -- ? ) -- )
get-char [
[ call ] keep swap [ drop ] [
@ -82,89 +88,6 @@ SYMBOL: prolog-data
dup [ get-char next ] replicate 2dup =
[ 2drop ] [ expected ] if ;
: init-parser ( -- )
0 1 0 f f <spot> spot set
read1 set-next next ;
: state-parse ( stream quot -- )
! with-input-stream implicitly creates a new scope which we use
swap [ init-parser call ] with-input-stream ; inline
! XML namespace processing: ns = namespace
! A stack of hashtables
SYMBOL: ns-stack
SYMBOL: depth
: attrs>ns ( attrs-alist -- hash )
! this should check to make sure URIs are valid
[
[
swap dup space>> "xmlns" =
[ main>> set ]
[
T{ name f "" "xmlns" f } names-match?
[ "" set ] [ drop ] if
] if
] assoc-each
] { } make-assoc f like ;
: add-ns ( name -- )
dup space>> dup ns-stack get assoc-stack
[ nip ] [ nonexist-ns ] if* >>url drop ;
: push-ns ( hash -- )
ns-stack get push ;
: pop-ns ( -- )
ns-stack get pop* ;
: init-ns-stack ( -- )
V{ H{
{ "xml" "http://www.w3.org/XML/1998/namespace" }
{ "xmlns" "http://www.w3.org/2000/xmlns" }
{ "" "" }
} } clone
ns-stack set ;
: tag-ns ( name attrs-alist -- name attrs )
dup attrs>ns push-ns
[ dup add-ns ] dip dup [ drop add-ns ] assoc-each <attrs> ;
! Parsing names
: valid-name? ( str -- ? )
[ f ] [
version=1.0? swap {
[ first name-start? ]
[ rest-slice [ name-char? ] with all? ]
} 2&&
] if-empty ;
: prefixed-name ( str -- name/f )
":" split dup length 2 = [
[ [ valid-name? ] all? ]
[ first2 f <name> ] bi and
] [ drop f ] if ;
: interpret-name ( str -- name )
dup prefixed-name [ ] [
dup valid-name?
[ <simple-name> ] [ bad-name ] if
] ?if ;
: take-name ( -- string )
version=1.0? '[ _ get-char name-char? not ] take-until ;
: parse-name ( -- name )
take-name interpret-name ;
: parse-name-starting ( string -- name )
take-name append interpret-name ;
! -- Parsing strings
: parse-named-entity ( string -- )
dup entities at [ , ] [
dup extra-entities get at
@ -211,12 +134,8 @@ SYMBOL: in-dtd?
char CHAR: < =
] parse-char ;
! Parsing tags
: start-tag ( -- name ? )
#! Outputs the name and whether this is a closing tag
get-char CHAR: / = dup [ next ] when
parse-name swap ;
: close ( -- )
pass-blank CHAR: > expect ;
: normalize-quote ( str -- str )
[ dup "\t\r\n" member? [ drop CHAR: \s ] when ] map ;
@ -235,262 +154,3 @@ SYMBOL: in-dtd?
: parse-quote ( -- seq )
f parse-quote* ;
: parse-attr ( -- )
parse-name pass-blank CHAR: = expect pass-blank
t parse-quote* 2array , ;
: (middle-tag) ( -- )
pass-blank version=1.0? get-char name-start?
[ parse-attr (middle-tag) ] when ;
: assure-no-duplicates ( attrs-alist -- attrs-alist )
H{ } clone 2dup '[ swap _ push-at ] assoc-each
[ nip length 2 >= ] assoc-filter >alist
[ first first2 duplicate-attr ] unless-empty ;
: middle-tag ( -- attrs-alist )
! f make will make a vector if it has any elements
[ (middle-tag) ] f make pass-blank
assure-no-duplicates ;
: close ( -- )
pass-blank CHAR: > expect ;
: end-tag ( name attrs-alist -- tag )
tag-ns pass-blank get-char CHAR: / =
[ pop-ns <contained> next CHAR: > expect ]
[ depth inc <opener> close ] if ;
: take-comment ( -- comment )
"--" expect-string
"--" take-string
<comment>
CHAR: > expect ;
: take-cdata ( -- string )
depth get zero? [ bad-cdata ] when
"[CDATA[" expect-string "]]>" take-string ;
: take-word ( -- string )
[ get-char blank? ] take-until ;
: take-decl-contents ( -- first second )
pass-blank take-word pass-blank ">" take-string ;
: take-element-decl ( -- element-decl )
take-decl-contents <element-decl> ;
: take-attlist-decl ( -- attlist-decl )
take-decl-contents <attlist-decl> ;
: take-notation-decl ( -- notation-decl )
take-decl-contents <notation-decl> ;
: take-until-one-of ( seps -- str sep )
'[ get-char _ member? ] take-until get-char ;
: take-system-id ( -- system-id )
parse-quote <system-id> close ;
: take-public-id ( -- public-id )
parse-quote parse-quote <public-id> close ;
DEFER: direct
: (take-internal-subset) ( -- )
pass-blank get-char {
{ CHAR: ] [ next ] }
[ drop "<!" expect-string direct , (take-internal-subset) ]
} case ;
: take-internal-subset ( -- seq )
[
H{ } pe-table set
t in-dtd? set
(take-internal-subset)
] { } make ;
: (take-external-id) ( token -- external-id )
pass-blank {
{ "SYSTEM" [ take-system-id ] }
{ "PUBLIC" [ take-public-id ] }
[ bad-external-id ]
} case ;
: take-external-id ( -- external-id )
take-word (take-external-id) ;
: only-blanks ( str -- )
[ blank? ] all? [ bad-decl ] unless ;
: nontrivial-doctype ( -- external-id internal-subset )
pass-blank get-char CHAR: [ = [
next take-internal-subset f swap close
] [
" >" take-until-one-of {
{ CHAR: \s [ (take-external-id) ] }
{ CHAR: > [ only-blanks f ] }
} case f
] if ;
: take-doctype-decl ( -- doctype-decl )
pass-blank " >" take-until-one-of {
{ CHAR: \s [ nontrivial-doctype ] }
{ CHAR: > [ f f ] }
} case <doctype-decl> ;
: take-entity-def ( var -- entity-name entity-def )
[
take-word pass-blank get-char {
{ CHAR: ' [ parse-quote ] }
{ CHAR: " [ parse-quote ] }
[ drop take-external-id ]
} case swap
] dip [ [ ?set-at ] change ] 2keep swap ;
: take-entity-decl ( -- entity-decl )
pass-blank get-char {
{ CHAR: % [ next pass-blank pe-table take-entity-def ] }
[ drop extra-entities take-entity-def ]
} case
close <entity-decl> ;
: take-directive ( -- directive )
take-name {
{ "ELEMENT" [ take-element-decl ] }
{ "ATTLIST" [ take-attlist-decl ] }
{ "DOCTYPE" [ take-doctype-decl ] }
{ "ENTITY" [ take-entity-decl ] }
{ "NOTATION" [ take-notation-decl ] }
[ bad-directive ]
} case ;
: direct ( -- object )
get-char {
{ CHAR: - [ take-comment ] }
{ CHAR: [ [ take-cdata ] }
[ drop take-directive ]
} case ;
: assure-no-extra ( seq -- )
[ first ] map {
T{ name f "" "version" f }
T{ name f "" "encoding" f }
T{ name f "" "standalone" f }
} diff
[ extra-attrs ] unless-empty ;
: good-version ( version -- version )
dup { "1.0" "1.1" } member? [ bad-version ] unless ;
: prolog-version ( alist -- version )
T{ name f "" "version" f } swap at
[ good-version ] [ versionless-prolog ] if* ;
: prolog-encoding ( alist -- encoding )
T{ name f "" "encoding" f } swap at "UTF-8" or ;
: yes/no>bool ( string -- t/f )
{
{ "yes" [ t ] }
{ "no" [ f ] }
[ not-yes/no ]
} case ;
: prolog-standalone ( alist -- version )
T{ name f "" "standalone" f } swap at
[ yes/no>bool ] [ f ] if* ;
: prolog-attrs ( alist -- prolog )
[ prolog-version ]
[ prolog-encoding ]
[ prolog-standalone ]
tri <prolog> ;
SYMBOL: string-input?
: decode-input-if ( encoding -- )
string-input? get [ drop ] [ decode-input ] if ;
: parse-prolog ( -- prolog )
pass-blank middle-tag "?>" expect-string
dup assure-no-extra prolog-attrs
dup encoding>> dup "UTF-16" =
[ drop ] [ name>encoding [ decode-input-if ] when* ] if
dup prolog-data set ;
: instruct ( -- instruction )
take-name {
{ [ dup "xml" = ] [ drop parse-prolog ] }
{ [ dup >lower "xml" = ] [ capitalized-prolog ] }
{ [ dup valid-name? not ] [ bad-name ] }
[ "?>" take-string append <instruction> ]
} cond ;
: make-tag ( -- tag )
{
{ [ get-char dup CHAR: ! = ] [ drop next direct ] }
{ [ CHAR: ? = ] [ next instruct ] }
[
start-tag [ dup add-ns pop-ns <closer> depth dec close ]
[ middle-tag end-tag ] if
]
} cond ;
! Autodetecting encodings
: continue-make-tag ( str -- tag )
parse-name-starting middle-tag end-tag ;
: start-utf16le ( -- tag )
utf16le decode-input-if
CHAR: ? expect
0 expect check instruct ;
: 10xxxxxx? ( ch -- ? )
-6 shift 3 bitand 2 = ;
: start<name ( ch -- tag )
ascii?
[ utf8 decode-input-if next make-tag ] [
next
[ get-next 10xxxxxx? not ] take-until
get-char suffix utf8 decode
utf8 decode-input-if next
continue-make-tag
] if ;
: start< ( -- tag )
get-next {
{ 0 [ next next start-utf16le ] }
{ CHAR: ? [ check next next instruct ] } ! XML prolog parsing sets the encoding
{ CHAR: ! [ check utf8 decode-input next next direct ] }
[ check start<name ]
} case ;
: skip-utf8-bom ( -- tag )
"\u0000bb\u0000bf" expect utf8 decode-input
CHAR: < expect check make-tag ;
: decode-expecting ( encoding string -- tag )
[ decode-input-if next ] [ expect-string ] bi* check make-tag ;
: start-utf16be ( -- tag )
utf16be "<" decode-expecting ;
: skip-utf16le-bom ( -- tag )
utf16le "\u0000fe<" decode-expecting ;
: skip-utf16be-bom ( -- tag )
utf16be "\u0000ff<" decode-expecting ;
: start-document ( -- tag )
get-char {
{ CHAR: < [ start< ] }
{ 0 [ start-utf16be ] }
{ HEX: EF [ skip-utf8-bom ] }
{ HEX: FF [ skip-utf16le-bom ] }
{ HEX: FE [ skip-utf16be-bom ] }
{ f [ "" ] }
[ drop utf8 decode-input-if f ]
! Same problem as with <e`>, in the case of XML chunks?
} case check ;

View File

@ -11,7 +11,6 @@ ARTICLE: "xml.writer" "Writing XML"
"These words are used to print XML normally"
{ $subsection xml>string }
{ $subsection write-xml }
{ $subsection print-xml }
"These words are used to prettyprint XML"
{ $subsection pprint-xml>string }
{ $subsection pprint-xml>string-but }
@ -38,11 +37,6 @@ HELP: write-xml
{ $description "prints the contents of an XML document to " { $link output-stream } "." }
{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ;
HELP: print-xml
{ $values { "xml" "an XML document" } }
{ $description "prints the contents of an XML document to " { $link output-stream } ", followed by a newline" }
{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ;
HELP: pprint-xml
{ $values { "xml" "an XML document" } }
{ $description "prints the contents of an XML document to " { $link output-stream } " in a prettyprinted form." }
@ -58,5 +52,5 @@ HELP: pprint-xml>string-but
{ $description "Prettyprints an XML document, returning the result as a string and leaving the whitespace of the tags with names in sensitive-tags intact." }
{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ;
{ xml>string print-xml write-xml pprint-xml pprint-xml>string pprint-xml>string-but pprint-xml-but } related-words
{ xml>string write-xml pprint-xml pprint-xml>string pprint-xml>string-but pprint-xml-but } related-words

View File

@ -1,5 +1,62 @@
! Copyright (C) 2005, 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: xml.data xml.writer tools.test fry xml kernel multiline
xml.writer.private io.streams.string xml.utilities sequences ;
IN: xml.writer.tests
USING: xml.data xml.writer tools.test ;
\ write-xml must-infer
\ xml>string must-infer
\ pprint-xml must-infer
\ pprint-xml-but must-infer
[ "foo" ] [ T{ name { main "foo" } } name>string ] unit-test
[ "foo" ] [ T{ name { space "" } { main "foo" } } name>string ] unit-test
[ "ns:foo" ] [ T{ name { space "ns" } { main "foo" } } name>string ] unit-test
: reprints-as ( to from -- )
[ '[ _ ] ] [ '[ _ string>xml xml>string ] ] bi* unit-test ;
: pprint-reprints-as ( to from -- )
[ '[ _ ] ] [ '[ _ string>xml pprint-xml>string ] ] bi* unit-test ;
: reprints-same ( string -- ) dup reprints-as ;
"<?xml version=\"1.0\" encoding=\"UTF-8\"?><x/>" reprints-same
{" <?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE foo [<!ENTITY foo "bar">]>
<x>bar</x> "}
{" <?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE foo [<!ENTITY foo 'bar'>]>
<x>&foo;</x> "} reprints-as
{" <?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE foo [
<!ENTITY foo "bar">
<!ELEMENT br EMPTY>
<!ATTLIST list type (bullets|ordered|glossary) "ordered">
<!NOTATION foo bar>
<?baz bing bang bong?>
<!--wtf-->
]>
<x>
bar
</x>"}
{" <?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE foo [ <!ENTITY foo 'bar'> <!ELEMENT br EMPTY>
<!ATTLIST list
type (bullets|ordered|glossary) "ordered">
<!NOTATION foo bar> <?baz bing bang bong?>
<!--wtf-->
]>
<x>&foo;</x>"} pprint-reprints-as
[ t ] [ "<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.1//EN' 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd' >" dup string>xml-chunk xml-chunk>string = ] unit-test
[ "foo" ] [ "<!ENTITY bar 'foo'><x>&bar;</x>" string>xml children>string ] unit-test
[ V{ "hello" } ] [ "hello" string>xml-chunk ] unit-test
[ "<?xml version=\"1.0\" encoding=\"UTF-8\"?><a b=\"c\"/>" ]
[ "<a b='c'/>" string>xml xml>string ] unit-test
[ "<?xml version=\"1.0\" encoding=\"UTF-8\"?><foo>bar baz</foo>" ]
[ "<foo>bar</foo>" string>xml [ " baz" append ] map xml>string ] unit-test
[ "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<foo>\n bar\n</foo>" ]
[ "<foo> bar </foo>" string>xml pprint-xml>string ] unit-test

View File

@ -1,4 +1,4 @@
! Copyright (C) 2005, 2006 Daniel Ehrenberg
! Copyright (C) 2005, 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: hashtables kernel math namespaces sequences strings
assocs combinators io io.streams.string accessors
@ -11,6 +11,8 @@ SYMBOL: indentation
SYMBOL: indenter
" " indenter set-global
<PRIVATE
: sensitive? ( tag -- ? )
sensitive-tags get swap '[ _ names-match? ] contains? ;
@ -40,9 +42,13 @@ SYMBOL: indenter
: name>string ( name -- string )
[ main>> ] [ space>> ] bi [ ":" rot 3append ] unless-empty ;
PRIVATE>
: print-name ( name -- )
name>string write ;
<PRIVATE
: print-attrs ( assoc -- )
[
" " write
@ -52,11 +58,18 @@ SYMBOL: indenter
"\"" write
] assoc-each ;
PRIVATE>
GENERIC: write-xml-chunk ( object -- )
<PRIVATE
M: string write-xml-chunk
escape-string dup empty? not xml-pprint? get and
[ nl 80 indent-string indented-break ] when write ;
escape-string xml-pprint? get [
dup [ blank? ] all?
[ drop "" ]
[ nl 80 indent-string indented-break ] if
] when write ;
: write-tag ( tag -- )
?indent CHAR: < write1
@ -100,11 +113,20 @@ M: attlist-decl write-xml-chunk
[ att-defs>> write ">" write ]
bi ;
M: notation-decl write-xml-chunk
"<!NOTATION " write
[ name>> write " " write ]
[ id>> write ">" write ]
bi ;
M: entity-decl write-xml-chunk
"<!ENTITY " write
[ name>> write " " write ]
[ def>> write-xml-chunk ">" write ]
bi ;
[ pe?>> [ " % " write ] when ]
[ name>> write " \"" write ] [
def>> f xml-pprint?
[ write-xml-chunk ] with-variable
"\">" write
] tri ;
M: system-id write-xml-chunk
"SYSTEM '" write system-literal>> write "'" write ;
@ -114,17 +136,21 @@ M: public-id write-xml-chunk
[ pubid-literal>> write "' '" write ]
[ system-literal>> write "'" write ] bi ;
: write-internal-subset ( seq -- )
[
"[" write indent
[ ?indent write-xml-chunk ] each
unindent ?indent "]" write
] when* ;
M: doctype-decl write-xml-chunk
"<!DOCTYPE " write
?indent "<!DOCTYPE " write
[ name>> write " " write ]
[ external-id>> [ write-xml-chunk " " write ] when* ]
[
internal-subset>>
[ "[" write [ write-xml-chunk ] each "]" write ] when* ">" write
] tri ;
[ internal-subset>> write-internal-subset ">" write ] tri ;
M: directive write-xml-chunk
"<!" write text>> write CHAR: > write1 ;
"<!" write text>> write CHAR: > write1 nl ;
M: instruction write-xml-chunk
"<?" write text>> write "?>" write ;
@ -138,6 +164,8 @@ M: sequence write-xml-chunk
standalone>> [ "\" standalone=\"yes" write ] when
"\"?>" write ;
PRIVATE>
: write-xml ( xml -- )
{
[ prolog>> write-prolog ]
@ -149,31 +177,25 @@ M: sequence write-xml-chunk
M: xml write-xml-chunk
body>> write-xml-chunk ;
: print-xml ( xml -- )
write-xml nl ;
: xml>string ( xml -- string )
[ write-xml ] with-string-writer ;
: xml-chunk>string ( object -- string )
[ write-xml-chunk ] with-string-writer ;
: with-xml-pprint ( sensitive-tags quot -- )
: pprint-xml-but ( xml sensitive-tags -- )
[
swap [ assure-name ] map sensitive-tags set
[ assure-name ] map sensitive-tags set
0 indentation set
xml-pprint? on
call
] with-scope ; inline
: pprint-xml-but ( xml sensitive-tags -- )
[ print-xml ] with-xml-pprint ;
write-xml
] with-scope ;
: pprint-xml ( xml -- )
f pprint-xml-but ;
: pprint-xml>string-but ( xml sensitive-tags -- string )
[ xml>string ] with-xml-pprint ;
[ pprint-xml-but ] with-string-writer ;
: pprint-xml>string ( xml -- string )
f pprint-xml>string-but ;

View File

@ -1,9 +1,9 @@
! Copyright (C) 2005, 2006 Daniel Ehrenberg
! Copyright (C) 2005, 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays io io.encodings.binary io.files
io.streams.string kernel namespaces sequences strings
xml.backend xml.data xml.errors xml.tokenize ascii xml.entities
xml.writer xml.state assocs ;
xml.backend xml.data xml.errors xml.elements ascii xml.entities
xml.writer xml.state xml.autoencoding assocs xml.tokenize xml.name ;
IN: xml
! -- Overall parser with data tree
@ -132,7 +132,7 @@ TUPLE: pull-xml scope ;
reset-prolog init-ns-stack
start-document [ call-under ] when*
sax-loop
] state-parse ; inline recursive
] with-state ; inline recursive
: (read-xml) ( -- )
start-document [ process ] when*
@ -144,7 +144,7 @@ TUPLE: pull-xml scope ;
done? [ unclosed ] unless
xml-stack get first second
prolog-data get swap
] state-parse ;
] with-state ;
: read-xml ( stream -- xml )
0 depth

View File

@ -1,9 +1,9 @@
! Copyright (C) 2005, 2008 Slava Pestov.
! Copyright (C) 2005, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: arrays byte-arrays kernel kernel.private math namespaces
make sequences strings words effects generic generic.standard
classes classes.algebra slots.private combinators accessors
words sequences.private assocs alien quotations ;
words sequences.private assocs alien quotations hashtables ;
IN: slots
TUPLE: slot-spec name offset class initial read-only ;
@ -86,7 +86,7 @@ ERROR: bad-slot-value value class ;
] [ ] make ;
: writer-props ( slot-spec -- assoc )
[ "writing" set ] H{ } make-assoc ;
"writing" associate ;
: define-writer ( class slot-spec -- )
[ name>> writer-word ] [ writer-quot ] [ writer-props ] tri

View File

@ -2,7 +2,7 @@
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays html.parser.utils hashtables io kernel
namespaces make prettyprint quotations sequences splitting
state-parser strings unicode.categories unicode.case ;
html.parser.state strings unicode.categories unicode.case ;
IN: html.parser
TUPLE: tag name attributes text closing? ;
@ -59,8 +59,8 @@ SYMBOL: tagstack
[ get-char CHAR: " = ] take-until ;
: read-quote ( -- string )
get-char next* CHAR: ' =
[ read-single-quote ] [ read-double-quote ] if next* ;
get-char next CHAR: ' =
[ read-single-quote ] [ read-double-quote ] if next ;
: read-key ( -- string )
read-whitespace*
@ -68,7 +68,7 @@ SYMBOL: tagstack
: read-= ( -- )
read-whitespace*
[ get-char CHAR: = = ] take-until drop next* ;
[ get-char CHAR: = = ] take-until drop next ;
: read-value ( -- string )
read-whitespace*
@ -76,14 +76,14 @@ SYMBOL: tagstack
[ blank? ] trim ;
: read-comment ( -- )
"-->" take-string* make-comment-tag push-tag ;
"-->" take-string make-comment-tag push-tag ;
: read-dtd ( -- )
">" take-string* make-dtd-tag push-tag ;
">" take-string make-dtd-tag push-tag ;
: read-bang ( -- )
next* get-char CHAR: - = get-next CHAR: - = and [
next* next*
next get-char CHAR: - = get-next CHAR: - = and [
next next
read-comment
] [
read-dtd
@ -91,10 +91,10 @@ SYMBOL: tagstack
: read-tag ( -- string )
[ get-char CHAR: > = get-char CHAR: < = or ] take-until
get-char CHAR: < = [ next* ] unless ;
get-char CHAR: < = [ next ] unless ;
: read-< ( -- string )
next* get-char CHAR: ! = [
next get-char CHAR: ! = [
read-bang f
] [
read-tag

View File

@ -0,0 +1,13 @@
USING: tools.test html.parser.state ascii kernel ;
IN: html.parser.state.tests
: take-rest ( -- string )
[ f ] take-until ;
: take-char ( -- string )
[ get-char = ] curry take-until ;
[ "hello" ] [ "hello" [ take-rest ] string-parse ] unit-test
[ "hi" " how are you?" ] [ "hi how are you?" [ [ get-char blank? ] take-until take-rest ] string-parse ] unit-test
[ "foo" ";bar" ] [ "foo;bar" [ CHAR: ; take-char take-rest ] string-parse ] unit-test
! [ "foo " " bar" ] [ "foo and bar" [ "and" take-string take-rest ] string-parse ] unit-test

View File

@ -0,0 +1,41 @@
! Copyright (C) 2005, 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: namespaces math kernel sequences accessors fry circular ;
IN: html.parser.state
TUPLE: state string i ;
: get-i ( -- i ) state get i>> ;
: get-char ( -- char )
state get [ i>> ] [ string>> ] bi ?nth ;
: get-next ( -- char )
state get [ i>> 1+ ] [ string>> ] bi ?nth ;
: next ( -- )
state get [ 1+ ] change-i drop ;
: string-parse ( string quot -- )
[ 0 state boa state ] dip with-variable ;
: short* ( n seq -- n' seq )
over [ nip dup length swap ] unless ;
: skip-until ( quot: ( -- ? ) -- )
get-char [
[ call ] keep swap
[ drop ] [ next skip-until ] if
] [ drop ] if ; inline recursive
: take-until ( quot: ( -- ? ) -- )
[ get-i ] dip skip-until get-i
state get string>> subseq ;
: string-matches? ( string circular -- ? )
get-char over push-circular sequence= ;
: take-string ( match -- string )
dup length <circular-string>
[ 2dup string-matches? ] take-until nip
dup length rot length 1- - head next ;

View File

@ -1,7 +1,7 @@
USING: assocs combinators continuations hashtables
hashtables.private io kernel math
namespaces prettyprint quotations sequences splitting
state-parser strings tools.test ;
strings tools.test ;
USING: html.parser.utils ;
IN: html.parser.utils.tests

View File

@ -2,17 +2,12 @@
! See http://factorcode.org/license.txt for BSD license.
USING: assocs circular combinators continuations hashtables
hashtables.private io kernel math namespaces prettyprint
quotations sequences splitting state-parser strings
quotations sequences splitting html.parser.state strings
combinators.short-circuit ;
IN: html.parser.utils
: string-parse-end? ( -- ? ) get-next not ;
: take-string* ( match -- string )
dup length <circular-string>
[ 2dup string-matches? ] take-until nip
dup length rot length 1- - head next* ;
: trim1 ( seq ch -- newseq )
[ [ ?head-slice drop ] [ ?tail-slice drop ] bi ] 2keep drop like ;

View File

@ -53,8 +53,7 @@ INLINE bool factor_arg(const F_CHAR* str, const F_CHAR* arg, CELL* value)
void init_parameters_from_args(F_PARAMETERS *p, int argc, F_CHAR **argv)
{
default_parameters(p);
const F_CHAR *executable_path = vm_executable_path();
p->executable_path = executable_path ? executable_path : argv[0];
p->executable_path = argv[0];
int i = 0;
@ -106,6 +105,11 @@ void init_factor(F_PARAMETERS *p)
/* OS-specific initialization */
early_init();
const F_CHAR *executable_path = vm_executable_path();
if(executable_path)
p->executable_path = executable_path;
if(p->image_path == NULL)
p->image_path = default_image_path();