stream input and more docs for xml

microdan 2006-11-27 02:48:21 +00:00
parent 82c74ce1ba
commit 5c21b39dc0
7 changed files with 216 additions and 106 deletions

View File

@ -21,7 +21,7 @@ HELP: base64
HELP: <rpc-method>
{ $values { "name" "a string" } { "params" "a sequence" } }
{ $description "creates a tuple reprsenting a method call which can be translated using" { $code send-rpc } "into an XML-RPC document" }
{ $description "creates a tuple reprsenting a method call which can be translated using send-rpc into an XML-RPC document" }
{ $see-also rpc-method <rpc-response> <rpc-fault> } ;
HELP: rpc-method
@ -52,7 +52,7 @@ HELP: post-rpc
{ $description "posts an XML-RPC document to the specified URL, receives the response and parses it as XML-RPC, returning the tuple" } ;
ARTICLE: { "xml-rpc" "intro" } "XML-RPC"
"This is the XML-RPC library. XML-RPC is used instead of SOAP because it is far simpler and easier to use for most tasks. The library was implemented by Daniel Ehrenberg."
"This is the XML-RPC library. XML-RPC is used instead of SOAP because it is far simpler and easier to use for most tasks. The library was implemented by Daniel Ehrenberg. Together with XML, this is part of the F2EE framework."
$terpri
"The most important words that this library implements are:"
{ $subsection send-rpc }

View File

@ -0,0 +1,36 @@
IN: ref-template
USING: kernel xml sequences hashtables tools io arrays namespaces generic ;
SYMBOL: ref-table
: replace-ref ( ref -- object )
reference-name ref-table get hash call ;
: r-ref-string ( xml-string -- xml-string )
xml-string-array [
dup reference? [ replace-ref ] when
] map <xml-string> ;
GENERIC: (r-ref) ( xml -- object )
M: any-tag (r-ref)
dup tag-props dup [
dup [ r-ref-string swap set ] hash-each
] bind over set-tag-props ;
M: reference (r-ref)
replace-ref ;
M: object (r-ref) ;
: replace-refs ( xml -- xml )
[ (r-ref) ] xml-map ;
! Example
: test-refs
H{
{ "foo" [ "foo" ] }
{ "bar" [ [ .s ] string-out ] }
{ "baz" [ "<a/>" string>xml delegate ] }
} ref-table set
"<x>%foo;<y prop='blah%foo;'>%bar;</y>%baz;</x>" string>xml
replace-refs ;

View File

@ -1,4 +1,4 @@
! Copyright (C) 2005, 2006 Daniel Ehrenberg
! --> Copyright (C) 2005, 2006 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
IN: xml
USING: errors hashtables io kernel math namespaces prettyprint sequences
@ -13,72 +13,79 @@ TUPLE: instruction text ;
: start-tag ( -- name ? )
#! Outputs the name and whether this is a closing tag
char CHAR: / = dup [ incr-spot ] when
get-char CHAR: / = dup [ incr-spot ] when
parse-name swap ;
: (parse-quot) ( ch sbuf -- )
char {
{ [ dup not ] [ "File ended in quote" <xml-string-error> throw ] }
{ [ 3dup nip = ] [ drop >string , drop incr-spot ] }
{ [ dup CHAR: & = ] [ drop parse-entity (parse-quot) ] }
{ [ dup CHAR: % = ] [ drop parse-reference (parse-quot) ] }
{ [ t ] [ parsed-ch (parse-quot) ] }
: (parse-quot) ( ch -- )
! The similarities with (parse-text) should be factored out
get-char {
{ [ dup not ]
[ "File ended in quote" <xml-string-error> throw ] }
{ [ 2dup = ]
[ 2drop end-record , incr-spot ] }
{ [ dup CHAR: & = ]
[ drop parse-entity (parse-quot) ] }
{ [ CHAR: % = ] [ parse-reference (parse-quot) ] }
{ [ t ] [ incr-spot (parse-quot) ] }
} cond ;
: parse-quot ( ch -- array )
[ SBUF" " clone (parse-quot) ] { } make <xml-string> ;
[ new-record (parse-quot) ] { } make <xml-string> ;
: parse-prop-value ( -- seq )
char dup "'\"" member? [
get-char dup "'\"" member? [
incr-spot parse-quot
] [
"Attribute lacks quote" <xml-string-error> throw
] if ;
: parse-prop ( -- )
parse-name pass-blank CHAR: = expect pass-blank
parse-prop-value swap set ;
[ parse-name ] with-scope
pass-blank CHAR: = expect pass-blank
[ parse-prop-value ] with-scope
swap set ;
: (middle-tag) ( -- )
pass-blank char name-start-char?
pass-blank get-char name-start-char?
[ parse-prop (middle-tag) ] when ;
: middle-tag ( -- hash )
[ (middle-tag) ] make-hash pass-blank ;
: end-tag ( string hash -- tag )
pass-blank char CHAR: / =
pass-blank get-char CHAR: / =
[ <contained> incr-spot ] [ <opener> ] if ;
: skip-comment ( -- comment )
"--" expect-string
"--" take-until-string
"--" take-string
<comment>
CHAR: > expect ;
: cdata ( -- string )
"[CDATA[" expect-string "]]>" take-until-string ;
"[CDATA[" expect-string "]]>" take-string ;
: directive ( -- object )
{
{ [ "--" string-matches? ] [ skip-comment ] }
{ [ "[CDATA[" string-matches? ] [ cdata ] }
{ [ t ] [ ">" take-until-string <directive> ] }
{ [ t ] [ CHAR: > take-char <directive> ] }
} cond ;
: instruction ( -- instruction )
! this should make sure the name doesn't include 'xml'
"?>" take-until-string <instruction> ;
"?>" take-string <instruction> ;
: make-tag ( -- tag/f )
CHAR: < expect
{ { [ char dup CHAR: ! = ] [ drop incr-spot directive ] }
{ { [ get-char dup CHAR: ! = ] [ drop incr-spot directive ] }
{ [ CHAR: ? = ] [ incr-spot instruction ] }
{ [ t ] [
start-tag [ <closer> ] [
middle-tag end-tag
] if pass-blank CHAR: > expect
] } } cond ;
] }
} cond ;
! -- Overall parser with data tree
@ -234,9 +241,18 @@ M: extra-attrs error.
<xml-string-error> throw ] unless
concat ;
TUPLE: bad-version num ;
M: bad-version error.
"XML version must be \"1.0\" or \"1.1\". Version here was " write
bad-version-num . ;
: good-version ( version -- version )
dup { "1.0" "1.1" } member? [ <bad-version> throw ] unless ;
: prolog-attrs ( hash -- )
T{ name f "" "version" f } over hash [
concat-strings prolog-data get set-prolog-version
concat-strings good-version
prolog-data get set-prolog-version
] when*
T{ name f "" "encoding" f } over hash [
concat-strings prolog-data get set-prolog-encoding
@ -253,10 +269,14 @@ M: extra-attrs error.
dup assure-no-extra prolog-attrs
] when ;
: init-xml ( string -- )
code set { 0 1 1 } clone spot set
: init-xml ( stream -- )
stdio set
{ 0 0 0 "" } clone spot set
f record set f now-recording? set
incr-spot
"1.0" "iso-8859-1" f <prolog> prolog-data set
init-xml-stack init-ns-stack ;
init-xml-stack
init-ns-stack ;
UNION: any-tag tag contained-tag ;
@ -277,9 +297,9 @@ M: multitags error.
: (string>xml) ( -- )
parse-text process
more? [ make-tag process (string>xml) ] when ; inline
get-char [ make-tag process (string>xml) ] when ;
: string>xml ( string -- xml-doc )
: stream>xml ( stream -- xml-doc )
#! Produces a tree of XML nodes
[
init-xml
@ -290,5 +310,8 @@ M: multitags error.
make-xml-doc
] with-scope ;
: string>xml ( string -- xml-doc )
<string-reader> stream>xml ;
UNION: xml-parse-error multitags notags xml-error extra-attrs nonexist-ns
not-yes/no unclosed mismatched xml-string-error expected no-entity ;

View File

@ -8,7 +8,7 @@ USING: kernel xml test io namespaces hashtables sequences
! This is insufficient
SYMBOL: xml-file
[ ] [ "contrib/xml/test.xml" resource-path <file-reader>
contents string>xml xml-file set ] unit-test
stream>xml xml-file set ] unit-test
[ "1.0" ] [ xml-file get xml-doc-prolog prolog-version ] unit-test
[ f ] [ xml-file get xml-doc-prolog prolog-standalone ] unit-test
[ "a" ] [ xml-file get name-space ] unit-test

View File

@ -4,16 +4,44 @@ IN: xml
USING: errors hashtables io kernel math namespaces prettyprint
sequences tools generic strings char-classes ;
SYMBOL: code #! Source code
SYMBOL: spot #! { index line column }
: get-index ( -- index ) spot get first ;
: set-index ( index -- ) 0 spot get set-nth ;
! -- Low-level parsing
! Code stored in stdio
! Spot is composite so it won't be lost in sub-scopes
SYMBOL: spot #! { char line column line-str }
: get-char ( -- char ) spot get first ;
: set-char ( char -- ) 0 spot get set-nth ;
: get-line ( -- line ) spot get second ;
: set-line ( line -- ) 1 spot get set-nth ;
: get-column ( -- column ) spot get third ;
: set-column ( column -- ) 2 spot get set-nth ;
: get-line-str ( -- line-str ) 3 spot get nth ;
: set-line-str ( line-str -- ) 3 spot get set-nth ;
SYMBOL: prolog-data
! Record is composite so it changes in nested scopes
SYMBOL: record ! string
SYMBOL: now-recording? ! t/f
: recording? ( -- t/f ) now-recording? get ;
: get-record ( -- sbuf ) record get ;
: push-record ( ch -- )
get-record push ;
: new-record ( -- )
SBUF" " clone record set
t now-recording? set
get-char [ push-record ] when* ;
: unrecord ( -- )
record get pop* ;
: (end-record) ( -- sbuf )
f now-recording? set
get-record ;
: end-record* ( n -- string )
(end-record) tuck length swap -
head-slice >string ;
: end-record ( -- string )
1 end-record* ;
! -- Error reporting
TUPLE: xml-error line column ;
@ -57,72 +85,79 @@ M: xml-string-error error.
! -- Basic utility words
: more? ( -- ? )
#! Return t if spot is not at the end of code
code get length get-index = not ;
: readln-nb ( -- string )
! read a non-blank line
readln dup "" = [ drop readln-nb ] when ;
: char ( -- char/f )
more? [ get-index code get nth ] [ f ] if ;
: (incr-spot) ( -- char )
get-column get-line-str 2dup length 1- < [
>r 1+ dup set-column r> nth
] [
2drop 0 set-column
readln-nb dup set-line-str
[ first ] [ f ] if*
get-line 1+ set-line
] if ;
: incr-spot ( -- )
#! Increment spot.
get-index 1+ set-index char "\n\r" member?
[ 0 set-column get-line 1+ set-line ]
[ get-column 1+ set-column ] if ;
get-char [
"XML document unexpectedly ended"
<xml-string-error> throw
] unless
(incr-spot) dup set-char
recording? over and [ push-record ] [ drop ] if ;
: skip-until ( quot -- )
#! quot: ( char -- ? )
more? [
char swap [ call ] keep swap [ drop ] [
incr-spot skip-until
#! quot: ( -- ? )
get-char [
[ call ] keep swap [ drop ] [
incr-spot skip-until
] if
] [ drop ] if ; inline
] [ 2drop ] if ; inline
: take-until ( quot -- string | quot: char -- ? )
: take-until ( quot -- string | quot: -- ? )
#! Take the substring of a string starting at spot
#! from code until the quotation given is true and
#! advance spot to after the substring.
get-index >r skip-until r>
get-index code get subseq ; inline
new-record skip-until end-record ; inline
: take-char ( ch -- string )
[ dup get-char = ] take-until nip ;
: pass-blank ( -- )
#! Advance code past any whitespace, including newlines
[ blank? not ] skip-until ;
[ get-char blank? not ] skip-until ;
: string-matches? ( string -- ? )
get-index dup pick length + code get
2dup length > [ 3drop drop f ] [ <slice> sequence= ] if ;
dup length get-column tuck +
dup get-line-str length <=
[ get-line-str <slice> sequence= ]
[ 3drop f ] if ;
: (take-until-string) ( string -- n )
more? [
dup string-matches? [
drop get-index
] [
incr-spot (take-until-string)
] if
] [ "Missing closing token" <xml-string-error> throw ] if ;
: take-until-string ( string -- string )
[ >r get-index r> (take-until-string) code get subseq ] keep
length get-index + set-index ;
: take-string ( match -- string )
! match must not contain a newline
[ dup string-matches? ] take-until
get-line-str
[ "Missing closing token" <xml-string-error> throw ] unless
swap length [ incr-spot ] times ;
! -- Parsing strings
: expect ( ch -- )
char 2dup = [ 2drop ] [
get-char 2dup = [ 2drop ] [
>r ch>string r> ch>string <expected> throw
] if incr-spot ;
: expect-string* ( num -- )
#! only skips string
#! only skips string, and only for when you're sure the string is there
[ incr-spot ] times ;
: expect-string ( string -- )
>r get-index r> t over [ char incr-spot = and ] each [
2drop
] [
swap get-index code get subseq <expected> throw
] if ;
! TODO: add error if this isn't long enough
new-record dup length [ incr-spot ] times
end-record 2dup = [ 2drop ]
[ <expected> throw ] if ;
TUPLE: prolog version encoding standalone ; ! part of xml-doc, see parser
@ -143,42 +178,47 @@ TUPLE: prolog version encoding standalone ; ! part of xml-doc, see parser
TUPLE: entity name ;
: parsed-ch ( sbuf ch -- sbuf ) over push incr-spot ;
: (parse-entity) ( string -- )
dup entities hash [ push-record ] [
prolog-data get prolog-standalone
[ <no-entity> throw ] [
end-record , <entity> , new-record
] if
] ?if ;
: parse-entity ( sbuf -- sbuf )
incr-spot [ CHAR: ; = ] take-until "#" ?head [
"x" ?head 16 10 ? base> parsed-ch
] [
dup entities hash [ parsed-ch ] [
prolog-data get prolog-standalone
[ <no-entity> throw ] [
>r >string , r> <entity> , incr-spot
SBUF" " clone
] if
] ?if
] if ;
: parse-entity ( -- )
unrecord
! the following line is in a scope to shield this
! word from the record-altering side effects of
! take-until.
[ CHAR: ; take-char ] with-scope
"#" ?head [
"x" ?head 16 10 ? base>
push-record incr-spot
] [ (parse-entity) ] if ;
TUPLE: reference name ;
: parse-reference ( sbuf -- sbuf )
>string , incr-spot [ CHAR: ; = ] take-until
<reference> , SBUF" " clone incr-spot ;
: parse-reference ( -- )
unrecord end-record , CHAR: ; take-char
<reference> , new-record incr-spot ;
: (parse-text) ( sbuf -- )
char {
{ [ dup not ] [ drop >string , ] } ! should this be an error?
{ [ dup CHAR: < = ] [ drop >string , ] }
: (parse-text) ( -- )
get-char {
{ [ dup not ]
[ drop 0 end-record* , ] }
{ [ dup CHAR: < = ] [ drop end-record , ] }
{ [ dup CHAR: & = ]
[ drop parse-entity (parse-text) ] }
{ [ dup CHAR: % = ]
[ drop parse-reference (parse-text) ] }
{ [ t ] [ parsed-ch (parse-text) ] }
{ [ CHAR: % = ]
[ parse-reference (parse-text) ] }
{ [ t ] [ incr-spot (parse-text) ] }
} cond ;
TUPLE: xml-string array ;
: parse-text ( -- array )
[ SBUF" " clone (parse-text) ] { } make <xml-string> ;
[ new-record (parse-text) ] { } make <xml-string> ;
! -- Parsing tags
@ -199,12 +239,12 @@ C: name ( space tag -- name )
[ 1.0name-char? ] [ 1.1name-char? ] if ;
: (parse-name) ( -- str )
char dup name-start-char? [
incr-spot ch>string [ name-char? not ] take-until append
new-record get-char name-start-char? [
[ get-char name-char? not ] skip-until end-record
] [
"Malformed name" <xml-string-error> throw
] if ;
: parse-name ( -- str-name )
(parse-name) char CHAR: : =
: parse-name ( -- name )
(parse-name) get-char CHAR: : =
[ incr-spot (parse-name) ] [ "" swap ] if <name> ;

View File

@ -19,6 +19,8 @@ M: entity write-str-elem
M: reference write-str-elem
CHAR: % write1 reference-name write CHAR: ; write1 ;
UNION: str-elem string entity reference ;
: print-name ( name -- )
dup name-space dup "" = [ drop ]
[ write CHAR: : write1 ] if
@ -32,7 +34,7 @@ M: reference write-str-elem
GENERIC: (xml>string) ( object -- )
M: object (xml>string) ! string element
M: str-elem (xml>string) ! string element
write-str-elem ;
M: contained-tag (xml>string)

View File

@ -12,15 +12,22 @@ HELP: xml>string
{ $values { "xml-doc" "an xml document" } { "string" "a string" } }
{ $description "converts an xml document into a string" }
{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" }
{ $see-also string>xml xml-reprint } ;
{ $see-also string>xml xml-reprint write-xml } ;
HELP: xml-parse-error
{ $description "the exception class that all parsing errors in XML documents are in." } ;
{ $class-description "the exception class that all parsing errors in XML documents are in." } ;
HELP: xml-reprint
{ $values { "in" "a string of XML" } { "out" "reprinted XML" } }
{ $values { "string" "a string of XML" } { "string" "reprinted XML" } }
{ $description "parses XML and converts it back into a string, for testing purposes" }
{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ;
{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" }
{ $see-also write-xml xml>string string>xml } ;
HELP: write-xml
{ $values { "xml-doc" "an XML document" } }
{ $description "prints the contents of an XML document to stdio" }
{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" }
{ $see-also xml>string xml-reprint } ;
HELP: PROCESS:
{ $syntax "PROCESS: word" }
@ -139,13 +146,15 @@ HELP: <prolog> ( version encoding standalone -- prolog )
{ $see-also prolog <xml-doc> } ;
ARTICLE: { "xml" "intro" } "XML"
"The XML module attempts to implement the XML 1.1 standard, converting strings of text into XML and vice versa. It currently is a work in progress."
"The XML module attempts to implement the XML 1.1 standard, converting strings of text into XML and vice versa. It currently is a work in progress. Together with XML-RPC, this is a component of the F2EE framework."
$terpri
"The XML module was implemented by Daniel Ehrenberg, with edits by Slava Pestov. Main functions implemented include:"
{ $subsection string>xml }
{ $subsection xml>string }
{ $subsection xml-parse-error }
{ $subsection xml-reprint }
{ $subsection write-xml }
{ $subsection init-xml }
"Data types that XML documents are made of:"
{ $subsection name }
{ $subsection tag }