Merge branch 'master' of git://factorcode.org/git/factor

db4
Doug Coleman 2009-01-29 19:58:29 -06:00
commit ea153f153a
81 changed files with 326 additions and 285 deletions

View File

@ -244,13 +244,13 @@ ARTICLE: "db-protocol" "Low-level database protocol"
! { $subsection bind-tuple }
ARTICLE: "db-lowlevel-tutorial" "Low-level database tutorial"
"Although Factor makes integrating a database with its object system easy (see " { $vocab-link "db.tuples" } "), sometimes you may want to write SQL directly and get the results back as arrays of strings, for instance, when interfacing with a legacy database that doesn't easily map to " { $snippet "tuples" } "."
"Although Factor makes integrating a database with its object system easy (see " { $vocab-link "db.tuples" } "), sometimes you may want to write SQL directly and get the results back as arrays of strings, for instance, when interfacing with a legacy database that doesn't easily map to " { $snippet "tuples" } "." $nl
"Executing a SQL command:"
{ $subsection sql-command }
"Executing a query directly:"
{ $subsection sql-query }
"Here's an example usage where we'll make a book table, insert some objects, and query them." $nl
"First, let's set up a custom combinator for using our database. See " { $link "db-custom-database-combinators" } " for more details."
"First, let's set up a custom combinator for using our database. See " { $link "db-custom-database-combinators" } " for more details."
{ $code <"
USING: db.sqlite db io.files ;
: with-book-db ( quot -- )

View File

@ -199,7 +199,7 @@ ARTICLE: "db-tuples-protocol" "Tuple database protocol"
{ $subsection <count-statement> } ;
ARTICLE: "db-tuples-tutorial" "Tuple database tutorial"
"Let's make a tuple and store it in a database. To follow along, click on each code example and run it in the listener. If you forget to run an example, just start at the top and run them all again in order." $nl
"Let's make a tuple and store it in a database. To follow along, click on each code example and run it in the listener. If you forget to run an example, just start at the top and run them all again in order." $nl
"We're going to store books in this tutorial."
{ $code "TUPLE: book id title author date-published edition cover-price condition ;" }
"The title, author, and publisher should be strings; the date-published a timestamp; the edition an integer; the cover-price a float. These are the Factor types for which we will need to look up the corresponding " { $link "db.types" } ". " $nl
@ -246,7 +246,7 @@ T{ book
{ $code <" [
book get update-tuple
] with-book-tutorial "> }
"And select it again. You can query the database by any field -- just set it in the exemplar tuple you pass to " { $link select-tuples } "."
"And select it again. You can query the database by any field -- just set it in the exemplar tuple you pass to " { $link select-tuples } "."
{ $code <" [
T{ book { title "Factor for Sheeple" } } select-tuples
] with-book-tutorial "> }

View File

@ -236,7 +236,7 @@ M: f (write-farkup) ;
parse-farkup (write-farkup) ;
: write-farkup ( string -- )
farkup>xml write-xml-chunk ;
farkup>xml write-xml ;
: convert-farkup ( string -- string' )
[ write-farkup ] with-string-writer ;

View File

@ -19,7 +19,7 @@ GENERIC: render* ( value name renderer -- xml )
[ f swap ]
if
] 2dip
render* write-xml-chunk
render* write-xml
[ render-error ] when* ;
<PRIVATE
@ -176,4 +176,4 @@ M: comparison render*
! HTML component
SINGLETON: html
M: html render* 2drop string>xml-chunk ;
M: html render* 2drop <unescaped> ;

View File

@ -1,11 +1,9 @@
! cont-html v0.6
!
! Copyright (C) 2004 Chris Double.
! Copyright (C) 2004, 2009 Chris Double, Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: io io.styles kernel namespaces prettyprint quotations
sequences strings words xml.entities compiler.units effects
urls math math.parser combinators present fry ;
xml.data xml.interpolate urls math math.parser combinators
present fry io.streams.string xml.writer ;
IN: html.elements
@ -135,17 +133,18 @@ SYMBOL: html
"<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">" write-html ;
: simple-page ( title head-quot body-quot -- )
#! Call the quotation, with all output going to the
#! body of an html page with the given title.
spin
xhtml-preamble
<html "http://www.w3.org/1999/xhtml" =xmlns "en" =xml:lang "en" =lang html>
<head>
<title> write </title>
call
</head>
<body> call </body>
</html> ; inline
[ with-string-writer <unescaped> ] bi@
<XML
<?xml version="1.0"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<title><-></title>
<->
</head>
<body><-></body>
</html>
XML> write-xml ; inline
: render-error ( message -- )
<span "error" =class span> escape-string write </span> ;
[XML <span class="error"><-></span> XML] write-xml ;

View File

@ -49,7 +49,7 @@ DEFER: compile-element
reset-buffer "@" ?head [ , [ value present ] % ] [ , ] if ;
: compile-attrs ( assoc -- )
attrs>> [
[
" " [write]
swap name>string [write]
"=\"" [write]
@ -59,7 +59,7 @@ DEFER: compile-element
: compile-start-tag ( tag -- )
"<" [write]
[ name>string [write] ] [ compile-attrs ] bi
[ name>string [write] ] [ attrs>> compile-attrs ] bi
">" [write] ;
: compile-end-tag ( tag -- )
@ -90,7 +90,7 @@ ERROR: unknown-chloe-tag tag ;
{ [ dup [ tag? ] [ xml? ] bi or ] [ compile-tag ] }
{ [ dup string? ] [ escape-string [write] ] }
{ [ dup comment? ] [ drop ] }
[ [ write-xml-chunk ] [code-with] ]
[ [ write-xml ] [code-with] ]
} cond ;
: with-compiler ( quot -- quot' )
@ -126,7 +126,7 @@ ERROR: unknown-chloe-tag tag ;
: compile-prologue ( xml -- )
[
[ prolog>> [ write-prolog ] [code-with] ]
[ prolog>> [ write-xml ] [code-with] ]
[ before>> compile-chunk ]
bi
] compile-quot

View File

@ -2,7 +2,7 @@ USING: http http.server http.client http.client.private tools.test multiline
io.streams.string io.encodings.utf8 io.encodings.8-bit
io.encodings.binary io.encodings.string kernel arrays splitting
sequences assocs io.sockets db db.sqlite continuations urls
hashtables accessors namespaces ;
hashtables accessors namespaces xml.data ;
IN: http.tests
[ "text/plain" latin1 ] [ "text/plain" parse-content-type ] unit-test
@ -322,7 +322,7 @@ SYMBOL: a
3 a set-global
: test-a string>xml "input" tag-named "value" swap at ;
: test-a string>xml "input" tag-named "value" attr ;
[ "3" ] [
"http://localhost/" add-port http-get

View File

@ -12,6 +12,7 @@ io.encodings.utf8
io.encodings.ascii
io.encodings.binary
io.streams.limited
io.streams.string
io.servers.connection
io.timeouts
io.crlf

View File

@ -3,4 +3,4 @@
USING: lcs.diff2html lcs kernel tools.test strings sequences xml.writer ;
IN: lcs.diff2html.tests
[ ] [ "hello" "heyo" [ 1string ] { } map-as diff htmlize-diff xml-chunk>string drop ] unit-test
[ ] [ "hello" "heyo" [ 1string ] { } map-as diff htmlize-diff xml>string drop ] unit-test

View File

@ -81,7 +81,7 @@ TUPLE: entry title url description date ;
[
{ "content" "summary" } any-tag-named
dup children>> [ string? not ] contains?
[ children>> [ write-xml-chunk ] with-string-writer ]
[ children>> xml>string ]
[ children>string ] if >>description
]
[

View File

@ -2,14 +2,15 @@
! See http://factorcode.org/license.txt for BSD license.
USING: kernel namespaces xml.name io.encodings.utf8 xml.elements
io.encodings.utf16 xml.tokenize xml.state math ascii sequences
io.encodings.string io.encodings combinators ;
io.encodings.string io.encodings combinators accessors
xml.data io.encodings.iana ;
IN: xml.autoencoding
: continue-make-tag ( str -- tag )
parse-name-starting middle-tag end-tag ;
: start-utf16le ( -- tag )
utf16le decode-input-if
utf16le decode-input
"?\0" expect
check instruct ;
@ -17,20 +18,36 @@ IN: xml.autoencoding
-6 shift 3 bitand 2 = ;
: start<name ( ch -- tag )
! This is unfortunate, and exists for the corner case
! that the first letter of the document is < and second is
! not ASCII
ascii?
[ utf8 decode-input-if next make-tag ] [
[ utf8 decode-input next make-tag ] [
next
[ get-next 10xxxxxx? not ] take-until
get-char suffix utf8 decode
utf8 decode-input-if next
utf8 decode-input next
continue-make-tag
] if ;
: prolog-encoding ( prolog -- )
encoding>> dup "UTF-16" =
[ drop ] [ name>encoding [ decode-input ] when* ] if ;
: instruct-encoding ( instruct/prolog -- )
dup prolog?
[ prolog-encoding ]
[ drop utf8 decode-input ] if ;
: go-utf8 ( -- )
check utf8 decode-input next next ;
: start< ( -- tag )
! What if first letter of processing instruction is non-ASCII?
get-next {
{ 0 [ next next start-utf16le ] }
{ CHAR: ? [ check next next instruct ] } ! XML prolog parsing sets the encoding
{ CHAR: ! [ check utf8 decode-input next next direct ] }
{ CHAR: ? [ go-utf8 instruct dup instruct-encoding ] }
{ CHAR: ! [ go-utf8 direct ] }
[ check start<name ]
} case ;
@ -39,7 +56,7 @@ IN: xml.autoencoding
"<" expect check make-tag ;
: decode-expecting ( encoding string -- tag )
[ decode-input-if next ] [ expect ] bi* check make-tag ;
[ decode-input next ] [ expect ] bi* check make-tag ;
: start-utf16be ( -- tag )
utf16be "<" decode-expecting ;
@ -57,8 +74,6 @@ IN: xml.autoencoding
{ HEX: EF [ skip-utf8-bom ] }
{ HEX: FF [ skip-utf16le-bom ] }
{ HEX: FE [ skip-utf16be-bom ] }
{ f [ "" ] }
[ drop utf8 decode-input-if f ]
! Same problem as with <e`>, in the case of XML chunks?
} case check ;
[ drop utf8 decode-input check f ]
} case ;

View File

@ -26,7 +26,7 @@ CATEGORY: 1.1name-char Ll Lu Lo Lm Ln Nl Mc Mn Nd Pc Cf _-.\u0000b7: ;
! 1.1:
! [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
{
{ [ dup HEX: 20 < ] [ "\t\r\n" member? and ] }
{ [ dup HEX: 20 < ] [ swap [ "\t\r\n" member? ] [ zero? not ] if ] }
{ [ nip dup HEX: D800 < ] [ drop t ] }
{ [ dup HEX: E000 < ] [ drop f ] }
[ { HEX: FFFE HEX: FFFF } member? not ]

View File

@ -13,15 +13,17 @@ ARTICLE: "xml.data" "XML data types"
"For high-level tools for manipulating XML, see " { $vocab-link "xml.utilities" } ;
ARTICLE: { "xml.data" "classes" } "XML data classes"
"Data types that XML documents are made of:"
{ $subsection name }
"XML documents and chunks are made of the following classes:"
{ $subsection xml }
{ $subsection xml-chunk }
{ $subsection tag }
{ $subsection name }
{ $subsection contained-tag }
{ $subsection open-tag }
{ $subsection xml }
{ $subsection prolog }
{ $subsection comment }
{ $subsection instruction }
{ $subsection unescaped }
{ $subsection element-decl }
{ $subsection attlist-decl }
{ $subsection entity-decl }
@ -32,13 +34,15 @@ ARTICLE: { "xml.data" "classes" } "XML data classes"
ARTICLE: { "xml.data" "constructors" } "XML data constructors"
"These data types are constructed with:"
{ $subsection <name> }
{ $subsection <tag> }
{ $subsection <contained-tag> }
{ $subsection <xml> }
{ $subsection <xml-chunk> }
{ $subsection <tag> }
{ $subsection <name> }
{ $subsection <contained-tag> }
{ $subsection <prolog> }
{ $subsection <comment> }
{ $subsection <instruction> }
{ $subsection <unescaped> }
{ $subsection <simple-name> }
{ $subsection <element-decl> }
{ $subsection <attlist-decl> }
@ -89,7 +93,7 @@ HELP: xml
HELP: <xml>
{ $values { "prolog" "an XML prolog" } { "before" "a sequence of XML elements" }
{ "body" tag } { "after" "a sequence of XML elements" } { "xml" "an XML document" } }
{ $description "creates an XML document, delegating to the main tag, with the specified prolog, before, and after" }
{ $description "Creates an XML document. The " { $snippet "before" } " and " { $snippet "after" } " slots store what comes before and after the main tag, and " { $snippet "body" } "contains the main tag itself." }
{ $see-also xml <tag> } ;
HELP: prolog
@ -99,47 +103,46 @@ HELP: prolog
HELP: <prolog>
{ $values { "version" "a string, 1.0 or 1.1" }
{ "encoding" "a string" } { "standalone" "a boolean" } { "prolog" "an XML prolog" } }
{ $description "creates an XML prolog tuple" }
{ $description "Creates an XML prolog tuple." }
{ $see-also prolog <xml> } ;
HELP: comment
{ $class-description "represents a comment in XML. Has one slot, text, which contains the string of the comment" }
{ $class-description "Represents a comment in XML. This tuple has one slot, " { $snippet "text" } ", which contains the string of the comment." }
{ $see-also <comment> } ;
HELP: <comment>
{ $values { "text" "a string" } { "comment" "a comment" } }
{ $description "creates an XML comment tuple" }
{ $values { "text" string } { "comment" comment } }
{ $description "Creates an XML " { $link comment } " tuple." }
{ $see-also comment } ;
HELP: instruction
{ $class-description "represents an XML instruction, such as <?xsl stylesheet='foo.xml'?>. Contains one slot, text, which contains the string between the question marks." }
{ $class-description "Represents an XML instruction, such as " { $snippet "<?xsl stylesheet='foo.xml'?>" } ". Contains one slot, " { $snippet "text" } ", which contains the string between the question marks." }
{ $see-also <instruction> } ;
HELP: <instruction>
{ $values { "text" "a string" } { "instruction" "an XML instruction" } }
{ $description "creates an XML parsing instruction, such as <?xsl stylesheet='foo.xml'?>." }
{ $description "Creates an XML parsing instruction, like " { $snippet "<?xsl stylesheet='foo.xml'?>" } "." }
{ $see-also instruction } ;
HELP: opener
{ $class-description "describes an opening tag, like <a>. Contains two slots, name and attrs containing, respectively, the name of the tag and its attributes. Usually, the name-url will be f." }
{ $see-also closer contained } ;
{ $class-description "Describes an opening tag, like " { $snippet "<a>" } ". Contains two slots, " { $snippet "name" } " and " { $snippet "attrs" } " containing, respectively, the name of the tag and its attributes." } ;
HELP: closer
{ $class-description "describes a closing tag, like </a>. Contains one slot, name, containing the tag's name. Usually, the name-url will be f." }
{ $see-also opener contained } ;
{ $class-description "Describes a closing tag, like " { $snippet "</a>" } ". Contains one slot, " { $snippet "name" } ", containing the closer's name." } ;
HELP: contained
{ $class-description "represents a self-closing tag, like <a/>. Contains two slots, name and attrs containing, respectively, the name of the tag and its attributes. Usually, the name-url will be f." }
{ $see-also opener closer } ;
{ $class-description "Represents a self-closing tag, like " { $snippet "<a/>" } ". Contains two slots," { $snippet "name" } " and " { $snippet "attrs" } " containing, respectively, the name of the tag and its attributes." } ;
{ opener closer contained } related-words
HELP: open-tag
{ $class-description "represents a tag that does have children, ie is not a contained tag" }
{ $notes "the constructor used for this class is simply " { $link <tag> } "." }
{ $class-description "Represents a tag that does have children, ie. is not a contained tag" }
{ $notes "The constructor used for this class is simply " { $link <tag> } "." }
{ $see-also tag contained-tag } ;
HELP: names-match?
{ $values { "name1" "a name" } { "name2" "a name" } { "?" "t or f" } }
{ $description "checks to see if the two names match, that is, if all fields are equal, ignoring fields whose value is f in either name." }
{ $description "Checks to see if the two names match, that is, if all fields are equal, ignoring fields whose value is f in either name." }
{ $example "USING: prettyprint xml.data ;" "T{ name f \"rpc\" \"methodCall\" f } T{ name f f \"methodCall\" \"http://www.xmlrpc.org/\" } names-match? ." "t" }
{ $see-also name } ;
@ -173,7 +176,7 @@ HELP: <entity-decl>
{ $description "Creates an entity declaration object, of the class " { $link entity-decl } ". The pe? slot should be t if the object is a DTD-internal entity, like " { $snippet "<!ENTITY % foo 'bar'>" } " and f if the object is like " { $snippet "<!ENTITY foo 'bar'>" } ", that is, it can be used outside of the DTD." } ;
HELP: system-id
{ $class-description "Describes the class of system identifiers within an XML DTD directive, such as " { $snippet "<!DOCTYPE greeting " { $emphasis "SYSTEM 'hello.dtd'" } ">" } } ;
{ $class-description "Describes the class of system identifiers within an XML DTD directive, such as " { $snippet "<!DOCTYPE greeting " { $emphasis "SYSTEM 'hello.dtd'" } ">" } "." } ;
HELP: <system-id>
{ $values { "system-literal" string } { "system-id" system-id } }
@ -199,3 +202,17 @@ HELP: doctype-decl
HELP: <doctype-decl>
{ $values { "name" name } { "external-id" id } { "internal-subset" sequence } { "doctype-decl" doctype-decl } }
{ $description "Creates a new doctype declaration object, of the class " { $link doctype-decl } ". Only one of external-id or internal-subset will be non-null." } ;
HELP: unescaped
{ $class-description "When constructing XML documents to write to output, it can be useful to splice in a string which is already written. This tuple type allows for that. Printing an " { $snippet "unescaped" } " is the same is printing its " { $snippet "string" } " slot." } ;
HELP: <unescaped>
{ $values { "string" string } { "unescaped" unescaped } }
{ $description "Constructs an " { $link unescaped } " tuple, given a string." } ;
HELP: xml-chunk
{ $class-description "Encapsulates a balanced fragment of an XML document. This is a sequence (following the sequence protocol) of XML data types, eg " { $link string } "s and " { $link tag } "s." } ;
HELP: <xml-chunk>
{ $values { "seq" sequence } { "xml-chunk" xml-chunk } }
{ $description "Constructs an " { $link xml-chunk } " tuple, given a sequence to be its contents." } ;

View File

@ -29,7 +29,7 @@ IN: xml.elements
parse-name swap ;
: (middle-tag) ( -- )
pass-blank version=1.0? get-char name-start?
pass-blank version-1.0? get-char name-start?
[ parse-attr (middle-tag) ] when ;
: assure-no-duplicates ( attrs-alist -- attrs-alist )
@ -66,7 +66,8 @@ IN: xml.elements
: prolog-version ( alist -- version )
T{ name { space "" } { main "version" } } swap at
[ good-version ] [ versionless-prolog ] if* ;
[ good-version ] [ versionless-prolog ] if*
dup set-version ;
: prolog-encoding ( alist -- encoding )
T{ name { space "" } { main "encoding" } } swap at
@ -89,16 +90,9 @@ IN: xml.elements
[ prolog-standalone ]
tri <prolog> ;
SYMBOL: string-input?
: decode-input-if ( encoding -- )
string-input? get [ drop ] [ decode-input ] if ;
: parse-prolog ( -- prolog )
pass-blank middle-tag "?>" expect
dup assure-no-extra prolog-attrs
dup encoding>> dup "UTF-16" =
[ drop ] [ name>encoding [ decode-input-if ] when* ] if
dup prolog-data set ;
dup assure-no-extra prolog-attrs ;
: instruct ( -- instruction )
take-name {

View File

@ -12,11 +12,10 @@ ARTICLE: "xml.entities" "XML entities"
"For entities used in HTML/XHTML, see " { $vocab-link "xml.entities.html" } ;
HELP: entities
{ $description "a hash table from default XML entity names (like &amp; and &lt;) to the characters they represent. This is automatically included when parsing any XML document." }
{ $description "A hash table from default XML entity names (like " { $snippet "&amp;" } " and " { $snippet "&lt;" } ") to the characters they represent. This is automatically included when parsing any XML document." }
{ $see-also with-entities } ;
HELP: with-entities
{ $values { "entities" "a hash table of strings to chars" }
{ "quot" "a quotation ( -- )" } }
{ $description "calls the quotation using the given table of entity values (symbolizing, eg, that &foo; represents CHAR: a) on top of the default XML entities" } ;
{ $values { "entities" "a hash table of strings to strings" } { "quot" "a quotation ( -- )" } }
{ $description "Calls the quotation using the given table of entity values (symbolizing, eg, that " { $snippet "&foo;" } " represents " { $snippet "\"a\"" } ") on top of the default XML entities" } ;

View File

@ -5,14 +5,14 @@ IN: xml.entities.html
ARTICLE: "xml.entities.html" "HTML entities"
{ $vocab-link "xml.entities.html" } " defines words for using entities defined in HTML/XHTML."
{ $subsection html-entities }
{ $subsection with-html-entities } ;
{ $subsection html-entities }
{ $subsection with-html-entities } ;
HELP: html-entities
{ $description "a hash table from HTML entity names to their character values" }
{ $description "A hash table from HTML entity names to their character values." }
{ $see-also entities with-html-entities } ;
HELP: with-html-entities
{ $values { "quot" "a quotation ( -- )" } }
{ $description "calls the given quotation using HTML entity values" }
{ $description "Calls the given quotation using HTML entity values." }
{ $see-also html-entities with-entities } ;

View File

@ -3,45 +3,60 @@
USING: help.markup help.syntax ;
IN: xml.errors
<PRIVATE
: $xml-error ( element -- )
"Bad XML document for the error" $heading $code ;
PRIVATE>
HELP: multitags
{ $class-description "XML parsing error describing the case where there is more than one main tag in a document. Contains no slots" } ;
{ $class-description "XML parsing error describing the case where there is more than one main tag in a document." }
{ $xml-error "<a/>\n<b/>" } ;
HELP: notags
{ $class-description "XML parsing error describing the case where an XML document contains no main tag, or any tags at all" } ;
{ $class-description "XML parsing error describing the case where an XML document contains no main tag, or any tags at all" }
{ $xml-error "<?xml version='1.0'?>" } ;
HELP: extra-attrs
{ $class-description "XML parsing error describing the case where the XML prolog (" { $snippet "<?xml ...?>" } ") contains attributes other than the three allowed ones, standalone, version and encoding. Contains one slot, attrs, which is a hashtable of all the extra attributes' names. Delegates to " { $link xml-error-at } "." } ;
{ $class-description "XML parsing error describing the case where the XML prolog (" { $snippet "<?xml ...?>" } ") contains attributes other than the three allowed ones, " { $snippet "standalone" } ", " { $snippet "version" } " and " { $snippet "encoding" } ". Contains one slot, " { $snippet "attrs" } ", which is a hashtable of all the extra attributes' names. This is a subclass of " { $link xml-error-at } "." }
{ $xml-error "<?xml version='1.0' reason='because I said so'?>\n<foo/>" } ;
HELP: nonexist-ns
{ $class-description "XML parsing error describing the case where a namespace doesn't exist but it is used in a tag. Contains one slot, name, which contains the name of the undeclared namespace, and delegates to " { $link xml-error-at } "." } ;
{ $class-description "XML parsing error describing the case where a namespace doesn't exist but it is used in a tag. Contains one slot, " { $snippet "name" } ", which contains the name of the undeclared namespace, and is a subclass of " { $link xml-error-at } "." }
{ $xml-error "<a:b>c</a:b>" } ;
HELP: not-yes/no
{ $class-description "XML parsing error used to describe the case where standalone is set in the XML prolog to something other than 'yes' or 'no'. Delegates to " { $link xml-error-at } " and contains one slot, text, which contains offending value." } ;
{ $class-description "XML parsing error used to describe the case where standalone is set in the XML prolog to something other than " { $snippet "yes" } " or " { $snippet "no" } ". This is a subclass of " { $link xml-error-at } " and contains one slot, text, which contains offending value." }
{ $xml-error "<?xml version='1.0' standalone='maybe'?>\n<x/>" } ;
HELP: unclosed
{ $class-description "XML parsing error used to describe the case where the XML document contains classes which are not closed by the end of the document. Contains one slot, tags, a sequence of names." } ;
{ $class-description "XML parsing error used to describe the case where the XML document contains classes which are not closed by the end of the document. Contains one slot, " { $snippet "tags" } ", a sequence of names." }
{ $xml-error "<x>some text" } ;
HELP: mismatched
{ $class-description "XML parsing error describing mismatched tags, eg " { $snippet "<a></c>" } ". Contains two slots: open is the name of the opening tag and close is the name of the closing tag. Delegates to " { $link xml-error-at } " showing the location of the closing tag" } ;
{ $class-description "XML parsing error describing mismatched tags. Contains two slots: " { $snippet "open" } " is the name of the opening tag and " { $snippet "close" } " is the name of the closing tag. This is a subclass of " { $link xml-error-at } " showing the location of the closing tag" }
{ $xml-error "<a></c>" } ;
HELP: expected
{ $class-description "XML parsing error describing when an expected token was not present. Delegates to " { $link xml-error-at } ". Contains two slots, should-be, which has the expected string, and was, which has the actual string." } ;
{ $class-description "XML parsing error describing when an expected token was not present. Delegates to " { $link xml-error-at } ". Contains two slots, " { $snippet "should-be" } ", which has the expected string, and " { $snippet "was" } ", which has the actual string." } ;
HELP: no-entity
{ $class-description "XML parsing error describing the use of an undefined entity in a case where standalone is marked yes. Delegates to " { $link xml-error-at } ". Contains one slot, thing, containing a string representing the entity." } ;
{ $class-description "XML parsing error describing the use of an undefined entity. This is a subclass of " { $link xml-error-at } ". Contains one slot, thing, containing a string representing the entity." }
{ $xml-error "<x>&foo;</x>" } ;
HELP: pre/post-content
{ $class-description "Describes the error where a non-whitespace string is used before or after the main tag in an XML document. Contains two slots: string contains the offending string, and pre? is t if it occured before the main tag and f if it occured after" } ;
HELP: unclosed-quote
{ $class-description "Describes the error where a quotation for an attribute value is opened but not closed before the end of the document." } ;
{ $class-description "Describes the error where a non-whitespace string is used before or after the main tag in an XML document. Contains two slots: " { $snippet "string" } " contains the offending string, and " { $snippet "pre?" } " is " { $snippet "t" } " if it occured before the main tag and " { $snippet "f" } " if it occured after." }
{ $xml-error "hello\n<main-tag/>" } ;
HELP: bad-name
{ $class-description "Describes the error where a name is used, for example in an XML tag or attribute key, which is invalid." } ;
{ $class-description "Describes the error where a name is used, for example in an XML tag or attribute key, which is invalid." }
{ $xml-error "<%>\n</%>" } ;
HELP: quoteless-attr
{ $class-description "Describes the error where an attribute of an XML tag is missing quotes around a value." } ;
{ $class-description "Describes the error where an attribute of an XML tag is missing quotes around a value." }
{ $xml-error "<tag foo=bar/>" } ;
HELP: disallowed-char
{ $class-description "Describes the error where a disallowed character occurs in an XML document." } ;
@ -53,25 +68,30 @@ HELP: unexpected-end
{ $class-description "Describes the error where a document unexpectedly ends, and the XML parser expected it to continue." } ;
HELP: duplicate-attr
{ $class-description "Describes the error where there is more than one attribute of the same key." } ;
{ $class-description "Describes the error where there is more than one attribute of the same key." }
{ $xml-error "<tag value='1' value='2'/>" } ;
HELP: bad-cdata
{ $class-description "Describes the error where CDATA is used outside of the main tag of an XML document." } ;
{ $class-description "Describes the error where CDATA is used outside of the main tag of an XML document." }
{ $xml-error "<x>y</x>\n<![CDATA[]]>" } ;
HELP: text-w/]]>
{ $class-description "Describes the error where a text node contains the literal string " { $snippet "]]>" } " which is disallowed." } ;
{ $class-description "Describes the error where a text node contains the literal string " { $snippet "]]>" } " which is disallowed." }
{ $xml-error "<x>Here's some text: ]]> there it was</x>" } ;
HELP: attr-w/<
{ $class-description "Describes the error where an attribute value contains the literal character " { $snippet "<" } " which is disallowed." } ;
{ $class-description "Describes the error where an attribute value contains the literal character " { $snippet "<" } " which is disallowed." }
{ $xml-error "<x value='bar<baz'/>" } ;
HELP: misplaced-directive
{ $class-description "Describes the error where an internal DTD directive is used outside of a DOCTYPE or DTD file, or where a DOCTYPE occurs somewhere other than before the main tag of an XML document." } ;
{ $class-description "Describes the error where an internal DTD directive is used outside of a DOCTYPE or DTD file, or where a DOCTYPE occurs somewhere other than before the main tag of an XML document." }
{ $xml-error "<x><!ENTITY foo 'bar'></x>" } ;
HELP: xml-error
{ $class-description "The exception class that all parsing errors in XML documents are in." } ;
ARTICLE: "xml.errors" "XML parsing errors"
"The " { $vocab-link "xml.errors" } " vocabulary provides a rich and highly inspectable set of parsing errors. All XML errors are described by the union class " { $link xml-error } " but there are many classes contained in that:"
"The " { $vocab-link "xml.errors" } " vocabulary provides a rich and highly inspectable set of parsing errors. All XML errors are described by the union class " { $link xml-error } "."
{ $subsection multitags }
{ $subsection notags }
{ $subsection extra-attrs }
@ -93,7 +113,7 @@ ARTICLE: "xml.errors" "XML parsing errors"
{ $subsection text-w/]]> }
{ $subsection attr-w/< }
{ $subsection misplaced-directive }
"Additionally, most of these errors are a kind of " { $link xml-error-at } " which provides more information"
"Additionally, most of these errors are a kind of " { $link xml-error-at } " which provides more information about where the error occurred."
$nl
"Note that, in parsing an XML document, only the first error is reported." ;

View File

@ -194,7 +194,7 @@ M: bad-prolog summary ( obj -- str )
[
dup call-next-method write
"Misplaced XML prolog" print
prolog>> write-prolog nl
prolog>> write-xml nl
] with-string-writer ;
TUPLE: capitalized-prolog < xml-error-at name ;
@ -258,7 +258,7 @@ M: misplaced-directive summary ( obj -- str )
[
dup call-next-method write
"Misplaced directive:" print
dir>> write-xml-chunk nl
dir>> write-xml nl
] with-string-writer ;
TUPLE: bad-name < xml-error-at name ;

View File

@ -51,8 +51,8 @@ IN: xml.interpolate.tests
<XML <x number=<-> false=<-> url=<-> string=<-> word=<->/> XML>
pprint-xml>string ] unit-test
[ "<x>3</x>" ] [ 3 [XML <x><-></x> XML] xml-chunk>string ] unit-test
[ "<x></x>" ] [ f [XML <x><-></x> XML] xml-chunk>string ] unit-test
[ "<x>3</x>" ] [ 3 [XML <x><-></x> XML] xml>string ] unit-test
[ "<x></x>" ] [ f [XML <x><-></x> XML] xml>string ] unit-test
\ <XML must-infer
[ { } "" interpolate-xml ] must-infer

View File

@ -3,7 +3,7 @@
USING: xml xml.state kernel sequences fry assocs xml.data
accessors strings make multiline parser namespaces macros
sequences.deep generalizations words combinators
math present arrays ;
math present arrays unicode.categories ;
IN: xml.interpolate
<PRIVATE
@ -95,7 +95,7 @@ M: xml-chunk interpolate-xml
} cond ;
: parse-def ( accum delimiter quot -- accum )
[ parse-multiline-string 1 short head* ] dip call
[ parse-multiline-string [ blank? ] trim ] dip call
[ extract-variables collect ] keep swap
[ number<-> parsed ] dip
[ \ interpolate-xml parsed ] when ; inline

View File

@ -47,7 +47,7 @@ SYMBOL: ns-stack
: valid-name? ( str -- ? )
[ f ] [
version=1.0? swap {
version-1.0? swap {
[ first name-start? ]
[ rest-slice [ name-char? ] with all? ]
} 2&&
@ -66,7 +66,7 @@ SYMBOL: ns-stack
] ?if ;
: take-name ( -- string )
version=1.0? '[ _ get-char name-char? not ] take-until ;
version-1.0? '[ _ get-char name-char? not ] take-until ;
: parse-name ( -- name )
take-name interpret-name ;

View File

@ -3,7 +3,7 @@
USING: accessors kernel namespaces io ;
IN: xml.state
TUPLE: spot char line column next check ;
TUPLE: spot char line column next check version-1.0? ;
C: <spot> spot
@ -17,11 +17,12 @@ C: <spot> spot
: set-next ( char -- ) spot get swap >>next drop ;
: get-check ( -- ? ) spot get check>> ;
: check ( -- ) spot get t >>check drop ;
: version-1.0? ( -- ? ) spot get version-1.0?>> ;
: set-version ( string -- )
spot get swap "1.0" = >>version-1.0? drop ;
SYMBOL: xml-stack
SYMBOL: prolog-data
SYMBOL: depth
SYMBOL: interpolating?

View File

@ -9,10 +9,10 @@ SYMBOL: ref-table
GENERIC: (r-ref) ( xml -- )
M: tag (r-ref)
sub-tag over at* [
dup sub-tag attr [
ref-table get at
>>children drop
] [ 2drop ] if ;
] [ drop ] if* ;
M: object (r-ref) drop ;
: template ( xml -- )

View File

@ -51,14 +51,18 @@ SYMBOL: xml-file
[ "foo" ] [ "<boo><![CDATA[foo]]></boo>" string>xml children>string ] unit-test
[ "<!-- B+, B, or B--->" string>xml ] must-fail
[ ] [ "<?xml version='1.0'?><!-- declarations for <head> & <body> --><foo/>" string>xml drop ] unit-test
[ T{ element-decl f "br" "EMPTY" } ] [ "<!ELEMENT br EMPTY>" string>dtd directives>> first ] unit-test
[ T{ element-decl f "p" "(#PCDATA|emph)*" } ] [ "<!ELEMENT p (#PCDATA|emph)*>" string>dtd directives>> first ] unit-test
[ T{ element-decl f "%name.para;" "%content.para;" } ] [ "<!ELEMENT %name.para; %content.para;>" string>dtd directives>> first ] unit-test
[ T{ element-decl f "container" "ANY" } ] [ "<!ELEMENT container ANY>" string>dtd directives>> first ] unit-test
[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo>" string>xml-chunk first ] unit-test
[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo >" string>xml-chunk first ] unit-test
[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM 'blah.dtd'>" string>xml-chunk first ] unit-test
[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM \"blah.dtd\" >" string>xml-chunk first ] unit-test
: first-thing ( seq -- elt )
[ "" = not ] filter first ;
[ T{ element-decl f "br" "EMPTY" } ] [ "<!ELEMENT br EMPTY>" string>dtd directives>> first-thing ] unit-test
[ T{ element-decl f "p" "(#PCDATA|emph)*" } ] [ "<!ELEMENT p (#PCDATA|emph)*>" string>dtd directives>> first-thing ] unit-test
[ T{ element-decl f "%name.para;" "%content.para;" } ] [ "<!ELEMENT %name.para; %content.para;>" string>dtd directives>> first-thing ] unit-test
[ T{ element-decl f "container" "ANY" } ] [ "<!ELEMENT container ANY>" string>dtd directives>> first-thing ] unit-test
[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo>" string>xml-chunk first-thing ] unit-test
[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo >" string>xml-chunk first-thing ] unit-test
[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM 'blah.dtd'>" string>xml-chunk first-thing ] unit-test
[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM \"blah.dtd\" >" string>xml-chunk first-thing ] unit-test
[ 958 ] [ [ "&xi;" string>xml-chunk ] with-html-entities first first ] unit-test
[ "x" "<" ] [ "<x value='&lt;'/>" string>xml [ name>> main>> ] [ "value" attr ] bi ] unit-test
[ "foo" ] [ "<!DOCTYPE foo [<!ENTITY bar 'foo'>]><x>&bar;</x>" string>xml children>string ] unit-test

View File

@ -11,7 +11,7 @@ TUPLE: xml-test id uri sections description type ;
[ "ID" attr >>id ]
[ "URI" attr >>uri ]
[ "SECTIONS" attr >>sections ]
[ children>> xml-chunk>string >>description ]
[ children>> xml>string >>description ]
} cleave ;
: parse-tests ( xml -- tests )

View File

@ -6,12 +6,9 @@ circular xml.entities assocs make splitting math.parser
locals combinators arrays ;
IN: xml.tokenize
: version=1.0? ( -- ? )
prolog-data get [ version>> "1.0" = ] [ t ] if* ;
: assure-good-char ( ch -- ch )
[
version=1.0? over text? not get-check and
version-1.0? over text? not get-check and
[ disallowed-char ] when
] [ f ] if* ;
@ -36,7 +33,7 @@ IN: xml.tokenize
get-char [ unexpected-end ] unless (next) record ;
: init-parser ( -- )
0 1 0 f f <spot> spot set
0 1 0 f f t <spot> spot set
read1 set-next next ;
: with-state ( stream quot -- )

View File

@ -38,7 +38,7 @@ IN: xml.utilities
tags@ swap [ tag-named? ] with filter ;
: tag-with-attr? ( elem attr-value attr-name -- ? )
rot dup tag? [ at = ] [ 3drop f ] if ;
rot dup tag? [ swap attr = ] [ 3drop f ] if ;
: tag-with-attr ( tag attr-value attr-name -- matching-tag )
assure-name '[ _ _ tag-with-attr? ] find nip ;

View File

@ -1,56 +1,67 @@
! Copyright (C) 2005, 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: help.syntax help.markup io strings ;
USING: help.syntax help.markup io strings xml.data multiline ;
IN: xml.writer
ABOUT: "xml.writer"
ARTICLE: "xml.writer" "Writing XML"
"These words are used in implementing prettyprint"
{ $subsection write-xml-chunk }
"These words are used to print XML normally"
{ $subsection xml>string }
"These words are used to print XML preserving whitespace in text nodes"
{ $subsection write-xml }
{ $subsection xml>string }
"These words are used to prettyprint XML"
{ $subsection pprint-xml>string }
{ $subsection pprint-xml>string-but }
{ $subsection pprint-xml }
{ $subsection pprint-xml-but } ;
HELP: write-xml-chunk
{ $values { "object" "an XML element" } }
{ $description "writes an XML element to " { $link output-stream } "." }
{ $see-also write-xml-chunk write-xml } ;
"Certain variables can be changed to mainpulate prettyprinting"
{ $subsection sensitive-tags }
{ $subsection indenter }
"All of these words operate on arbitrary pieces of XML: they can take, as in put, XML documents, comments, tags, strings (text nodes), XML chunks, etc." ;
HELP: xml>string
{ $values { "xml" "an xml document" } { "string" "a string" } }
{ $description "converts an XML document into a string" }
{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ;
{ $values { "xml" "an XML document" } { "string" "a string" } }
{ $description "This converts an XML document " { $link xml } " into a string. It can also be used to convert any piece of XML to a string, eg an " { $link xml-chunk } " or " { $link comment } "." }
{ $notes "This does not preserve what type of quotes were used or what data was omitted from version declaration, as that information isn't present in the XML data representation. The whitespace in the text nodes of the original document is preserved." } ;
HELP: pprint-xml>string
{ $values { "xml" "an xml document" } { "string" "a string" } }
{ $values { "xml" "an XML document" } { "string" "a string" } }
{ $description "converts an XML document into a string in a prettyprinted form." }
{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ;
{ $notes "This does not preserve what type of quotes were used or what data was omitted from version declaration, as that information isn't present in the XML data representation. The whitespace in the text nodes of the original document is preserved." } ;
HELP: write-xml
{ $values { "xml" "an XML document" } }
{ $description "prints the contents of an XML document to " { $link output-stream } "." }
{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ;
{ $notes "This does not preserve what type of quotes were used or what data was omitted from version declaration, as that information isn't present in the XML data representation. The whitespace in the text nodes of the original document is preserved." } ;
HELP: pprint-xml
{ $values { "xml" "an XML document" } }
{ $description "prints the contents of an XML document to " { $link output-stream } " in a prettyprinted form." }
{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ;
{ $notes "This does not preserve what type of quotes were used or what data was omitted from version declaration, as that information isn't present in the XML data representation. Whitespace is also not preserved." } ;
HELP: pprint-xml-but
{ $values { "xml" "an XML document" } { "sensitive-tags" "a sequence of names" } }
{ $description "Prettyprints an XML document, leaving the whitespace of the tags with names in sensitive-tags intact." }
{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ;
{ xml>string write-xml pprint-xml pprint-xml>string } related-words
HELP: pprint-xml>string-but
{ $values { "xml" "an XML document" } { "sensitive-tags" "a sequence of names" } { "string" string } }
{ $description "Prettyprints an XML document, returning the result as a string and leaving the whitespace of the tags with names in sensitive-tags intact." }
{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" } ;
{ xml>string write-xml pprint-xml pprint-xml>string pprint-xml>string-but pprint-xml-but } related-words
HELP: indenter
{ $var-description "Contains the string which is used for indenting in the XML prettyprinter. For example, to print an XML document using " { $snippet "%%%%" } " for indentation, you can use the following:" }
{ $example {" USING: xml.interpolate xml.writer namespaces ;
[XML <foo>bar</foo> XML] "%%%%" indenter [ pprint-xml ] with-variable "} {"
<foo>
%%%%bar
</foo>"} } ;
HELP: sensitive-tags
{ $var-description "Contains a sequence of " { $link name } "s where whitespace should be considered significant for prettyprinting purposes. The sequence can contain " { $link string } "s in place of names. For example, to preserve whitespace inside a " { $snippet "pre" } " tag:" }
{ $example {" USING: xml.interpolate xml.writer namespaces ;
[XML <html> <head> <title> something</title></head><body><pre>bing
bang
bong</pre></body></html> XML] { "pre" } sensitive-tags [ pprint-xml ] with-variable "} {"
<html>
<head>
<title>
something
</title>
</head>
<body>
<pre>bing
bang
bong</pre>
</body>
</html>"} } ;

View File

@ -7,7 +7,7 @@ IN: xml.writer.tests
\ write-xml must-infer
\ xml>string must-infer
\ pprint-xml must-infer
\ pprint-xml-but must-infer
! Add a test for pprint-xml with sensitive-tags
[ "foo" ] [ T{ name { main "foo" } } name>string ] unit-test
[ "foo" ] [ T{ name { space "" } { main "foo" } } name>string ] unit-test
@ -51,11 +51,11 @@ IN: xml.writer.tests
]>
<x>&foo;</x>"} pprint-reprints-as
[ t ] [ "<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.1//EN' 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd' >" dup string>xml-chunk xml-chunk>string = ] unit-test
[ t ] [ "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\" >" dup string>xml-chunk xml>string = ] unit-test
[ "<?xml version=\"1.0\" encoding=\"UTF-8\"?><a b=\"c\"/>" ]
[ "<a b='c'/>" string>xml xml>string ] unit-test
[ "<?xml version=\"1.0\" encoding=\"UTF-8\"?><foo>bar baz</foo>" ]
[ "<foo>bar</foo>" string>xml [ " baz" append ] map xml>string ] unit-test
[ "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<foo>\n bar\n</foo>" ]
[ "<foo> bar </foo>" string>xml pprint-xml>string ] unit-test
[ "<foo'>" ] [ "<foo'>" <unescaped> xml-chunk>string ] unit-test
[ "<foo'>" ] [ "<foo'>" <unescaped> xml>string ] unit-test

View File

@ -5,14 +5,15 @@ assocs combinators io io.streams.string accessors
xml.data wrap xml.entities unicode.categories fry ;
IN: xml.writer
SYMBOL: xml-pprint?
SYMBOL: sensitive-tags
SYMBOL: indentation
SYMBOL: indenter
" " indenter set-global
<PRIVATE
SYMBOL: xml-pprint?
SYMBOL: indentation
: sensitive? ( tag -- ? )
sensitive-tags get swap '[ _ names-match? ] contains? ;
@ -49,22 +50,22 @@ PRIVATE>
<PRIVATE
: write-quoted ( string -- )
CHAR: " write1 write CHAR: " write1 ;
: print-attrs ( assoc -- )
[
" " write
swap print-name
"=\"" write
escape-quoted-string write
"\"" write
[ bl print-name "=" write ]
[ escape-quoted-string write-quoted ] bi*
] assoc-each ;
PRIVATE>
GENERIC: write-xml-chunk ( object -- )
GENERIC: write-xml ( xml -- )
<PRIVATE
M: string write-xml-chunk
M: string write-xml
escape-string xml-pprint? get [
dup [ blank? ] all?
[ drop "" ]
@ -78,130 +79,115 @@ M: string write-xml-chunk
: write-start-tag ( tag -- )
write-tag ">" write ;
M: contained-tag write-xml-chunk
M: contained-tag write-xml
write-tag "/>" write ;
: write-children ( tag -- )
indent children>> ?filter-children
[ write-xml-chunk ] each unindent ;
[ write-xml ] each unindent ;
: write-end-tag ( tag -- )
?indent "</" write print-name CHAR: > write1 ;
M: open-tag write-xml-chunk
M: open-tag write-xml
xml-pprint? get [
{
[ sensitive? not xml-pprint? get and xml-pprint? set ]
[ write-start-tag ]
[ sensitive? not xml-pprint? get and xml-pprint? set ]
[ write-children ]
[ write-end-tag ]
} cleave
] dip xml-pprint? set ;
M: unescaped write-xml-chunk
M: unescaped write-xml
string>> write ;
M: comment write-xml-chunk
M: comment write-xml
"<!--" write text>> write "-->" write ;
M: element-decl write-xml-chunk
"<!ELEMENT " write
[ name>> write " " write ]
[ content-spec>> write ">" write ]
bi ;
: write-decl ( decl name quot: ( decl -- slot ) -- )
"<!" write swap write bl
[ name>> write bl ]
swap '[ @ write ">" write ] bi ; inline
M: attlist-decl write-xml-chunk
"<!ATTLIST " write
[ name>> write " " write ]
[ att-defs>> write ">" write ]
bi ;
M: element-decl write-xml
"ELEMENT" [ content-spec>> ] write-decl ;
M: notation-decl write-xml-chunk
"<!NOTATION " write
[ name>> write " " write ]
[ id>> write ">" write ]
bi ;
M: attlist-decl write-xml
"ATTLIST" [ att-defs>> ] write-decl ;
M: entity-decl write-xml-chunk
M: notation-decl write-xml
"NOTATION" [ id>> ] write-decl ;
M: entity-decl write-xml
"<!ENTITY " write
[ pe?>> [ " % " write ] when ]
[ name>> write " \"" write ] [
def>> f xml-pprint?
[ write-xml-chunk ] with-variable
[ write-xml ] with-variable
"\">" write
] tri ;
M: system-id write-xml-chunk
"SYSTEM '" write system-literal>> write "'" write ;
M: system-id write-xml
"SYSTEM" write bl system-literal>> write-quoted ;
M: public-id write-xml-chunk
"PUBLIC '" write
[ pubid-literal>> write "' '" write ]
[ system-literal>> write "'" write ] bi ;
M: public-id write-xml
"PUBLIC" write bl
[ pubid-literal>> write-quoted bl ]
[ system-literal>> write-quoted ] bi ;
: write-internal-subset ( dtd -- )
[
"[" write indent
directives>> [ ?indent write-xml-chunk ] each
directives>> [ ?indent write-xml ] each
unindent ?indent "]" write
] when* ;
M: doctype-decl write-xml-chunk
M: doctype-decl write-xml
?indent "<!DOCTYPE " write
[ name>> write " " write ]
[ external-id>> [ write-xml-chunk " " write ] when* ]
[ external-id>> [ write-xml " " write ] when* ]
[ internal-subset>> write-internal-subset ">" write ] tri ;
M: directive write-xml-chunk
M: directive write-xml
"<!" write text>> write CHAR: > write1 nl ;
M: instruction write-xml-chunk
M: instruction write-xml
"<?" write text>> write "?>" write ;
M: number write-xml-chunk
M: number write-xml
"Numbers are not allowed in XML" throw ;
M: sequence write-xml-chunk
[ write-xml-chunk ] each ;
M: sequence write-xml
[ write-xml ] each ;
PRIVATE>
M: prolog write-xml
"<?xml version=" write
[ version>> write-quoted ]
[ " encoding=" write encoding>> write-quoted ]
[ standalone>> [ " standalone=\"yes\"" write ] when ] tri
"?>" write ;
: write-prolog ( xml -- )
"<?xml version=\"" write dup version>> write
"\" encoding=\"" write dup encoding>> write
standalone>> [ "\" standalone=\"yes" write ] when
"\"?>" write ;
: write-xml ( xml -- )
M: xml write-xml
{
[ prolog>> write-prolog ]
[ before>> write-xml-chunk ]
[ body>> write-xml-chunk ]
[ after>> write-xml-chunk ]
[ prolog>> write-xml ]
[ before>> write-xml ]
[ body>> write-xml ]
[ after>> write-xml ]
} cleave ;
M: xml write-xml-chunk
body>> write-xml-chunk ;
PRIVATE>
: xml>string ( xml -- string )
[ write-xml ] with-string-writer ;
: xml-chunk>string ( object -- string )
[ write-xml-chunk ] with-string-writer ;
: pprint-xml-but ( xml sensitive-tags -- )
: pprint-xml ( xml -- )
[
[ assure-name ] map sensitive-tags set
sensitive-tags [ [ assure-name ] map ] change
0 indentation set
xml-pprint? on
write-xml
] with-scope ;
: pprint-xml ( xml -- )
f pprint-xml-but ;
: pprint-xml>string-but ( xml sensitive-tags -- string )
[ pprint-xml-but ] with-string-writer ;
: pprint-xml>string ( xml -- string )
f pprint-xml>string-but ;
[ pprint-xml ] with-string-writer ;

View File

@ -20,21 +20,20 @@ HELP: file>xml
HELP: read-xml-chunk
{ $values { "stream" "an input stream" } { "seq" "a sequence of elements" } }
{ $description "Rather than parse a document, as " { $link read-xml } " does, this word parses and returns a sequence of XML elements (tags, strings, etc), ie a document fragment. This is useful for pieces of XML which may have more than one main tag." }
{ $description "Rather than parse a document, as " { $link read-xml } " does, this word parses and returns a sequence of XML elements (tags, strings, etc), ie a document fragment. This is useful for pieces of XML which may have more than one main tag. The encoding is not automatically detected, and a stream with an encoding (ie. one which returns strings from " { $link read } ") should be used as input." }
{ $see-also read-xml } ;
HELP: each-element
{ $values { "stream" "an input stream" } { "quot" "a quotation ( xml-elem -- )" } }
{ $description "Parses the XML document, and whenever an event is encountered (a tag piece, comment, parsing instruction, directive or string element), the quotation is called with that event on the stack. The quotation has all responsibility to deal with the event properly, and it is advised that generic words be used in dispatching on the event class." }
{ $notes "It is important to note that this is not SAX, merely an event-based XML view" }
{ $description "Parses the XML document, and whenever an event is encountered (a tag piece, comment, parsing instruction, directive or string element), the quotation is called with that event on the stack. The quotation has all responsibility to deal with the event properly. The encoding of the stream is automatically detected, so a binary input stream should be used." }
{ $see-also read-xml } ;
HELP: pull-xml
{ $class-description "Represents the state of a pull-parser for XML. Has one slot, scope, which is a namespace which contains all relevant state information." }
{ $class-description "Represents the state of a pull-parser for XML. Has one slot, " { $snippet "scope" } ", which is a namespace which contains all relevant state information." }
{ $see-also <pull-xml> pull-event pull-elem } ;
HELP: <pull-xml>
{ $values { "pull-xml" "a pull-xml tuple" } }
{ $values { "pull-xml" pull-xml } }
{ $description "Creates an XML pull-based parser which reads from " { $link input-stream } ", executing all initial XML commands to set up the parser." }
{ $see-also pull-xml pull-elem pull-event } ;
@ -87,7 +86,7 @@ ARTICLE: { "xml" "events" } "Event-based XML parsing"
{ $subsection pull-elem } ;
ARTICLE: "xml" "XML parser"
"The " { $vocab-link "xml" } " vocabulary implements the XML 1.0 and 1.1 standards, converting strings of text into XML and vice versa."
"The " { $vocab-link "xml" } " vocabulary implements the XML 1.0 and 1.1 standards, converting strings of text into XML and vice versa. The parser checks for well-formedness but is not validating. There is only partial support for processing DTDs."
{ $subsection { "xml" "reading" } }
{ $subsection { "xml" "events" } }
{ $vocab-subsection "Writing XML" "xml.writer" }

View File

@ -3,7 +3,8 @@
USING: accessors arrays io io.encodings.binary io.files
io.streams.string kernel namespaces sequences strings io.encodings.utf8
xml.data xml.errors xml.elements ascii xml.entities
xml.writer xml.state xml.autoencoding assocs xml.tokenize xml.name ;
xml.writer xml.state xml.autoencoding assocs xml.tokenize
combinators.short-circuit xml.name ;
IN: xml
<PRIVATE
@ -22,14 +23,18 @@ GENERIC: process ( object -- )
M: object process add-child ;
M: prolog process
xml-stack get V{ { f V{ } } } =
xml-stack get
{ V{ { f V{ "" } } } V{ { f V{ } } } } member?
[ bad-prolog ] unless drop ;
: before-main? ( -- ? )
xml-stack get {
[ length 1 = ]
[ first second [ tag? ] contains? not ]
} 1&& ;
M: directive process
xml-stack get dup length 1 =
swap first second [ tag? ] contains? not and
[ misplaced-directive ] unless
add-child ;
before-main? [ misplaced-directive ] unless add-child ;
M: contained process
[ name>> ] [ attrs>> ] bi
@ -49,17 +54,14 @@ M: closer process
: init-xml-stack ( -- )
V{ } clone xml-stack set
extra-entities [ H{ } assoc-like ] change
f push-xml ;
: default-prolog ( -- prolog )
"1.0" "UTF-8" f <prolog> ;
: reset-prolog ( -- )
default-prolog prolog-data set ;
: init-xml ( -- )
reset-prolog init-xml-stack init-ns-stack ;
init-ns-stack
extra-entities [ H{ } assoc-like ] change ;
: assert-blanks ( seq pre? -- )
swap [ string? ] filter
@ -80,7 +82,11 @@ M: closer process
! this does *not* affect the contents of the stack
[ notags ] unless* ;
: make-xml-doc ( prolog seq -- xml-doc )
: get-prolog ( seq -- prolog )
first dup prolog? [ drop default-prolog ] unless ;
: make-xml-doc ( seq -- xml-doc )
[ get-prolog ] keep
dup [ tag? ] find
[ assure-tags cut rest no-pre/post no-post-tags ] dip
swap <xml> ;
@ -95,8 +101,7 @@ TUPLE: pull-xml scope ;
: <pull-xml> ( -- pull-xml )
[
input-stream [ ] change ! bring var in this scope
init-parser reset-prolog init-ns-stack
text-now? on
init-xml text-now? on
] H{ } make-assoc
pull-xml boa ;
! pull-xml needs to call start-document somewhere
@ -135,50 +140,43 @@ PRIVATE>
get-char [ make-tag call-under xml-loop ]
[ drop ] if ; inline recursive
: read-seq ( stream quot n -- seq )
rot [
depth set
init-xml init-xml-stack
call
[ process ] xml-loop
done? [ unclosed ] unless
xml-stack get first second
] with-state ; inline
PRIVATE>
: each-element ( stream quot: ( xml-elem -- ) -- )
swap [
reset-prolog init-ns-stack
init-xml
start-document [ call-under ] when*
xml-loop
] with-state ; inline
: (read-xml) ( -- )
start-document [ process ] when*
[ process ] xml-loop ; inline
: (read-xml-chunk) ( stream -- prolog seq )
[
init-xml (read-xml)
done? [ unclosed ] unless
xml-stack get first second
prolog-data get swap
] with-state ;
: read-xml ( stream -- xml )
0 depth
[ (read-xml-chunk) make-xml-doc ] with-variable ;
[ start-document [ process ] when* ]
0 read-seq make-xml-doc ;
: read-xml-chunk ( stream -- seq )
1 depth
[ (read-xml-chunk) nip ] with-variable
<xml-chunk> ;
[ check ] 1 read-seq <xml-chunk> ;
: string>xml ( string -- xml )
t string-input?
[ <string-reader> read-xml ] with-variable ;
<string-reader> [ check ] 0 read-seq make-xml-doc ;
: string>xml-chunk ( string -- xml )
t string-input?
[ <string-reader> read-xml-chunk ] with-variable ;
<string-reader> read-xml-chunk ;
: file>xml ( filename -- xml )
binary <file-reader> read-xml ;
: read-dtd ( stream -- dtd )
[
reset-prolog
H{ } clone extra-entities set
take-internal-subset
] with-state ;

View File

@ -31,7 +31,7 @@ SYMBOL: ignore-case?
! PROP, PROPS
: parse-prop-tag ( tag -- key value )
"NAME" over at "VALUE" rot at ;
[ "NAME" attr ] [ "VALUE" attr ] bi ;
: parse-props-tag ( tag -- assoc )
child-tags
@ -40,7 +40,7 @@ SYMBOL: ignore-case?
: position-attrs ( tag -- at-line-start? at-whitespace-end? at-word-start? )
! XXX Wrong logic!
{ "AT_LINE_START" "AT_WHITESPACE_END" "AT_WORD_START" }
swap [ at string>boolean ] curry map first3 ;
[ attr string>boolean ] with map first3 ;
: parse-literal-matcher ( tag -- matcher )
dup children>string