332 lines
16 KiB
Plaintext
332 lines
16 KiB
Plaintext
! Copyright (C) 2005, 2006 Daniel Ehrenberg
|
|
! See http://factorcode.org/license.txt for BSD license.
|
|
USING: help kernel xml ;
|
|
|
|
HELP: string>xml
|
|
{ $values { "string" "a string" } { "xml-doc" "an xml document" } }
|
|
{ $description "converts a string into an " { $link xml-doc }
|
|
" datatype for further processing" }
|
|
{ $see-also xml>string xml-reprint } ;
|
|
|
|
HELP: xml>string
|
|
{ $values { "xml-doc" "an xml document" } { "string" "a string" } }
|
|
{ $description "converts an xml document (" { $link xml-doc } ") into a string" }
|
|
{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" }
|
|
{ $see-also string>xml xml-reprint write-xml } ;
|
|
|
|
HELP: xml-parse-error
|
|
{ $class-description "the exception class that all parsing errors in XML documents are in." } ;
|
|
|
|
HELP: xml-reprint
|
|
{ $values { "string" "a string of XML" } }
|
|
{ $description "parses XML and prints it out again, for testing purposes" }
|
|
{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" }
|
|
{ $see-also write-xml xml>string string>xml } ;
|
|
|
|
HELP: write-xml
|
|
{ $values { "xml-doc" "an XML document" } }
|
|
{ $description "prints the contents of an XML document (" { $link xml-doc } ") to stdio" }
|
|
{ $notes "does not preserve what type of quotes were used or what data was omitted from version declaration" }
|
|
{ $see-also xml>string xml-reprint read-xml } ;
|
|
|
|
HELP: read-xml
|
|
{ $values { "stream" "a stream that supports readln" }
|
|
{ "xml-doc" "an XML document" } }
|
|
{ $description "exhausts the given stream, reading an XML document from it" }
|
|
{ $see-also write-xml string>xml } ;
|
|
|
|
HELP: PROCESS:
|
|
{ $syntax "PROCESS: word" }
|
|
{ $values { "word" "a new word to define" } }
|
|
{ $description "creates a new word to process XML tags" }
|
|
{ $see-also POSTPONE: TAG: } ;
|
|
|
|
HELP: TAG:
|
|
{ $syntax "TAG: tag word definition... ;" }
|
|
{ $values { "tag" "an xml tag name" } { "word" "an XML process" } }
|
|
{ $description "defines what a process should do when it encounters a specific tag" }
|
|
{ $examples { $code "PROCESS: x ( tag -- )\nTAG: a x drop \"hi\" write ;" } }
|
|
{ $see-also POSTPONE: PROCESS: } ;
|
|
|
|
HELP: xml-each
|
|
{ $values { "tag" "an XML tag" } { "quot" "a quotation ( element -- )" } }
|
|
{ $description "applies the quotation to each element (tags, strings, DTD references, etc) in the tag, moving top-down" }
|
|
{ $see-also xml-map xml-subset } ;
|
|
|
|
HELP: xml-map
|
|
{ $values { "tag" "an XML tag" } { "quot" "a quotation ( element -- element )" }
|
|
{ "tag" "an XML tag with the quotation applied to each element" } }
|
|
{ $description "applies the quotation to each element (tags, strings, DTD references, etc) in the tag, moving top-down, and produces a new tag" }
|
|
{ $see-also xml-each xml-subset } ;
|
|
|
|
HELP: xml-subset
|
|
{ $values { "tag" "an XML tag" } { "quot" "a quotation ( element -- ? )" }
|
|
{ "seq" "sequence of elements" } }
|
|
{ $description "applies the quotation to each element (tags, strings, DTD references, etc) in the tag, moving top-down, producing a sequence of the elements for which the quotation does not return false" }
|
|
{ $see-also xml-map xml-each } ;
|
|
|
|
HELP: build-tag*
|
|
{ $values { "items" "sequence of elements" } { "name" "string" }
|
|
{ "tag" "an XML tag" } }
|
|
{ $description "builds a " { $link tag } " with the specified name, in the namespace \"\" and URL \"\" containing the children listed in items" }
|
|
{ $see-also build-tag build-xml-doc } ;
|
|
|
|
HELP: build-tag
|
|
{ $values { "item" "an element" } { "name" "string" } { "tag" "XML tag" } }
|
|
{ $description "builds a " { $link tag } " with the specified name containing the single child item" }
|
|
{ $see-also build-tag* build-xml-doc } ;
|
|
|
|
HELP: build-xml-doc
|
|
{ $values { "tag" "an XML tag" } { "xml-doc" "an XML document" } }
|
|
{ $description "builds an XML document out of a tag" }
|
|
{ $see-also build-tag* build-tag } ;
|
|
|
|
HELP: tag
|
|
{ $class-description "tuple representing an XML tag, delegating to a " { $link name } ", containing the slots props (a hashtable) and children (a sequence)" }
|
|
{ $see-also <tag> name contained-tag xml-doc } ;
|
|
|
|
HELP: <tag>
|
|
{ $values { "name" "an XML tag name" }
|
|
{ "props" "a hashtable of XML properties" }
|
|
{ "children" "a sequence" } }
|
|
{ $description "constructs an XML " { $link tag } ", with the name (not a string) and tag properties specified in props, and children specified" }
|
|
{ $see-also tag <contained-tag> build-tag build-tag* } ;
|
|
|
|
HELP: name
|
|
{ $class-description "represents an XML name, with the fields space (a string representing the namespace, as written in the document), tag (a string of the actual name of the tag) and url (a string of the URL that the namespace points to)" }
|
|
{ $see-also <name> tag } ;
|
|
|
|
HELP: <name> ( space tag url -- name )
|
|
{ $values { "space" "a string" } { "tag" "a string" } { "url" "a string" }
|
|
{ "name" "an XML tag name" } }
|
|
{ $description "creates a name tuple with the name-space space and the name-tag tag and the name-url url." }
|
|
{ $see-also name <tag> } ;
|
|
|
|
HELP: contained-tag
|
|
{ $class-description "delegates to tag representing a tag like <a/> with no contents. The tag properties are accessed with tag-props" }
|
|
{ $see-also tag <contained-tag> } ;
|
|
|
|
HELP: <contained-tag>
|
|
{ $values { "name" "an XML tag name" }
|
|
{ "props" "a hashtable of XML properties" }
|
|
{ "contained-tag" "an XML tag" } }
|
|
{ $description "creates an empty tag (like <a/>) with the specified name and tag properties. This delegates to tag" }
|
|
{ $see-also contained-tag <tag> } ;
|
|
|
|
HELP: reference
|
|
{ $class-description "represents a DTD reference like %foo;" }
|
|
{ $see-also <reference> entity } ;
|
|
|
|
HELP: <reference> ( name -- reference )
|
|
{ $values { "name" "a string" } { "reference" "an XML reference" } }
|
|
{ $description "creates a DTD reference (like %foo;) with the specified name" }
|
|
{ $see-also reference <entity> } ;
|
|
|
|
HELP: entity
|
|
{ $class-description "represents an XML entity like &foo;" }
|
|
{ $see-also <entity> reference } ;
|
|
|
|
HELP: <entity> ( name -- entity )
|
|
{ $values { "name" "a string" } { "entity" "an XML entity" } }
|
|
{ $description "creates an XML entity like &foo; with the specified name" }
|
|
{ $see-also entity <reference> } ;
|
|
|
|
HELP: xml-doc
|
|
{ $class-description "tuple representing an XML document, delegating to the main tag, containing the fields prolog (the header <?xml...?>), before (whatever comes between the prolog and the main tag) and after (whatever comes after the main tag)" }
|
|
{ $see-also <xml-doc> tag prolog } ;
|
|
|
|
HELP: <xml-doc>
|
|
{ $values { "prolog" "an XML prolog" } { "before" "a sequence of XML elements" }
|
|
{ "main" "an XML tag" } { "after" "a sequence of XML elements" } }
|
|
{ $description "creates an XML document, delegating to the main tag, with the specified prolog, before, and after" }
|
|
{ $see-also xml-doc <tag> } ;
|
|
|
|
HELP: prolog
|
|
{ $class-description "represents an XML prolog, with the tuple fields version (containing \"1.0\" or \"1.1\"), encoding (a string representing the encoding type), and standalone (t or f, whether the document is standalone without external entities)" }
|
|
{ $see-also <prolog> xml-doc } ;
|
|
|
|
HELP: <prolog> ( version encoding standalone -- prolog )
|
|
{ $values { "version" "a string, 1.0 or 1.1" }
|
|
{ "encoding" "a string" } { "standalone" "a boolean" } { "prolog" "an XML prolog" } }
|
|
{ $description "creates an XML prolog tuple" }
|
|
{ $see-also prolog <xml-doc> } ;
|
|
|
|
HELP: comment
|
|
{ $class-description "represents a comment in XML. Has one slot, text, which contains the string of the comment" }
|
|
{ $see-also <comment> } ;
|
|
|
|
HELP: <comment> ( text -- comment )
|
|
{ $values { "text" "a string" } { "comment" "a comment" } }
|
|
{ $description "creates an XML comment tuple" }
|
|
{ $see-also comment } ;
|
|
|
|
HELP: instruction
|
|
{ $class-description "represents an XML instruction, such as <?xsl stylesheet='foo.xml'?>. Contains one slot, text, which contains the string between the question marks." }
|
|
{ $see-also <instruction> } ;
|
|
|
|
HELP: <instruction> ( text -- instruction )
|
|
{ $values { "text" "a string" } { "instruction" "an XML instruction" } }
|
|
{ $description "creates an XML parsing instruction, such as <?xsl stylesheet='foo.xml'?>." }
|
|
{ $see-also instruction } ;
|
|
|
|
HELP: names-match?
|
|
{ $values { "name1" "a name" } { "name2" "a name" } { "?" "t or f" } }
|
|
{ $description "checks to see if the two names match, that is, if all fields are equal, ignoring fields whose value is f in either name." }
|
|
{ $examples { $code "T{ name f \"rpc\" \"methodCall\" f } T{ name f f \"methodCall\" \"http://www.xmlrpc.org/\" } names-match? ! => t" } }
|
|
{ $see-also name } ;
|
|
|
|
HELP: xml-chunk
|
|
{ $values { "stream" "an input stream" } { "seq" "a sequence of elements" } }
|
|
{ $description "rather than parse a document, as " { $link read-xml } " does, this word parses and returns a sequence of XML elements (tags, strings, etc). This is useful for fragments of XML which may have more than one main tag." }
|
|
{ $see-also read-xml } ;
|
|
|
|
HELP: xml-find
|
|
{ $values { "tag" "an XML element or document" } { "quot" "a quotation ( elem -- ? )" } { "tag" "an XML element which satisfies the predicate" } }
|
|
{ $description "finds the first element in the XML document which satisfies the predicate, moving from the outermost element to the innermost, top-down" }
|
|
{ $see-also xml-each xml-map get-id } ;
|
|
|
|
HELP: get-id
|
|
{ $values { "tag" "an XML tag or document" } { "id" "a string" } { "elem" "an XML element or f" } }
|
|
{ $description "finds the XML tag with the specified id, ignoring the namespace" }
|
|
{ $see-also xml-find } ;
|
|
|
|
HELP: spot
|
|
{ $var-description "This variable represents the location in the program. It is in the format { char line column line-str } where char is the current character, line is the line number, column is the column number, and line-str is the full contents of the line, as a string. The contents shouldn't be accessed directly but rather with the proxy words get-char set-char get-line etc." } ;
|
|
|
|
HELP: new-record
|
|
{ $description "puts the parser into recording mode, where all characters encountered are pushed into a string buffer to be extracted later" }
|
|
{ $see-also end-record } ;
|
|
|
|
HELP: end-record
|
|
{ $values { "string" "a string" } }
|
|
{ $description "takes the parser out of recording mode and outputs the string recorded" }
|
|
{ $see-also new-record } ;
|
|
|
|
HELP: skip-until
|
|
{ $values { "quot" "a quotation ( -- ? )" } }
|
|
{ $description "executes " { $link next } " until the quotation yields false. Usually, the quotation will call " { $link get-char } " in its test, but not always." }
|
|
{ $see-also take-until } ;
|
|
|
|
HELP: take-until
|
|
{ $values { "quot" "a quotation ( -- ? )" } { "string" "a string" } }
|
|
{ $description "like " { $link skip-until } " but records what it passes over and outputs the string." }
|
|
{ $see-also skip-until take-char take-string } ;
|
|
|
|
HELP: take-char
|
|
{ $values { "char" "a character" } { "string" "a string" } }
|
|
{ $description "records the document from the current spot to the first instance of the given character. Outputs the content between those two points." }
|
|
{ $see-also take-until take-string } ;
|
|
|
|
HELP: take-string
|
|
{ $values { "match" "a string to match" } { "string" "the portion of the XML document" } }
|
|
{ $description "records the document from the current spot to the first instance of the given string. Outputs the content between those two points." }
|
|
{ $notes "match may not contain a newline" } ;
|
|
|
|
HELP: next
|
|
{ $description "originally written as " { $code "spot inc" } ", code that would no longer run, this word moves the state of the XML parser to the next place in the source file, keeping track of appropriate debugging information." } ;
|
|
|
|
HELP: process
|
|
{ $values { "object" "an opener, closer, contained or text element" } }
|
|
{ $description "takes an XML event and, using the XML stack, processes it and adds it to the tree" } ;
|
|
|
|
HELP: sax
|
|
{ $values { "stream" "an input stream" } { "quot" "a quotation ( xml-elem -- )" } }
|
|
{ $description "parses the XML document, and whenever an event is encountered (a tag piece, comment, parsing instruction, directive or string element), the quotation is called with that event on the stack. The quotation has all responsibility to deal with the event properly, and it is advised that generic words be used in dispatching on the event class." }
|
|
{ $notes "It is important to note that this is not SAX, merely an event-based XML view" }
|
|
{ $see-also read-xml } ;
|
|
|
|
HELP: opener
|
|
{ $class-description "describes an opening tag, like <a>. Contains two slots, name and props, containing, respectively, the name of the tag and its attributes. Usually, the name-url will be f." }
|
|
{ $see-also closer contained } ;
|
|
|
|
HELP: closer
|
|
{ $class-description "describes a closing tag, like </a>. Contains one slot, name, containing the tag's name. Usually, the name-url will be f." }
|
|
{ $see-also opener contained } ;
|
|
|
|
HELP: contained
|
|
{ $class-description "represents a self-closing tag, like <a/>. Contains two slots, name and props, containing, respectively, the name of the tag and its attributes. Usually, the name-url will be f." }
|
|
{ $see-also opener closer } ;
|
|
|
|
ARTICLE: { "xml" "intro" } "XML"
|
|
"The XML module attempts to implement the XML 1.1 standard, converting strings of text into XML and vice versa. It currently is a work in progress. Together with XML-RPC, this is a component of the F2EE framework."
|
|
$terpri
|
|
"The XML module was implemented by Daniel Ehrenberg, with edits by Slava Pestov."
|
|
{ $subsection { "xml" "basic" } }
|
|
{ $subsection { "xml" "classes" } }
|
|
{ $subsection { "xml" "construct" } }
|
|
{ $subsection { "xml" "utils" } }
|
|
{ $subsection { "xml" "internal" } }
|
|
{ $subsection { "xml" "events" } } ;
|
|
|
|
ARTICLE: { "xml" "basic" } "Basic words for XML processing"
|
|
"These are the most basic words needed for processing an XML document"
|
|
{ $subsection string>xml }
|
|
{ $subsection xml>string }
|
|
{ $subsection xml-parse-error }
|
|
{ $subsection xml-reprint }
|
|
{ $subsection write-xml }
|
|
{ $subsection read-xml }
|
|
{ $subsection xml-chunk } ;
|
|
|
|
ARTICLE: { "xml" "classes" } "XML data classes"
|
|
"Data types that XML documents are made of:"
|
|
{ $subsection name }
|
|
{ $subsection tag }
|
|
{ $subsection contained-tag }
|
|
{ $subsection reference }
|
|
{ $subsection entity }
|
|
{ $subsection xml-doc }
|
|
{ $subsection prolog }
|
|
{ $subsection comment }
|
|
{ $subsection instruction } ;
|
|
|
|
ARTICLE: { "xml" "construct" } "XML data constructors"
|
|
"These data types are constructed with:"
|
|
{ $subsection <name> }
|
|
{ $subsection <tag> }
|
|
{ $subsection <contained-tag> }
|
|
{ $subsection <reference> }
|
|
{ $subsection <entity> }
|
|
{ $subsection <xml-doc> }
|
|
{ $subsection <prolog> }
|
|
{ $subsection <comment> }
|
|
{ $subsection <instruction> } ;
|
|
|
|
ARTICLE: { "xml" "utils" } "XML processing utilities"
|
|
"Utilities for processing XML include..."
|
|
$terpri
|
|
"System for creating words which dispatch on XML tags:"
|
|
{ $subsection POSTPONE: PROCESS: }
|
|
{ $subsection POSTPONE: TAG: }
|
|
"Combinators for traversing XML trees:"
|
|
{ $subsection xml-each }
|
|
{ $subsection xml-map }
|
|
{ $subsection xml-subset }
|
|
{ $subsection xml-find }
|
|
{ $subsection get-id }
|
|
"Words for simplified generation of XML:"
|
|
{ $subsection build-tag* }
|
|
{ $subsection build-tag }
|
|
{ $subsection build-xml-doc }
|
|
"Other relevant words:"
|
|
{ $subsection names-match? } ;
|
|
|
|
ARTICLE: { "xml" "internal" } "Internals of the XML parser"
|
|
"The XML parser creates its own parsing framework to process XML documents. The parser operates on streams. Important words involved in processing are:"
|
|
{ $subsection spot }
|
|
{ $subsection new-record }
|
|
{ $subsection end-record }
|
|
{ $subsection skip-until }
|
|
{ $subsection take-until }
|
|
{ $subsection take-char }
|
|
{ $subsection take-string }
|
|
{ $subsection next }
|
|
{ $subsection process } ; ! should I have more? less?
|
|
|
|
ARTICLE: { "xml" "events" } "Event-based XML parsing"
|
|
"In addition to DOM-style parsing based around " { $link read-xml } ", the XML module also provides SAX-style event-based parsing. This uses many of the same data structures as normal XML, with the exception of the classes " { $link xml-doc } " and " { $link tag } " and as such, the articles " { $link { "xml" "classes" } } " and " { $link { "xml" "construct" } } " may be useful in learning how to process documents in this way. Other useful words are:"
|
|
{ $subsection sax }
|
|
{ $subsection opener }
|
|
{ $subsection closer }
|
|
{ $subsection contained } ;
|