assorted refactoring and documentation for XML

darcs
microdan 2006-12-14 23:42:52 +00:00
parent e26d5802a4
commit 725ed93b35
5 changed files with 108 additions and 30 deletions

View File

@ -11,4 +11,5 @@ PROVIDE: libs/xml
} } } }
{ +tests+ { { +tests+ {
"test.factor" "test.factor"
} } ; } }
{ +help+ { "xml" "intro" } } ;

View File

@ -1,8 +1,8 @@
! Copyright (C) 2005, 2006 Daniel Ehrenberg ! Copyright (C) 2005, 2006 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
IN: xml
USING: errors hashtables io kernel math namespaces prettyprint sequences USING: errors hashtables io kernel math namespaces prettyprint sequences
arrays generic strings vectors char-classes ; arrays generic strings vectors char-classes ;
IN: xml
! * Parsing tags ! * Parsing tags
@ -62,7 +62,7 @@ TUPLE: instruction text ;
! this should make sure the name doesn't include 'xml' ! this should make sure the name doesn't include 'xml'
"?>" take-string <instruction> ; "?>" take-string <instruction> ;
: make-tag ( -- tag/f ) : make-tag ( -- tag )
{ { [ get-char dup CHAR: ! = ] [ drop next directive ] } { { [ get-char dup CHAR: ! = ] [ drop next directive ] }
{ [ CHAR: ? = ] [ next instruction ] } { [ CHAR: ? = ] [ next instruction ] }
{ [ t ] [ { [ t ] [
@ -252,14 +252,15 @@ M: bad-version error.
dup assure-no-extra prolog-attrs dup assure-no-extra prolog-attrs
] when ; ] when ;
: init-xml ( stream -- ) : basic-init ( stream -- )
stdio set stdio set
{ 0 0 0 "" } clone spot set { 0 0 0 "" } clone spot set
f record set f now-recording? set f record set f now-recording? set
next next
"1.0" "iso-8859-1" f <prolog> prolog-data set "1.0" "iso-8859-1" f <prolog> prolog-data set ;
init-xml-stack
init-ns-stack ; : init-xml ( stream -- )
basic-init init-xml-stack init-ns-stack ;
: init-xml-string ( string -- ) ! for debugging : init-xml-string ( string -- ) ! for debugging
<string-reader> init-xml ; <string-reader> init-xml ;
@ -279,15 +280,37 @@ M: multitags error.
dup [ tag? ] contains? [ <multitags> throw ] when r> dup [ tag? ] contains? [ <multitags> throw ] when r>
swap <xml-doc> ; swap <xml-doc> ;
! * Views of XML
SYMBOL: text-now?
TUPLE: pull-xml scope ;
C: pull-xml ( stream -- pull-xml )
[
swap basic-init parse-prolog
t text-now? set
[ namestack pop swap set-pull-xml-scope ] keep
] with-scope ;
: pull-next ( pull -- xml-elem/f )
pull-xml-scope [
text-now? get [ parse-text f ] [
get-char [ make-tag t ] [ f f ] if
] if text-now? set
] bind ;
: call-under ( quot object -- quot )
swap dup slip ; inline
: sax-loop ( quot -- ) ! quot: xml-elem -- : sax-loop ( quot -- ) ! quot: xml-elem --
parse-text [ swap dup slip ] each parse-text [ call-under ] each
get-char [ make-tag swap dup slip sax-loop ] get-char [ make-tag call-under sax-loop ]
[ drop ] if ; inline [ drop ] if ; inline
: sax ( stream quot -- ) ! quot: xml-elem -- : sax ( stream quot -- ) ! quot: xml-elem --
swap [ swap [
init-xml parse-prolog basic-init parse-prolog
prolog-data get swap dup slip prolog-data get call-under
sax-loop sax-loop
] with-scope ; inline ] with-scope ; inline

View File

@ -206,7 +206,6 @@ TUPLE: reference name ;
<reference> , next new-record ; <reference> , next new-record ;
: (parse-char) ( ch -- ) : (parse-char) ( ch -- )
! The similarities with (parse-text) should be factored out
get-char { get-char {
{ [ dup not ] { [ dup not ]
[ 2drop 0 end-record* , ] } [ 2drop 0 end-record* , ] }

View File

@ -32,32 +32,32 @@ UNION: str-elem string entity reference ;
[ write-str-elem ] each "\"" write [ write-str-elem ] each "\"" write
] hash-each ; ] hash-each ;
GENERIC: (xml>string) ( object -- ) GENERIC: write-item ( object -- )
M: str-elem (xml>string) ! string element M: str-elem write-item ! string element
write-str-elem ; write-str-elem ;
M: contained-tag (xml>string) M: contained-tag write-item
CHAR: < write1 CHAR: < write1
dup print-name dup print-name
tag-props print-props tag-props print-props
"/>" write ; "/>" write ;
M: open-tag (xml>string) M: open-tag write-item
CHAR: < write1 CHAR: < write1
dup print-name dup print-name
dup tag-props print-props dup tag-props print-props
CHAR: > write1 CHAR: > write1
dup tag-children [ (xml>string) ] each dup tag-children [ write-item ] each
"</" write print-name CHAR: > write1 ; "</" write print-name CHAR: > write1 ;
M: comment (xml>string) M: comment write-item
"<!--" write comment-text write "-->" write ; "<!--" write comment-text write "-->" write ;
M: directive (xml>string) M: directive write-item
"<!" write directive-text write CHAR: > write1 ; "<!" write directive-text write CHAR: > write1 ;
M: instruction (xml>string) M: instruction write-item
"<?" write instruction-text write "?>" write ; "<?" write instruction-text write "?>" write ;
: xml-preamble ( xml -- ) : xml-preamble ( xml -- )
@ -67,11 +67,14 @@ M: instruction (xml>string)
prolog-standalone "yes" "no" ? write prolog-standalone "yes" "no" ? write
"\"?>" write ; "\"?>" write ;
: write-chunk ( seq -- )
[ write-item ] each ;
: write-xml ( xml-doc -- ) : write-xml ( xml-doc -- )
dup xml-doc-prolog xml-preamble dup xml-doc-prolog xml-preamble
dup xml-doc-before [ (xml>string) ] each dup xml-doc-before write-chunk
dup delegate (xml>string) dup delegate write-item
xml-doc-after [ (xml>string) ] each ; xml-doc-after write-chunk ;
: print-xml ( xml-doc -- ) : print-xml ( xml-doc -- )
write-xml terpri ; write-xml terpri ;

View File

@ -247,6 +247,45 @@ HELP: contained
{ $class-description "represents a self-closing tag, like <a/>. Contains two slots, name and props, containing, respectively, the name of the tag and its attributes. Usually, the name-url will be f." } { $class-description "represents a self-closing tag, like <a/>. Contains two slots, name and props, containing, respectively, the name of the tag and its attributes. Usually, the name-url will be f." }
{ $see-also opener closer } ; { $see-also opener closer } ;
HELP: parse-text
{ $values { "array" "an array of text elements" } }
{ $description "moves the pointer from the current spot to the beginning of the next tag, parsing the text underneath, returning the text elements it passed. This parses DTD references like %foo; and XML entities like &bar; &#97; and &amp;" }
{ $see-also parse-name } ;
HELP: parse-name
{ $values { "name" "an XML name" } }
{ $description "parses a " { $link name } " from the input stream. Returns a name with only the name-space and name-tag defined, with name-url=f" }
{ $see-also parse-text } ;
HELP: make-tag
{ $values { "tag" "an opener, closer or contained" } }
{ $description "assuming the pointer is just past a <, this word parses until the next > and emits a tuple representing the tag parsed" }
{ $see-also opener closer contained } ;
HELP: pull-xml
{ $class-description "represents the state of a pull-parser for XML. Has one slot, scope, which is a namespace which contains all relevant state information." }
{ $see-also <pull-xml> pull-next } ;
HELP: <pull-xml>
{ $values { "stream" "an input stream containing XML" } { "pull-xml" "a pull-xml tuple" } }
{ $description "creates an XML pull-based parser, executing all initial XML commands to set up the parser" }
{ $see-also pull-xml pull-next } ;
HELP: pull-next
{ $values { "pull" "an XML pull parser" } { "xml-elem/f" "an XML tag event, sequence of string elements, or f" } }
{ $description "gets the next xml element from the given XML pull parser. Returns f upon exaustion" }
{ $see-also pull-xml <pull-xml> } ;
HELP: write-item
{ $values { "object" "an XML element" } }
{ $description "writes an XML element to stdio" }
{ $see-also write-chunk write-xml } ;
HELP: write-chunk
{ $values { "seq" "an XML document fragment" } }
{ $description "writes an XML document fragment, ie a sequence of XML elements, to stdio" }
{ $see-also write-item write-xml } ;
ARTICLE: { "xml" "intro" } "XML" ARTICLE: { "xml" "intro" } "XML"
"The XML module attempts to implement the XML 1.1 standard, converting strings of text into XML and vice versa. It currently is a work in progress. Together with XML-RPC, this is a component of the F2EE framework." "The XML module attempts to implement the XML 1.1 standard, converting strings of text into XML and vice versa. It currently is a work in progress. Together with XML-RPC, this is a component of the F2EE framework."
$terpri $terpri
@ -260,13 +299,19 @@ ARTICLE: { "xml" "intro" } "XML"
ARTICLE: { "xml" "basic" } "Basic words for XML processing" ARTICLE: { "xml" "basic" } "Basic words for XML processing"
"These are the most basic words needed for processing an XML document" "These are the most basic words needed for processing an XML document"
$terpri
"Parsing XML:"
{ $subsection string>xml } { $subsection string>xml }
{ $subsection xml>string }
{ $subsection xml-parse-error }
{ $subsection xml-reprint }
{ $subsection write-xml }
{ $subsection read-xml } { $subsection read-xml }
{ $subsection xml-chunk } ; { $subsection xml-chunk }
"Printing XML"
{ $subsection xml>string }
{ $subsection write-xml }
{ $subsection write-item }
{ $subsection write-chunk }
"Other"
{ $subsection xml-parse-error }
{ $subsection xml-reprint } ;
ARTICLE: { "xml" "classes" } "XML data classes" ARTICLE: { "xml" "classes" } "XML data classes"
"Data types that XML documents are made of:" "Data types that XML documents are made of:"
@ -295,7 +340,7 @@ ARTICLE: { "xml" "construct" } "XML data constructors"
ARTICLE: { "xml" "utils" } "XML processing utilities" ARTICLE: { "xml" "utils" } "XML processing utilities"
"Utilities for processing XML include..." "Utilities for processing XML include..."
$terpri $terpri
"System for creating words which dispatch on XML tags:" "System sfor creating words which dispatch on XML tags:"
{ $subsection POSTPONE: PROCESS: } { $subsection POSTPONE: PROCESS: }
{ $subsection POSTPONE: TAG: } { $subsection POSTPONE: TAG: }
"Combinators for traversing XML trees:" "Combinators for traversing XML trees:"
@ -321,6 +366,9 @@ ARTICLE: { "xml" "internal" } "Internals of the XML parser"
{ $subsection take-char } { $subsection take-char }
{ $subsection take-string } { $subsection take-string }
{ $subsection next } { $subsection next }
{ $subsection parse-text }
{ $subsection make-tag }
{ $subsection parse-name }
{ $subsection process } ; ! should I have more? less? { $subsection process } ; ! should I have more? less?
ARTICLE: { "xml" "events" } "Event-based XML parsing" ARTICLE: { "xml" "events" } "Event-based XML parsing"
@ -328,4 +376,8 @@ ARTICLE: { "xml" "events" } "Event-based XML parsing"
{ $subsection sax } { $subsection sax }
{ $subsection opener } { $subsection opener }
{ $subsection closer } { $subsection closer }
{ $subsection contained } ; { $subsection contained }
"There is also pull-based parsing to augment the push-parsing of SAX. This is probably easier to use and more logical. It uses the same parsing objects as the above style of parsing, except string elements are always in arrays, for example { \"\" }. Relevant pull-parsing words are:"
{ $subsection <pull-xml> }
{ $subsection pull-xml }
{ $subsection pull-next } ;