XML: more unit tests and event-based processing
parent
55d4b8d332
commit
eb3e6459eb
|
@ -279,9 +279,20 @@ M: multitags error.
|
||||||
dup [ tag? ] contains? [ <multitags> throw ] when r>
|
dup [ tag? ] contains? [ <multitags> throw ] when r>
|
||||||
swap <xml-doc> ;
|
swap <xml-doc> ;
|
||||||
|
|
||||||
|
: sax-loop ( quot -- ) ! quot: xml-elem --
|
||||||
|
parse-text [ swap dup slip ] each
|
||||||
|
get-char [ make-tag swap dup slip sax-loop ]
|
||||||
|
[ drop ] if ; inline
|
||||||
|
|
||||||
|
: sax ( stream quot -- ) ! quot: xml-elem --
|
||||||
|
swap [
|
||||||
|
init-xml parse-prolog
|
||||||
|
prolog-data get swap dup slip
|
||||||
|
sax-loop
|
||||||
|
] with-scope ; inline
|
||||||
|
|
||||||
: (read-xml) ( -- )
|
: (read-xml) ( -- )
|
||||||
parse-text [ add-child ] each
|
[ process ] sax-loop ; inline
|
||||||
get-char [ make-tag process (read-xml) ] when ;
|
|
||||||
|
|
||||||
: (xml-chunk) ( stream -- seq )
|
: (xml-chunk) ( stream -- seq )
|
||||||
init-xml parse-prolog (read-xml)
|
init-xml parse-prolog (read-xml)
|
||||||
|
|
|
@ -18,6 +18,16 @@ SYMBOL: xml-file
|
||||||
tag-props hash ] unit-test
|
tag-props hash ] unit-test
|
||||||
[ t ] [ xml-file get tag-children second contained-tag? ] unit-test
|
[ t ] [ xml-file get tag-children second contained-tag? ] unit-test
|
||||||
[ t ] [ [ "<a></b>" string>xml ] catch xml-parse-error? ] unit-test
|
[ t ] [ [ "<a></b>" string>xml ] catch xml-parse-error? ] unit-test
|
||||||
|
[ T{ comment f "This is where the fun begins!" } ] [
|
||||||
|
xml-file get xml-doc-before [ comment? ] find nip
|
||||||
|
] unit-test
|
||||||
|
[ "entity" ] [ xml-file get [ entity? ] xml-find entity-name ] unit-test
|
||||||
|
[ "reference" ] [ xml-file get [ reference? ] xml-find reference-name ] unit-test
|
||||||
|
[ "xsl stylesheet=\"that-one.xsl\"" ] [
|
||||||
|
xml-file get xml-doc-after [ instruction? ] find nip instruction-text
|
||||||
|
] unit-test
|
||||||
|
[ V{ "fa&g" } ] [ xml-file get "x" get-id tag-children ] unit-test
|
||||||
|
[ { "that" } ] [ xml-file get "this" prop-name-tag ] unit-test
|
||||||
[ "<?xml version=\"1.0\" encoding=\"iso-8859-1\" standalone=\"no\"?><a b=\"c\"/>" ]
|
[ "<?xml version=\"1.0\" encoding=\"iso-8859-1\" standalone=\"no\"?><a b=\"c\"/>" ]
|
||||||
[ "<a b='c'/>" string>xml xml>string ] unit-test
|
[ "<a b='c'/>" string>xml xml>string ] unit-test
|
||||||
[ 32 ] [
|
[ 32 ] [
|
||||||
|
|
|
@ -8,7 +8,9 @@
|
||||||
Here's a new, undefined &entity;
|
Here's a new, undefined &entity;
|
||||||
and here's a %reference; to go along with it!
|
and here's a %reference; to go along with it!
|
||||||
isn't this fun?
|
isn't this fun?
|
||||||
<a:c><d mood="happy"></d></a:c>
|
<a:c><d mood="happy"></d>
|
||||||
|
<e a:id='x'>fa&g</e>
|
||||||
|
</a:c>
|
||||||
</a:b>
|
</a:b>
|
||||||
<!--Well, that went over pretty well-->
|
<!--Well, that went over pretty well-->
|
||||||
<?xsl stylesheet="that-one.xsl"?>
|
<?xsl stylesheet="that-one.xsl"?>
|
||||||
|
|
|
@ -102,16 +102,20 @@ M: xml-doc (xml-find)
|
||||||
: xml-find ( tag quot -- tag ) ! quot: tag -- ?
|
: xml-find ( tag quot -- tag ) ! quot: tag -- ?
|
||||||
swap (xml-find) ; inline
|
swap (xml-find) ; inline
|
||||||
|
|
||||||
: prop-name ( name tag -- seq/f )
|
: prop-name ( tag name -- seq/f )
|
||||||
#! gets the property with the first matching name
|
#! gets the property with the first matching name
|
||||||
tag-props [
|
swap tag-props [
|
||||||
hash-keys [ over names-match? ] find
|
hash-keys [ over names-match? ] find
|
||||||
] keep hash 2nip ;
|
] keep hash 2nip ;
|
||||||
|
|
||||||
|
: prop-name-tag ( tag string -- seq/f )
|
||||||
|
! like prop-name but only with name-tag not the whole name
|
||||||
|
f swap f <name> prop-name ;
|
||||||
|
|
||||||
: get-id ( tag id -- elem ) ! elem=tag.getElementById(id)
|
: get-id ( tag id -- elem ) ! elem=tag.getElementById(id)
|
||||||
swap [
|
swap [
|
||||||
dup tag? [
|
dup tag? [
|
||||||
T{ name f f "id" f } swap prop-name
|
"id" prop-name-tag
|
||||||
[ string? ] subset concat
|
[ string? ] subset concat
|
||||||
over =
|
over =
|
||||||
] [ drop f ] if
|
] [ drop f ] if
|
||||||
|
|
|
@ -227,7 +227,25 @@ HELP: next
|
||||||
|
|
||||||
HELP: process
|
HELP: process
|
||||||
{ $values { "object" "an opener, closer, contained or text element" } }
|
{ $values { "object" "an opener, closer, contained or text element" } }
|
||||||
{ $description { "takes an XML event and, using the XML stack, processes it and adds it to the tree" } } ;
|
{ $description "takes an XML event and, using the XML stack, processes it and adds it to the tree" } ;
|
||||||
|
|
||||||
|
HELP: sax
|
||||||
|
{ $values { "stream" "an input stream" } { "quot" "a quotation ( xml-elem -- )" } }
|
||||||
|
{ $description "parses the XML document, and whenever an event is encountered (a tag piece, comment, parsing instruction, directive or string element), the quotation is called with that event on the stack. The quotation has all responsibility to deal with the event properly, and it is advised that generic words be used in dispatching on the event class." }
|
||||||
|
{ $notes "It is important to note that this is not SAX, merely an event-based XML view" }
|
||||||
|
{ $see-also read-xml } ;
|
||||||
|
|
||||||
|
HELP: opener
|
||||||
|
{ $class-description "describes an opening tag, like <a>. Contains two slots, name and props, containing, respectively, the name of the tag and its attributes. Usually, the name-url will be f." }
|
||||||
|
{ $see-also closer contained } ;
|
||||||
|
|
||||||
|
HELP: closer
|
||||||
|
{ $class-description "describes a closing tag, like </a>. Contains one slot, name, containing the tag's name. Usually, the name-url will be f." }
|
||||||
|
{ $see-also opener contained } ;
|
||||||
|
|
||||||
|
HELP: contained
|
||||||
|
{ $class-description "represents a self-closing tag, like <a/>. Contains two slots, name and props, containing, respectively, the name of the tag and its attributes. Usually, the name-url will be f." }
|
||||||
|
{ $see-also opener closer } ;
|
||||||
|
|
||||||
ARTICLE: { "xml" "intro" } "XML"
|
ARTICLE: { "xml" "intro" } "XML"
|
||||||
"The XML module attempts to implement the XML 1.1 standard, converting strings of text into XML and vice versa. It currently is a work in progress. Together with XML-RPC, this is a component of the F2EE framework."
|
"The XML module attempts to implement the XML 1.1 standard, converting strings of text into XML and vice versa. It currently is a work in progress. Together with XML-RPC, this is a component of the F2EE framework."
|
||||||
|
@ -237,7 +255,8 @@ ARTICLE: { "xml" "intro" } "XML"
|
||||||
{ $subsection { "xml" "classes" } }
|
{ $subsection { "xml" "classes" } }
|
||||||
{ $subsection { "xml" "construct" } }
|
{ $subsection { "xml" "construct" } }
|
||||||
{ $subsection { "xml" "utils" } }
|
{ $subsection { "xml" "utils" } }
|
||||||
{ $subsection { "xml" "internal" } } ;
|
{ $subsection { "xml" "internal" } }
|
||||||
|
{ $subsection { "xml" "events" } } ;
|
||||||
|
|
||||||
ARTICLE: { "xml" "basic" } "Basic words for XML processing"
|
ARTICLE: { "xml" "basic" } "Basic words for XML processing"
|
||||||
"These are the most basic words needed for processing an XML document"
|
"These are the most basic words needed for processing an XML document"
|
||||||
|
@ -246,7 +265,8 @@ ARTICLE: { "xml" "basic" } "Basic words for XML processing"
|
||||||
{ $subsection xml-parse-error }
|
{ $subsection xml-parse-error }
|
||||||
{ $subsection xml-reprint }
|
{ $subsection xml-reprint }
|
||||||
{ $subsection write-xml }
|
{ $subsection write-xml }
|
||||||
{ $subsection read-xml } ;
|
{ $subsection read-xml }
|
||||||
|
{ $subsection xml-chunk } ;
|
||||||
|
|
||||||
ARTICLE: { "xml" "classes" } "XML data classes"
|
ARTICLE: { "xml" "classes" } "XML data classes"
|
||||||
"Data types that XML documents are made of:"
|
"Data types that XML documents are made of:"
|
||||||
|
@ -302,3 +322,10 @@ ARTICLE: { "xml" "internal" } "Internals of the XML parser"
|
||||||
{ $subsection take-string }
|
{ $subsection take-string }
|
||||||
{ $subsection next }
|
{ $subsection next }
|
||||||
{ $subsection process } ; ! should I have more? less?
|
{ $subsection process } ; ! should I have more? less?
|
||||||
|
|
||||||
|
ARTICLE: { "xml" "events" } "Event-based XML parsing"
|
||||||
|
"In addition to DOM-style parsing based around " { $link read-xml } ", the XML module also provides SAX-style event-based parsing. This uses much of the same data structures as normal XML, with the exception of the classes " { $link xml-doc } " and " { $link tag } " and as such, the articles " { $link { "xml" "classes" } } " and " { $link { "xml" "construct" } } " may be useful in learning how to process documents in this way. Other useful words are:"
|
||||||
|
{ $subsection sax }
|
||||||
|
{ $subsection opener }
|
||||||
|
{ $subsection closer }
|
||||||
|
{ $subsection contained } ;
|
||||||
|
|
Loading…
Reference in New Issue