XML combinator refactoring

db4
Daniel Ehrenberg 2007-12-23 14:57:39 -05:00
parent b8f210a3be
commit beaa4601ed
12 changed files with 232 additions and 188 deletions

View File

@ -41,17 +41,13 @@ C: <instruction> instruction
TUPLE: prolog version encoding standalone ;
C: <prolog> prolog
TUPLE: xml prolog before after ;
: <xml> ( prolog before main after -- xml )
{ set-xml-prolog set-xml-before set-delegate set-xml-after }
xml construct ;
TUPLE: tag attrs children ;
TUPLE: attrs ;
: <attrs> ( alist -- attrs )
attrs construct-delegate ;
TUPLE: attrs alist ;
C: <attrs> attrs
: attr@ ( key alist -- index {key,value} )
>r assure-name r>
>r assure-name r> attrs-alist
[ first names-match? ] curry* find ;
M: attrs at*
@ -60,13 +56,13 @@ M: attrs set-at
2dup attr@ nip [
2nip set-second
] [
[ >r assure-name swap 2array r> ?push ] keep
set-delegate
>r assure-name swap 2array r>
[ attrs-alist ?push ] keep set-attrs-alist
] if* ;
M: attrs assoc-size length ;
M: attrs assoc-size attrs-alist length ;
M: attrs new-assoc drop V{ } new <attrs> ;
M: attrs >alist delegate >alist ;
M: attrs >alist attrs-alist >alist ;
: >attrs ( assoc -- attrs )
dup [
@ -77,13 +73,15 @@ M: attrs assoc-like
drop dup attrs? [ >attrs ] unless ;
M: attrs clear-assoc
f swap set-delegate ;
f swap set-attrs-alist ;
M: attrs delete-at
tuck attr@ drop [ swap delete-nth ] [ drop ] if* ;
tuck attr@ drop [ swap attrs-alist delete-nth ] [ drop ] if* ;
M: attrs clone
attrs-alist clone <attrs> ;
INSTANCE: attrs assoc
TUPLE: tag attrs children ;
: <tag> ( name attrs children -- tag )
>r >r assure-name r> T{ attrs } assoc-like r>
{ set-delegate set-tag-attrs set-tag-children }
@ -97,6 +95,45 @@ INSTANCE: tag assoc
CONSULT: sequence-protocol tag tag-children ;
INSTANCE: tag sequence
M: tag like
over tag? [
[ delegate ] keep tag-attrs
rot dup [ V{ } like ] when <tag>
] unless ;
M: tag clone
[ delegate clone ] keep [ tag-attrs clone ] keep
tag-children clone
{ set-delegate set-tag-attrs set-tag-children } tag construct ;
TUPLE: xml prolog before main after ;
: <xml> ( prolog before main after -- xml )
{ set-xml-prolog set-xml-before set-delegate set-xml-after }
xml construct ;
CONSULT: sequence-protocol xml delegate ;
INSTANCE: xml sequence
CONSULT: assoc-protocol xml delegate ;
INSTANCE: xml assoc
<PRIVATE
: tag>xml ( xml tag -- newxml )
swap [ dup xml-prolog swap xml-before rot ] keep xml-after <xml> ;
: seq>xml ( xml seq -- newxml )
over delegate like tag>xml ;
PRIVATE>
M: xml clone
[ xml-prolog clone ] keep [ xml-before clone ] keep
[ delegate clone ] keep xml-after clone <xml> ;
M: xml like
swap dup xml? [
dup tag? [ tag>xml ] [ seq>xml ] if
] unless ;
! tag with children=f is contained
: <contained-tag> ( name attrs -- tag )
f <tag> ;

View File

@ -1,7 +1,7 @@
! Copyright (C) 2006, 2007 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: namespaces kernel xml.data xml.utilities assocs splitting
sequences parser quotations sequences.lib ;
sequences parser quotations sequences.lib xml.utilities ;
IN: xml.generator
: comment, ( string -- ) <comment> , ;

19
extra/xml/literal.factor Normal file
View File

@ -0,0 +1,19 @@
USING: peg peg.ebnf kernel strings sequences combinators.lib ;
IN: xml.literal
! EBNF-based XML generation syntax
! This is a terrible grammar for XML, only suitable for literals like this
: &ident ( -- parser )
[ {
[ printable? ]
[ blank? not ]
[ "<>" member? not ]
} <-&& ] satisfy ;
: make-name ( str/3array -- name )
dup array? [ first3 nip f <name> ] [ name-tag ] if ;
<EBNF
&name = ident | ident ':' ident => make-name
EBNF>

View File

@ -0,0 +1,64 @@
USING: peg peg.ebnf kernel strings sequences combinators.lib arrays xml.data
namespaces assocs xml.generator ;
IN: xml.literal
! EBNF-based XML generation syntax
! This is a terrible grammar for XML, only suitable for literals like this
: &ident ( -- parser )
[ {
[ printable? ]
[ blank? not ]
[ "<>" member? not ]
} <-&& ] satisfy repeat1 [ >string ] action ;
: 2choice 2array choice ;
: &name ( -- parser )
&ident ":" token &ident 3array seq [ first3 nip f <name> ] action
&ident [ <name-tag> ] action
2choice ;
: &quote ( quote -- parser )
[ token ] keep [ = not ] curry satisfy dupd seq swap seq ;
DEFER: &quot
: &code ( -- parser )
[ "[]" member? not ] satisfy [ &quot ] delay 2choice repeat0 ;
: &quot ( -- parser )
! This doesn't deal with "[" or "]" properly
"[" token &code
"]" token 3array seq [ second parse ] action ;
: &value ( -- parser )
"'" &quote "\"" &quote &quot 3array choice ;
: &attr ( -- parser )
&name "=" token &value sp 3array seq [ first3 nip 2array ] action ;
: &attrs ( -- parser )
&attr repeat0 [
[ swap [ set ] 2curry ] { } assoc>map concat
] action ;
: &tag-start ( -- parser )
"<" token &name sp &attrs sp 3array seq
[ first3 2array nip ] action ;
: tag-open-code ( {name,attrs} contents -- quot )
swap first2 dup empty? [ drop swap [ tag, ] 3curry ]
[ swap rot [ >r >r H{ } make-assoc r> r> swapd tag*, ] 3curry ] if ;
: &tag-open ( -- parser )
&tag-start ">" token &quot 3array seq
[ first3 nip tag-open-code ] action ;
: tag-contained-code ( {name,attrs} -- quot )
first2 dup empty? [ drop [ contained, ] curry ]
[ swap [ >r H{ } make-assoc r> swap contained*, ] 2curry ] if ;
: &tag-contained ( -- parser )
&tag-start "/>" token 2array seq
[ first tag-contained-code ] action ;

View File

@ -5,7 +5,7 @@ USING: sequences xml kernel arrays xml.utilities io.files tools.test ;
[ tag-named children>string ] curry* map ;
: parse-result ( xml -- seq )
"resultElements" tag-named* "item" tags-named
"resultElements" deep-tag-named "item" tags-named
[ assemble-data ] map ;
[ "http://www.foxnews.com/oreilly/" ] [

View File

@ -1,6 +1,6 @@
IN: templating
USING: kernel xml sequences assocs tools.test io arrays namespaces
xml.data xml.utilities xml.writer generic ;
xml.data xml.utilities xml.writer generic sequences.deep ;
: sub-tag
T{ name f f "sub" "http://littledan.onigirihouse.com/namespaces/replace" } ;
@ -16,7 +16,7 @@ M: tag (r-ref)
M: object (r-ref) drop ;
: template ( xml -- )
[ (r-ref) ] xml-each ;
[ (r-ref) ] deep-each ;
! Example

View File

@ -3,7 +3,7 @@
IN: temporary
USING: kernel xml tools.test io namespaces sequences xml.errors xml.entities
parser strings xml.data io.files xml.writer xml.utilities state-parser
continuations assocs ;
continuations assocs sequences.deep ;
! This is insufficient
SYMBOL: xml-file
@ -30,16 +30,19 @@ SYMBOL: xml-file
[ "<a b='c'/>" string>xml xml>string ] unit-test
[ "abcd" ] [
"<main>a<sub>bc</sub>d<nothing/></main>" string>xml
[ [ dup string? [ % ] [ drop ] if ] xml-each ] "" make
[ [ dup string? [ % ] [ drop ] if ] deep-each ] "" make
] unit-test
[ "abcd" ] [
"<main>a<sub>bc</sub>d<nothing/></main>" string>xml
[ string? ] xml-subset concat
[ string? ] deep-subset concat
] unit-test
[ "foo" ] [
"<a><b id='c'>foo</b><d id='e'/></a>" string>xml
"c" get-id children>string
] unit-test
[ "foo" ] [ "<x y='foo'/>" string>xml "y" <name-tag> over
at swap "z" <name-tag> >r tuck r> swap set-at T{ name f "blah" "z" f } swap at ] unit-test
[ "foo" ] [ "<x y='foo'/>" string>xml "y" over
at swap "z" >r tuck r> swap set-at
T{ name f "blah" "z" f } swap at ] unit-test
[ "foo" ] [ "<boo><![CDATA[foo]]></boo>" string>xml children>string ] unit-test
[ "<?xml version=\"1.0\" encoding=\"iso-8859-1\"?><foo>bar baz</foo>" ]
[ "<foo>bar</foo>" string>xml [ " baz" append ] map xml>string ] unit-test

View File

@ -124,7 +124,8 @@ SYMBOL: ns-stack
[ parse-attr (middle-tag) ] when ;
: middle-tag ( -- attrs-alist )
[ (middle-tag) ] V{ } make pass-blank ;
! f make will make a vector if it has any elements
[ (middle-tag) ] f make pass-blank ;
: end-tag ( name attrs-alist -- tag )
tag-ns pass-blank get-char CHAR: / =

View File

@ -2,7 +2,7 @@
! See http://factorcode.org/license.txt for BSD license.
USING: kernel namespaces sequences words io assocs
quotations strings parser arrays xml.data xml.writer debugger
splitting vectors ;
splitting vectors sequences.deep ;
IN: xml.utilities
! * System for words specialized on tag names
@ -59,59 +59,6 @@ M: process-missing error.
: first-child-tag ( tag -- tag )
tag-children [ tag? ] find nip ;
! * Utilities for searching through XML documents
! These all work from the outside in, top to bottom.
: with-delegate ( object quot -- object )
over clone >r >r delegate r> call r>
[ set-delegate ] keep ; inline
GENERIC# xml-each 1 ( quot tag -- ) inline
M: tag xml-each
[ call ] 2keep
swap tag-children [ swap xml-each ] curry* each ;
M: object xml-each
call ;
M: xml xml-each
>r delegate r> xml-each ;
GENERIC# xml-map 1 ( quot tag -- tag ) inline
M: tag xml-map
swap clone over >r swap call r>
swap [ tag-children [ swap xml-map ] curry* map ] keep
[ set-tag-children ] keep ;
M: object xml-map
call ;
M: xml xml-map
swap [ swap xml-map ] with-delegate ;
: xml-subset ( quot tag -- seq ) ! quot: tag -- ?
V{ } clone rot [
swap >r [ swap call ] 2keep rot r>
swap [ [ push ] keep ] [ nip ] if
] xml-each nip ;
GENERIC# xml-find 1 ( quot tag -- tag ) inline
M: tag xml-find
[ call ] 2keep swap rot [
f swap
[ nip over >r swap xml-find r> swap dup ] find
2drop ! leaves result of quot
] unless nip ;
M: object xml-find
keep f ? ;
M: xml xml-find
>r delegate r> xml-find ;
GENERIC# xml-inject 1 ( quot tag -- ) inline
M: tag xml-inject
swap [
swap [ call ] keep
[ xml-inject ] keep
] change-each ;
M: object xml-inject 2drop ;
M: xml xml-inject >r delegate >r xml-inject ;
! * Accessing part of an XML document
! for tag- words, a start means that it searches all children
! and no star searches only direct children
@ -119,11 +66,14 @@ M: xml xml-inject >r delegate >r xml-inject ;
: tag-named? ( name elem -- ? )
dup tag? [ names-match? ] [ 2drop f ] if ;
: tag-named* ( tag name/string -- matching-tag )
assure-name [ swap tag-named? ] curry xml-find ;
: tags@ ( tag name -- children name )
>r { } like r> assure-name ;
: tags-named* ( tag name/string -- tags-seq )
assure-name [ swap tag-named? ] curry xml-subset ;
: deep-tag-named ( tag name/string -- matching-tag )
assure-name [ swap tag-named? ] curry deep-find ;
: deep-tags-named ( tag name/string -- tags-seq )
tags@ [ swap tag-named? ] curry deep-subset ;
: tag-named ( tag name/string -- matching-tag )
! like get-name-tag but only looks at direct children,
@ -131,7 +81,28 @@ M: xml xml-inject >r delegate >r xml-inject ;
assure-name swap [ tag-named? ] curry* find nip ;
: tags-named ( tag name/string -- tags-seq )
assure-name swap [ tag-named? ] curry* subset ;
tags@ swap [ tag-named? ] curry* subset ;
: tag-with-attr? ( elem attr-value attr-name -- ? )
rot dup tag? [ at = ] [ 3drop f ] if ;
: tag-with-attr ( tag attr-value attr-name -- matching-tag )
assure-name [ tag-with-attr? ] 2curry find nip ;
: tags-with-attr ( tag attr-value attr-name -- tags-seq )
tags@ [ tag-with-attr? ] 2curry subset tag-children ;
: deep-tag-with-attr ( tag attr-value attr-name -- matching-tag )
assure-name [ tag-with-attr? ] 2curry deep-find ;
: deep-tags-with-attr ( tag attr-value attr-name -- tags-seq )
tags@ [ tag-with-attr? ] 2curry deep-subset ;
: get-id ( tag id -- elem ) ! elem=tag.getElementById(id)
"id" deep-tag-with-attr ;
: deep-tags-named-with-attr ( tag tag-name attr-value attr-name -- tags )
>r >r deep-tags-named r> r> tags-with-attr ;
: assert-tag ( name name -- )
names-match? [ "Unexpected XML tag found" throw ] unless ;
@ -142,25 +113,3 @@ M: xml xml-inject >r delegate >r xml-inject ;
: insert-child ( child tag -- )
>r 1vector r> insert-children ;
: tag-with-attr? ( elem attr-value attr-name -- ? )
rot dup tag? [ at = ] [ 3drop f ] if ;
: tag-with-attr ( tag attr-value attr-name -- matching-tag )
assure-name [ tag-with-attr? ] 2curry find nip ;
: tags-with-attr ( tag attr-value attr-name -- tags-seq )
assure-name [ tag-with-attr? ] 2curry subset ;
: tag-with-attr* ( tag attr-value attr-name -- matching-tag )
assure-name [ tag-with-attr? ] 2curry xml-find ;
: tags-with-attr* ( tag attr-value attr-name -- tags-seq )
assure-name [ tag-with-attr? ] 2curry xml-subset ;
: get-id ( tag id -- elem ) ! elem=tag.getElementById(id)
"id" tag-with-attr ;
: tags-named-with-attr* ( tag tag-name attr-value attr-name -- tags )
>r >r tags-named* r> r> tags-with-attr ;

View File

@ -22,14 +22,14 @@ IN: xml.writer
[ write CHAR: : write1 ] when*
name-tag write ;
: print-attrs ( hash -- )
: print-attrs ( assoc -- )
[
first2 " " write
" " write
swap print-name
"=\"" write
chars>entities write
"\"" write
] each ;
] assoc-each ;
GENERIC: write-item ( object -- )
@ -38,8 +38,7 @@ M: string write-item
M: contained-tag write-item
CHAR: < write1
dup print-name
tag-attrs print-attrs
dup print-name tag-attrs print-attrs
"/>" write ;
M: open-tag write-item

View File

@ -49,24 +49,6 @@ HELP: TAG:
{ $description "defines what a process should do when it encounters a specific tag" }
{ $examples { $code "PROCESS: x ( tag -- )\nTAG: a x drop \"hi\" write ;" } }
{ $see-also POSTPONE: PROCESS: } ;
HELP: xml-each
{ $values { "tag" tag } { "quot" "a quotation ( element -- )" } }
{ $description "applies the quotation to each element (tags, strings, etc) in the tag, moving top-down" }
{ $see-also xml-map xml-subset } ;
HELP: xml-map
{ $values { "tag" tag } { "quot" "a quotation ( element -- element )" }
{ "tag" "an XML tag with the quotation applied to each element" } }
{ $description "applies the quotation to each element (tags, strings, etc) in the tag, moving top-down, and produces a new tag" }
{ $see-also xml-each xml-subset } ;
HELP: xml-subset
{ $values { "tag" tag } { "quot" "a quotation ( tag -- ? )" }
{ "seq" "sequence of elements" } }
{ $description "applies the quotation to each element (tags, strings, etc) in the tag, moving top-down, producing a sequence of elements which do not return false for the sequence" }
{ $see-also xml-map xml-each } ;
HELP: build-tag*
{ $values { "items" "sequence of elements" } { "name" "string" }
{ "tag" tag } }
@ -166,15 +148,10 @@ HELP: xml-chunk
{ $description "rather than parse a document, as " { $link read-xml } " does, this word parses and returns a sequence of XML elements (tags, strings, etc), ie a document fragment. This is useful for pieces of XML which may have more than one main tag." }
{ $see-also write-chunk read-xml } ;
HELP: xml-find
{ $values { "tag" "an XML element or document" } { "quot" "a quotation ( elem -- ? )" } { "tag" "an XML element which satisfies the predicate" } }
{ $description "finds the first element in the XML document which satisfies the predicate, moving from the outermost element to the innermost, top-down" }
{ $see-also xml-each xml-map get-id } ;
HELP: get-id
{ $values { "tag" "an XML tag or document" } { "id" "a string" } { "elem" "an XML element or f" } }
{ $description "finds the XML tag with the specified id, ignoring the namespace" }
{ $see-also xml-find } ;
{ $see-also } ;
HELP: process
{ $values { "object" "an opener, closer, contained or text element" } }
@ -242,15 +219,15 @@ HELP: write-chunk
{ $description "writes an XML document fragment, ie a sequence of XML elements, to the " { $link stdio } " stream." }
{ $see-also write-item write-xml } ;
HELP: tag-named*
HELP: deep-tag-named
{ $values { "tag" "an XML tag or document" } { "name/string" "an XML name or string representing a name" } { "matching-tag" tag } }
{ $description "finds an XML tag with a matching name, recursively searching children and children of children" }
{ $see-also tags-named tag-named tags-named* } ;
{ $see-also tags-named tag-named deep-tags-named } ;
HELP: tags-named*
HELP: deep-tags-named
{ $values { "tag" "an XML tag or document" } { "name/string" "an XML name or string representing a name" } { "tags-seq" "a sequence of tags" } }
{ $description "returns a sequence of all tags of a matching name, recursively searching children and children of children" }
{ $see-also tag-named tag-named* tags-named } ;
{ $see-also tag-named deep-tag-named tags-named } ;
HELP: children>string
{ $values { "tag" "an XML tag or document" } { "string" "a string" } }
@ -306,14 +283,14 @@ HELP: tag-named
{ "name/string" "an XML name or string representing the name" }
{ "matching-tag" tag } }
{ $description "finds the first tag with matching name which is the direct child of the given tag" }
{ $see-also tags-named* tag-named* tags-named } ;
{ $see-also deep-tags-named deep-tag-named tags-named } ;
HELP: tags-named
{ $values { "tag" "an XML tag or document" }
{ "name/string" "an XML name or string representing the name" }
{ "tags-seq" "a sequence of tags" } }
{ $description "finds all tags with matching name that are the direct children of the given tag" }
{ $see-also tag-named* tags-named* tag-named } ;
{ $see-also deep-tag-named deep-tags-named tag-named } ;
HELP: state-parse
{ $values { "stream" "an input stream" } { "quot" "a quotation ( -- )" } }
@ -390,18 +367,13 @@ ARTICLE: { "xml" "utils" } "XML processing utilities"
"System sfor creating words which dispatch on XML tags:"
{ $subsection POSTPONE: PROCESS: }
{ $subsection POSTPONE: TAG: }
"Combinators for traversing XML trees:"
{ $subsection xml-each }
{ $subsection xml-map }
{ $subsection xml-subset }
{ $subsection xml-find }
"Getting parts of an XML document or tag:"
$nl
"Note: the difference between tag-named* and tag-named is that the former searches recursively among all children and children of children of the tag, while the latter only looks at the direct children, and is therefore more efficient."
"Note: the difference between deep-tag-named and tag-named is that the former searches recursively among all children and children of children of the tag, while the latter only looks at the direct children, and is therefore more efficient."
{ $subsection tag-named }
{ $subsection tags-named }
{ $subsection tag-named* }
{ $subsection tags-named* }
{ $subsection deep-tag-named }
{ $subsection deep-tags-named }
{ $subsection get-id }
"Words for simplified generation of XML:"
{ $subsection build-tag* }

View File

@ -9,7 +9,7 @@ TUPLE: result title url summary ;
C: <result> result
: parse-yahoo ( xml -- seq )
"Result" tags-named* [
"Result" deep-tags-named [
{ "Title" "Url" "Summary" }
[ tag-named children>string ] curry* map
first3 <result>