diff --git a/basis/syndication/syndication-tests.factor b/basis/syndication/syndication-tests.factor index 1ddcbf8090..4fbfee03d9 100644 --- a/basis/syndication/syndication-tests.factor +++ b/basis/syndication/syndication-tests.factor @@ -1,5 +1,5 @@ USING: syndication io kernel io.files tools.test io.encodings.utf8 -calendar urls ; +calendar urls xml.writer ; IN: syndication.tests \ download-feed must-infer @@ -43,3 +43,4 @@ IN: syndication.tests } } } ] [ "resource:basis/syndication/test/atom.xml" load-news-file ] unit-test +[ ] [ "resource:basis/syndication/test/atom.xml" load-news-file xml>string drop ] unit-test diff --git a/basis/syndication/syndication.factor b/basis/syndication/syndication.factor index 9d0419a818..fadb4f4fb3 100644 --- a/basis/syndication/syndication.factor +++ b/basis/syndication/syndication.factor @@ -1,10 +1,10 @@ ! Copyright (C) 2006 Chris Double, Daniel Ehrenberg. ! Portions copyright (C) 2008 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: xml.utilities kernel assocs xml.generator math.order +USING: xml.utilities kernel assocs math.order strings sequences xml.data xml.writer io.streams.string combinators xml xml.entities.html io.files io - http.client namespaces make xml.generator hashtables + http.client namespaces make xml.interpolate hashtables calendar.format accessors continuations urls present ; IN: syndication @@ -114,26 +114,31 @@ TUPLE: entry title url description date ; http-get nip string>feed ; ! 
Atom generation -: simple-tag, ( content name -- ) - [ , ] tag, ; -: simple-tag*, ( content name attrs -- ) - [ , ] tag*, ; - -: entry, ( entry -- ) - "entry" [ - { - [ title>> "title" { { "type" "html" } } simple-tag*, ] - [ url>> present "href" associate "link" swap contained*, ] - [ date>> timestamp>rfc3339 "published" simple-tag, ] - [ description>> [ "content" { { "type" "html" } } simple-tag*, ] when* ] - } cleave - ] tag, ; +: entry>xml ( entry -- xml ) + { + [ title>> ] + [ url>> present ] + [ date>> timestamp>rfc3339 ] + [ description>> ] + } cleave + [XML + + <-> + /> + <-> + <-> + + XML] ; : feed>xml ( feed -- xml ) - "feed" { { "xmlns" "http://www.w3.org/2005/Atom" } } [ - [ title>> "title" simple-tag, ] - [ url>> present "href" associate "link" swap contained*, ] - [ entries>> [ entry, ] each ] - tri - ] make-xml* ; + [ title>> ] + [ url>> present ] + [ entries>> [ entry>xml ] map ] tri + + <-> + /> + <-> + + XML> ; diff --git a/basis/xml/autoencoding/autoencoding.factor b/basis/xml/autoencoding/autoencoding.factor index 5d7e460862..5dc32958d4 100644 --- a/basis/xml/autoencoding/autoencoding.factor +++ b/basis/xml/autoencoding/autoencoding.factor @@ -10,8 +10,8 @@ IN: xml.autoencoding : start-utf16le ( -- tag ) utf16le decode-input-if - CHAR: ? expect - 0 expect check instruct ; + "?\0" expect + check instruct ; : 10xxxxxx? ( ch -- ? 
) -6 shift 3 bitand 2 = ; @@ -36,10 +36,10 @@ IN: xml.autoencoding : skip-utf8-bom ( -- tag ) "\u0000bb\u0000bf" expect utf8 decode-input - CHAR: < expect check make-tag ; + "<" expect check make-tag ; : decode-expecting ( encoding string -- tag ) - [ decode-input-if next ] [ expect-string ] bi* check make-tag ; + [ decode-input-if next ] [ expect ] bi* check make-tag ; : start-utf16be ( -- tag ) utf16be "<" decode-expecting ; diff --git a/basis/xml/data/data-docs.factor b/basis/xml/data/data-docs.factor index c5f4f6d670..9a8c535f91 100644 --- a/basis/xml/data/data-docs.factor +++ b/basis/xml/data/data-docs.factor @@ -150,3 +150,52 @@ HELP: assure-name HELP: { $values { "string" string } { "name" name } } { $description "Converts a string into an XML name with an empty prefix and URL." } ; + +HELP: element-decl +{ $class-description "Describes the class of element declarations, like <!ELEMENT br EMPTY>." } ; + +HELP: +{ $values { "name" name } { "content-spec" string } { "element-decl" element-decl } } +{ $description "Creates an element declaration object, of the class " { $link element-decl } } ; + +HELP: attlist-decl +{ $class-description "Describes the class of attribute list declarations, like <!ATTLIST pre xml:space (preserve) #FIXED 'preserve'>." } ; + +HELP: +{ $values { "name" name } { "att-defs" string } { "attlist-decl" attlist-decl } } +{ $description "Creates an attribute list declaration object, of the class " { $link attlist-decl } } ; + +HELP: entity-decl +{ $class-description "Describes the class of entity declarations, like <!ENTITY foo 'bar'>." } ; + +HELP: +{ $values { "name" name } { "def" string } { "pe?" "t or f" } { "entity-decl" entity-decl } } +{ $description "Creates an entity declaration object, of the class " { $link entity-decl } ". The pe? slot should be t if the object is a DTD-internal entity, like <!ENTITY % foo 'bar'> and f if the object is like <!ENTITY foo 'bar'>, that is, it can be used outside of the DTD." 
} ; + +HELP: system-id +{ $class-description "Describes the class of system identifiers within an XML DTD directive, such as <!DOCTYPE greeting SYSTEM 'hello.dtd'>" } ; + +HELP: +{ $values { "system-literal" string } { "system-id" system-id } } +{ $description "Constructs a " { $link system-id } " tuple." } ; + +HELP: public-id +{ $class-description "Describes the class of public identifiers within an XML DTD directive, such as <!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.0 Strict//EN' 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'>" } ; + +HELP: +{ $values { "pubid-literal" string } { "system-literal" string } { "public-id" public-id } } +{ $description "Constructs a " { $link public-id } " tuple." } ; + +HELP: notation-decl +{ $class-description "Describes the class of notation declarations, like <!NOTATION jpg SYSTEM './jpgviewer'>." } ; + +HELP: +{ $values { "name" string } { "id" id } { "notation-decl" notation-decl } } +{ $description "Creates a notation declaration object, of the class " { $link notation-decl } "." } ; + +HELP: doctype-decl +{ $class-description "Describes the class of doctype declarations." } ; + +HELP: +{ $values { "name" name } { "external-id" id } { "internal-subset" sequence } { "doctype-decl" doctype-decl } } +{ $description "Creates a new doctype declaration object, of the class " { $link doctype-decl } ". Only one of external-id or internal-subset will be non-null." 
} ; diff --git a/basis/xml/data/data.factor b/basis/xml/data/data.factor index 9d84791c1f..b014a96180 100644 --- a/basis/xml/data/data.factor +++ b/basis/xml/data/data.factor @@ -5,6 +5,9 @@ delegate.protocols delegate vectors accessors multiline macros words quotations combinators slots fry strings ; IN: xml.data +TUPLE: interpolated var ; +C: interpolated + UNION: nullable-string string POSTPONE: f ; TUPLE: name @@ -85,11 +88,13 @@ C: comment TUPLE: directive ; TUPLE: element-decl < directive - { name string } { content-spec string } ; + { name string } + { content-spec string } ; C: element-decl TUPLE: attlist-decl < directive - { name string } { att-defs string } ; + { name string } + { att-defs string } ; C: attlist-decl UNION: boolean t POSTPONE: f ; @@ -108,13 +113,23 @@ C: public-id UNION: id system-id public-id POSTPONE: f ; +TUPLE: dtd + { directives sequence } + { entities assoc } + { parameter-entities assoc } ; +C: dtd + +UNION: dtd/f dtd POSTPONE: f ; + TUPLE: doctype-decl < directive { name string } { external-id id } - { internal-subset sequence } ; + { internal-subset dtd/f } ; C: doctype-decl -TUPLE: notation-decl < directive name id ; +TUPLE: notation-decl < directive + { name string } + { id string } ; C: notation-decl TUPLE: instruction { text string } ; diff --git a/basis/xml/dtd/dtd.factor b/basis/xml/dtd/dtd.factor index a668717626..50de78ec11 100644 --- a/basis/xml/dtd/dtd.factor +++ b/basis/xml/dtd/dtd.factor @@ -2,12 +2,9 @@ ! See http://factorcode.org/license.txt for BSD license. USING: xml.tokenize xml.data xml.state kernel sequences ascii fry xml.errors combinators hashtables namespaces xml.entities -strings ; +strings xml.name ; IN: xml.dtd -: take-word ( -- string ) - [ get-char blank? 
] take-until ; - : take-decl-contents ( -- first second ) pass-blank take-word pass-blank ">" take-string ; @@ -20,36 +17,15 @@ IN: xml.dtd : take-notation-decl ( -- notation-decl ) take-decl-contents ; -: take-until-one-of ( seps -- str sep ) - '[ get-char _ member? ] take-until get-char ; - -: take-system-id ( -- system-id ) - parse-quote close ; - -: take-public-id ( -- public-id ) - parse-quote parse-quote close ; - UNION: dtd-acceptable directive comment instruction ; -: (take-external-id) ( token -- external-id ) - pass-blank { - { "SYSTEM" [ take-system-id ] } - { "PUBLIC" [ take-public-id ] } - [ bad-external-id ] - } case ; - -: take-external-id ( -- external-id ) - take-word (take-external-id) ; - -: only-blanks ( str -- ) - [ blank? ] all? [ bad-decl ] unless ; : take-entity-def ( var -- entity-name entity-def ) [ take-word pass-blank get-char { { CHAR: ' [ parse-quote ] } { CHAR: " [ parse-quote ] } - [ drop take-external-id ] + [ drop take-external-id close ] } case ] dip '[ swap _ [ ?set-at ] change ] 2keep ; diff --git a/basis/xml/elements/elements.factor b/basis/xml/elements/elements.factor index 947c11e2a8..b2280bacb4 100644 --- a/basis/xml/elements/elements.factor +++ b/basis/xml/elements/elements.factor @@ -3,12 +3,26 @@ USING: kernel namespaces xml.tokenize xml.state xml.name xml.data accessors arrays make xml.char-classes fry assocs sequences math xml.errors sets combinators io.encodings io.encodings.iana -unicode.case xml.dtd strings ; +unicode.case xml.dtd strings xml.entities ; IN: xml.elements +: take-interpolated ( quot -- interpolated ) + interpolating? 
get [ + drop get-char CHAR: > = + [ next f ] [ + pass-blank " \t\r\n-" take-to + pass-blank "->" expect + ] if + ] [ call ] if ; inline + +: interpolate-quote ( -- interpolated ) + [ quoteless-attr ] take-interpolated ; + : parse-attr ( -- ) - parse-name pass-blank CHAR: = expect pass-blank - t parse-quote* 2array , ; + parse-name pass-blank "=" expect pass-blank + get-char CHAR: < = + [ "<-" expect interpolate-quote ] + [ t parse-quote* ] if 2array , ; : start-tag ( -- name ? ) #! Outputs the name and whether this is a closing tag @@ -31,14 +45,14 @@ IN: xml.elements : end-tag ( name attrs-alist -- tag ) tag-ns pass-blank get-char CHAR: / = - [ pop-ns next CHAR: > expect ] + [ pop-ns next ">" expect ] [ depth inc close ] if ; : take-comment ( -- comment ) - "--" expect-string + "--" expect "--" take-string - CHAR: > expect ; + ">" expect ; : assure-no-extra ( seq -- ) [ first ] map { @@ -80,7 +94,7 @@ SYMBOL: string-input? string-input? get [ drop ] [ decode-input ] if ; : parse-prolog ( -- prolog ) - pass-blank middle-tag "?>" expect-string + pass-blank middle-tag "?>" expect dup assure-no-extra prolog-attrs dup encoding>> dup "UTF-16" = [ drop ] [ name>encoding [ decode-input-if ] when* ] if @@ -96,45 +110,45 @@ SYMBOL: string-input? : take-cdata ( -- string ) depth get zero? [ bad-cdata ] when - "[CDATA[" expect-string "]]>" take-string ; + "[CDATA[" expect "]]>" take-string ; DEFER: make-tag ! Is this unavoidable? : expand-pe ( -- ) ; ! Make this run the contents of the pe within a DOCTYPE -: (take-internal-subset) ( -- ) +: dtd-loop ( -- ) pass-blank get-char { { CHAR: ] [ next ] } { CHAR: % [ expand-pe ] } { CHAR: < [ next make-tag dup dtd-acceptable? - [ bad-doctype ] unless , (take-internal-subset) + [ bad-doctype ] unless , dtd-loop ] } + { f [ ] } [ 1string bad-doctype ] } case ; -: take-internal-subset ( -- seq ) +: take-internal-subset ( -- dtd ) [ - H{ } pe-table set + H{ } clone pe-table set t in-dtd? 
set - (take-internal-subset) - ] { } make ; + dtd-loop + pe-table get + ] { } make swap extra-entities get swap ; -: nontrivial-doctype ( -- external-id internal-subset ) - pass-blank get-char CHAR: [ = [ - next take-internal-subset f swap close - ] [ - " >" take-until-one-of { - { CHAR: \s [ (take-external-id) ] } - { CHAR: > [ only-blanks f ] } - } case f - ] if ; +: take-optional-id ( -- id/f ) + get-char "SP" member? + [ take-external-id ] [ f ] if ; + +: take-internal ( -- dtd/f ) + get-char CHAR: [ = + [ next take-internal-subset ] [ f ] if ; : take-doctype-decl ( -- doctype-decl ) - pass-blank " >" take-until-one-of { - { CHAR: \s [ nontrivial-doctype ] } - { CHAR: > [ f f ] } - } case ; + pass-blank take-name + pass-blank take-optional-id + pass-blank take-internal + close ; : take-directive ( -- doctype ) take-name dup "DOCTYPE" = @@ -151,12 +165,18 @@ DEFER: make-tag ! Is this unavoidable? [ drop take-directive ] } case ; +: normal-tag ( -- tag ) + start-tag + [ dup add-ns pop-ns depth dec close ] + [ middle-tag end-tag ] if ; + +: interpolate-tag ( -- interpolated ) + [ "-" bad-name ] take-interpolated ; + : make-tag ( -- tag ) { { [ get-char dup CHAR: ! = ] [ drop next direct ] } - { [ CHAR: ? = ] [ next instruct ] } - [ - start-tag [ dup add-ns pop-ns depth dec close ] - [ middle-tag end-tag ] if - ] + { [ dup CHAR: ? = ] [ drop next instruct ] } + { [ dup CHAR: - = ] [ drop next interpolate-tag ] } + [ drop normal-tag ] } cond ; diff --git a/basis/xml/entities/entities.factor b/basis/xml/entities/entities.factor index a3812c7723..a730474f20 100644 --- a/basis/xml/entities/entities.factor +++ b/basis/xml/entities/entities.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2005, 2006 Daniel Ehrenberg ! See http://factorcode.org/license.txt for BSD license. 
USING: namespaces make kernel assocs sequences fry values -io.files io.encodings.binary ; +io.files io.encodings.binary xml.state ; IN: xml.entities : entities-out @@ -37,7 +37,5 @@ IN: xml.entities { "quot" CHAR: " } } ; -SYMBOL: extra-entities - : with-entities ( entities quot -- ) [ swap extra-entities set call ] with-scope ; inline diff --git a/basis/xml/entities/html/html.factor b/basis/xml/entities/html/html.factor index 826dccf79d..f1e52319f1 100644 --- a/basis/xml/entities/html/html.factor +++ b/basis/xml/entities/html/html.factor @@ -1,13 +1,13 @@ ! Copyright (C) 2009 Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. USING: assocs io.encodings.binary io.files kernel namespaces sequences -values xml xml.entities ; +values xml xml.entities accessors xml.state ; IN: xml.entities.html VALUE: html-entities : read-entities-file ( file -- table ) - file>dtd nip ; + file>dtd entities>> ; : get-html ( -- table ) { "lat1" "special" "symbol" } [ diff --git a/basis/xml/generator/authors.txt b/basis/xml/generator/authors.txt deleted file mode 100755 index f990dd0ed2..0000000000 --- a/basis/xml/generator/authors.txt +++ /dev/null @@ -1 +0,0 @@ -Daniel Ehrenberg diff --git a/basis/xml/generator/generator-tests.factor b/basis/xml/generator/generator-tests.factor deleted file mode 100644 index 17f7cab509..0000000000 --- a/basis/xml/generator/generator-tests.factor +++ /dev/null @@ -1,3 +0,0 @@ -USING: tools.test io.streams.string xml.generator xml.writer accessors ; -[ "" ] -[ "html" [ "body" [ "a" { { "href" "blah" } } contained*, ] tag, ] make-xml [ body>> write-xml-chunk ] with-string-writer ] unit-test diff --git a/basis/xml/generator/generator.factor b/basis/xml/generator/generator.factor deleted file mode 100644 index ac7b14b89e..0000000000 --- a/basis/xml/generator/generator.factor +++ /dev/null @@ -1,25 +0,0 @@ -! Copyright (C) 2006, 2007 Daniel Ehrenberg -! See http://factorcode.org/license.txt for BSD license. 
-USING: namespaces make kernel xml.data xml.utilities assocs -sequences ; -IN: xml.generator - -: comment, ( string -- ) , ; -: instruction, ( string -- ) , ; -: nl, ( -- ) "\n" , ; - -: (tag,) ( name attrs quot -- tag ) - -rot [ V{ } make ] 2dip rot ; inline -: tag*, ( name attrs quot -- ) - (tag,) , ; inline - -: contained*, ( name attrs -- ) - f , ; - -: tag, ( name quot -- ) f swap tag*, ; inline -: contained, ( name -- ) f contained*, ; inline - -: make-xml* ( name attrs quot -- xml ) - (tag,) build-xml ; inline -: make-xml ( name quot -- xml ) - f swap make-xml* ; inline diff --git a/basis/xml/interpolate/interpolate-tests.factor b/basis/xml/interpolate/interpolate-tests.factor index 0adcb51123..48f76b8746 100644 --- a/basis/xml/interpolate/interpolate-tests.factor +++ b/basis/xml/interpolate/interpolate-tests.factor @@ -1,4 +1,46 @@ ! Copyright (C) 2009 Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. -USING: tools.test xml.interpolate ; +USING: tools.test xml.interpolate multiline kernel assocs +sequences accessors xml.writer xml.interpolate.private +locals splitting ; IN: xml.interpolate.tests + +[ "a" "c" { "a" "c" f } ] [ + "<-a->/><->" + interpolated-doc + [ second var>> ] + [ fourth "val" swap at var>> ] + [ extract-variables ] tri +] unit-test + +[ {" + + one + + y + +"} ] [ + [let* | a [ "one" ] c [ "two" ] x [ "y" ] + d [ [XML <-x-> XML] ] | + <-a-> /> <-d-> + XML> pprint-xml>string + ] +] unit-test + +[ {" + + + one + + + two + + + three + +"} ] [ + "one two three" " " split + [ [XML <-> XML] ] map + <-> XML> pprint-xml>string +] unit-test diff --git a/basis/xml/interpolate/interpolate.factor b/basis/xml/interpolate/interpolate.factor index 262d0e1adc..7b041ec53d 100644 --- a/basis/xml/interpolate/interpolate.factor +++ b/basis/xml/interpolate/interpolate.factor @@ -1,4 +1,95 @@ ! Copyright (C) 2009 Daniel Ehrenberg. ! See http://factorcode.org/license.txt for BSD license. 
-USING: ; +USING: xml xml.state kernel sequences fry assocs xml.data +accessors strings make multiline parser namespaces macros +sequences.deep generalizations locals words combinators +math ; IN: xml.interpolate + +xml-chunk ] with-variable ; + +: interpolated-doc ( string -- xml ) + t interpolating? [ string>xml ] with-variable ; + +DEFER: interpolate-sequence + +: interpolate-attrs ( table attrs -- attrs ) + swap '[ dup interpolated? [ var>> _ at ] when ] assoc-map ; + +: interpolate-tag ( table tag -- tag ) + [ nip name>> ] + [ attrs>> interpolate-attrs ] + [ children>> [ interpolate-sequence ] [ drop f ] if* ] 2tri + ; + +GENERIC: push-item ( item -- ) +M: string push-item , ; +M: object push-item , ; +M: sequence push-item % ; + +GENERIC: interpolate-item ( table item -- ) +M: object interpolate-item nip , ; +M: tag interpolate-item interpolate-tag , ; +M: interpolated interpolate-item + var>> swap at push-item ; + +: interpolate-sequence ( table seq -- seq ) + [ [ interpolate-item ] with each ] { } make ; + +: interpolate-xml-doc ( table xml -- xml ) + (clone) [ interpolate-tag ] change-body ; + +GENERIC# (each-interpolated) 1 ( item quot -- ) inline +M: interpolated (each-interpolated) call ; +M: tag (each-interpolated) + swap attrs>> values + [ interpolated? ] filter + swap each ; +M: object (each-interpolated) 2drop ; + +: each-interpolated ( xml quot -- ) + '[ _ (each-interpolated) ] deep-each ; inline + +:: number<-> ( doc -- doc ) + 0 :> n! doc [ + dup var>> [ n >>var n 1+ n! ] unless drop + ] each-interpolated doc ; + +MACRO: interpolate-xml ( string -- doc ) + interpolated-doc number<-> '[ _ interpolate-xml-doc ] ; + +MACRO: interpolate-chunk ( string -- chunk ) + interpolated-chunk number<-> '[ _ interpolate-sequence ] ; + +: >search-hash ( seq -- hash ) + [ dup search ] H{ } map>assoc ; + +: extract-variables ( xml -- seq ) + [ [ var>> , ] each-interpolated ] { } make ; + +: collect ( accum seq -- accum ) + { + { [ dup [ ] all? 
] [ >search-hash parsed ] } ! locals + { [ dup [ not ] all? ] [ ! fry + length parsed \ narray parsed \ parsed + ] } + [ drop "XML interpolation contains both fry and locals" throw ] ! mixed + } cond ; + +: parse-def ( accum delimiter word -- accum ) + [ + parse-multiline-string + [ interpolated-chunk extract-variables collect ] keep + parsed + ] dip parsed ; + +PRIVATE> + +: " \ interpolate-xml parse-def ; parsing + +: [XML + "XML]" \ interpolate-chunk parse-def ; parsing diff --git a/basis/xml/name/name.factor b/basis/xml/name/name.factor index 32053b1eb4..83132d4d29 100644 --- a/basis/xml/name/name.factor +++ b/basis/xml/name/name.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: kernel namespaces accessors xml.tokenize xml.data assocs xml.errors xml.char-classes combinators.short-circuit splitting -fry xml.state sequences ; +fry xml.state sequences combinators ascii ; IN: xml.name ! XML namespace processing: ns = namespace @@ -74,3 +74,21 @@ SYMBOL: ns-stack : parse-name-starting ( string -- name ) take-name append interpret-name ; +: take-system-id ( -- system-id ) + parse-quote ; + +: take-public-id ( -- public-id ) + parse-quote parse-quote ; + +: (take-external-id) ( token -- external-id ) + pass-blank { + { "SYSTEM" [ take-system-id ] } + { "PUBLIC" [ take-public-id ] } + [ bad-external-id ] + } case ; + +: take-word ( -- string ) + [ get-char blank? ] take-until ; + +: take-external-id ( -- external-id ) + take-word (take-external-id) ; diff --git a/basis/xml/state/state.factor b/basis/xml/state/state.factor index b00d723a1a..059d8267a0 100644 --- a/basis/xml/state/state.factor +++ b/basis/xml/state/state.factor @@ -23,3 +23,11 @@ SYMBOL: xml-stack SYMBOL: prolog-data SYMBOL: depth + +SYMBOL: interpolating? + +SYMBOL: in-dtd? 
+ +SYMBOL: pe-table + +SYMBOL: extra-entities diff --git a/basis/xml/tests/state-parser-tests.factor b/basis/xml/tests/state-parser-tests.factor index 31d4a03c7b..24c3bc4b69 100644 --- a/basis/xml/tests/state-parser-tests.factor +++ b/basis/xml/tests/state-parser-tests.factor @@ -7,6 +7,9 @@ IN: xml.test.state : take-rest ( -- string ) [ f ] take-until ; +: take-char ( char -- string ) + 1string take-to ; + [ "hello" ] [ "hello" [ take-rest ] string-parse ] unit-test [ 2 4 ] [ "12\n123" [ take-rest drop get-line get-column ] string-parse ] unit-test [ "hi" " how are you?" ] [ "hi how are you?" [ [ get-char blank? ] take-until take-rest ] string-parse ] unit-test diff --git a/basis/xml/tests/test.factor b/basis/xml/tests/test.factor index 794796339e..488bd911bd 100644 --- a/basis/xml/tests/test.factor +++ b/basis/xml/tests/test.factor @@ -49,10 +49,10 @@ SYMBOL: xml-file [ "foo" ] [ "" string>xml children>string ] unit-test [ "" string>xml ] must-fail [ ] [ "" string>xml drop ] unit-test -[ T{ element-decl f "br" "EMPTY" } ] [ "" string>dtd drop second ] unit-test -[ T{ element-decl f "p" "(#PCDATA|emph)*" } ] [ "" string>dtd drop second ] unit-test -[ T{ element-decl f "%name.para;" "%content.para;" } ] [ "" string>dtd drop second ] unit-test -[ T{ element-decl f "container" "ANY" } ] [ "" string>dtd drop second ] unit-test +[ T{ element-decl f "br" "EMPTY" } ] [ "" string>dtd directives>> first ] unit-test +[ T{ element-decl f "p" "(#PCDATA|emph)*" } ] [ "" string>dtd directives>> first ] unit-test +[ T{ element-decl f "%name.para;" "%content.para;" } ] [ "" string>dtd directives>> first ] unit-test +[ T{ element-decl f "container" "ANY" } ] [ "" string>dtd directives>> first ] unit-test [ T{ doctype-decl f "foo" } ] [ "" string>xml-chunk first ] unit-test [ T{ doctype-decl f "foo" } ] [ "" string>xml-chunk first ] unit-test [ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "" string>xml-chunk first ] unit-test diff --git 
a/basis/xml/tests/xmode-dtd.factor b/basis/xml/tests/xmode-dtd.factor index 85e3516444..4408655d9c 100644 --- a/basis/xml/tests/xmode-dtd.factor +++ b/basis/xml/tests/xmode-dtd.factor @@ -1,8 +1,8 @@ ! Copyright (C) 2008 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: xml io.encodings.utf8 io.files kernel tools.test ; +USING: xml xml.data kernel tools.test ; IN: xml.tests -[ ] [ - "resource:basis/xmode/xmode.dtd" file>dtd 2drop +[ t ] [ + "resource:basis/xmode/xmode.dtd" file>dtd dtd? ] unit-test diff --git a/basis/xml/tokenize/tokenize.factor b/basis/xml/tokenize/tokenize.factor index 943f4e7a15..b629d46455 100644 --- a/basis/xml/tokenize/tokenize.factor +++ b/basis/xml/tokenize/tokenize.factor @@ -58,8 +58,8 @@ IN: xml.tokenize '[ @ [ t ] [ get-char _ push f ] if ] skip-until ] keep >string ; inline -: take-char ( ch -- string ) - [ dup get-char = ] take-until nip ; +: take-to ( seq -- string ) + '[ get-char _ member? ] take-until ; : pass-blank ( -- ) #! Advance code past any whitespace, including newlines @@ -75,33 +75,29 @@ IN: xml.tokenize dup length rot length 1- - head get-char [ missing-close ] unless next ; -: expect ( ch -- ) - get-char 2dup = [ 2drop ] [ - [ 1string ] bi@ expected - ] if next ; - -: expect-string ( string -- ) +: expect ( string -- ) dup [ get-char next ] replicate 2dup = [ 2drop ] [ expected ] if ; +! Suddenly XML-specific + : parse-named-entity ( string -- ) dup entities at [ , ] [ dup extra-entities get at [ % ] [ no-entity ] ?if ] ?if ; +: take-; ( -- string ) + next ";" take-to next ; + : parse-entity ( -- ) - next CHAR: ; take-char next - "#" ?head [ + take-; "#" ?head [ "x" ?head 16 10 ? base> , ] [ parse-named-entity ] if ; -SYMBOL: pe-table -SYMBOL: in-dtd? - : parse-pe ( -- ) - next CHAR: ; take-char dup next - pe-table get at [ % ] [ no-entity ] ?if ; + take-; dup pe-table get at + [ % ] [ no-entity ] ?if ; :: (parse-char) ( quot: ( ch -- ? 
) -- ) get-char :> char @@ -131,7 +127,7 @@ SYMBOL: in-dtd? ] parse-char ; : close ( -- ) - pass-blank CHAR: > expect ; + pass-blank ">" expect ; : normalize-quote ( str -- str ) [ dup "\t\r\n" member? [ drop CHAR: \s ] when ] map ; diff --git a/basis/xml/writer/writer.factor b/basis/xml/writer/writer.factor index d8283963f1..8cb32af12f 100644 --- a/basis/xml/writer/writer.factor +++ b/basis/xml/writer/writer.factor @@ -136,10 +136,10 @@ M: public-id write-xml-chunk [ pubid-literal>> write "' '" write ] [ system-literal>> write "'" write ] bi ; -: write-internal-subset ( seq -- ) +: write-internal-subset ( dtd -- ) [ "[" write indent - [ ?indent write-xml-chunk ] each + directives>> [ ?indent write-xml-chunk ] each unindent ?indent "]" write ] when* ; diff --git a/basis/xml/xml.factor b/basis/xml/xml.factor index 4dd872156e..fdabbdc4df 100644 --- a/basis/xml/xml.factor +++ b/basis/xml/xml.factor @@ -164,21 +164,15 @@ TUPLE: pull-xml scope ; : file>xml ( filename -- xml ) binary read-xml ; -: (read-dtd) ( -- dtd ) - ! should filter out blanks, throw error on non-dtd stuff - V{ } clone dup [ push ] curry sax-loop ; - -: read-dtd ( stream -- dtd entities ) +: read-dtd ( stream -- dtd ) [ - t in-dtd? set reset-prolog H{ } clone extra-entities set - (read-dtd) - extra-entities get + take-internal-subset ] with-state ; -: file>dtd ( filename -- dtd entities ) +: file>dtd ( filename -- dtd ) utf8 read-dtd ; -: string>dtd ( string -- dtd entities ) +: string>dtd ( string -- dtd ) read-dtd ; diff --git a/unmaintained/xml/syntax/syntax.factor b/unmaintained/xml/syntax/syntax.factor deleted file mode 100644 index 91b31ec7e6..0000000000 --- a/unmaintained/xml/syntax/syntax.factor +++ /dev/null @@ -1,58 +0,0 @@ -! Copyright (C) 2008 Daniel Ehrenberg. -! See http://factorcode.org/license.txt for BSD license. 
-USING: lexer parser splitting kernel quotations namespaces make -sequences assocs sequences.lib xml.generator xml.utilities -xml.data ; -IN: xml.syntax - -: parsed-name ( accum -- accum ) - scan ":" split1 [ f ] [ ] if* parsed ; - -: run-combinator ( accum quot1 quot2 -- accum ) - >r [ ] like parsed r> [ parsed ] each ; - -: parse-tag-contents ( accum contained? -- accum ) - [ \ contained*, parsed ] [ - scan-word \ [ = - [ POSTPONE: [ \ tag*, parsed ] - [ "Expected [ missing" throw ] if - ] if ; - -DEFER: >> - -: attributes-parsed ( accum quot -- accum ) - [ f parsed ] [ - >r \ >r parsed r> parsed - [ H{ } make-assoc r> swap ] [ parsed ] each - ] if-empty ; - -: << - parsed-name [ - \ >> parse-until >quotation - attributes-parsed \ contained? get - ] with-scope parse-tag-contents ; parsing - -: == - \ call parsed parsed-name \ set parsed ; parsing - -: // - \ contained? on ; parsing - -: parse-special ( accum end-token word -- accum ) - >r parse-tokens " " join parsed r> parsed ; - -: " \ comment, parse-special ; parsing - -: " \ directive, parse-special ; parsing - -: " \ instruction, parse-special ; parsing - -: >xml-document ( seq -- xml ) - dup first prolog? [ unclip-slice ] [ standard-prolog ] if swap - [ tag? ] split-around ; - -DEFER: XML> - -: [ >quotation ] parse-literal - { } parsed \ make parsed \ >xml-document parsed ; parsing