Separating out reading DTDs
parent
934a23e818
commit
0041f26d90
|
@ -57,5 +57,13 @@ UNION: dtd-acceptable
|
||||||
pass-blank get-char {
|
pass-blank get-char {
|
||||||
{ CHAR: % [ next pass-blank pe-table take-entity-def t ] }
|
{ CHAR: % [ next pass-blank pe-table take-entity-def t ] }
|
||||||
[ drop extra-entities take-entity-def f ]
|
[ drop extra-entities take-entity-def f ]
|
||||||
} case
|
} case close <entity-decl> ;
|
||||||
close <entity-decl> ;
|
|
||||||
|
: take-inner-directive ( string -- directive )
|
||||||
|
{
|
||||||
|
{ "ELEMENT" [ take-element-decl ] }
|
||||||
|
{ "ATTLIST" [ take-attlist-decl ] }
|
||||||
|
{ "ENTITY" [ take-entity-decl ] }
|
||||||
|
{ "NOTATION" [ take-notation-decl ] }
|
||||||
|
[ bad-directive ]
|
||||||
|
} case ;
|
||||||
|
|
|
@ -136,16 +136,13 @@ DEFER: make-tag ! Is this unavoidable?
|
||||||
{ CHAR: > [ f f ] }
|
{ CHAR: > [ f f ] }
|
||||||
} case <doctype-decl> ;
|
} case <doctype-decl> ;
|
||||||
|
|
||||||
|
: take-directive ( -- doctype )
|
||||||
: take-directive ( -- directive )
|
take-name dup "DOCTYPE" =
|
||||||
take-name {
|
[ drop take-doctype-decl ] [
|
||||||
{ "ELEMENT" [ take-element-decl ] }
|
in-dtd? get
|
||||||
{ "ATTLIST" [ take-attlist-decl ] }
|
[ take-inner-directive ]
|
||||||
{ "DOCTYPE" [ take-doctype-decl ] }
|
[ misplaced-directive ] if
|
||||||
{ "ENTITY" [ take-entity-decl ] }
|
] if ;
|
||||||
{ "NOTATION" [ take-notation-decl ] }
|
|
||||||
[ bad-directive ]
|
|
||||||
} case ;
|
|
||||||
|
|
||||||
: direct ( -- object )
|
: direct ( -- object )
|
||||||
get-char {
|
get-char {
|
||||||
|
|
|
@ -7,10 +7,7 @@ IN: xml.entities.html
|
||||||
VALUE: html-entities
|
VALUE: html-entities
|
||||||
|
|
||||||
: read-entities-file ( file -- table )
|
: read-entities-file ( file -- table )
|
||||||
H{ } clone [ extra-entities [
|
file>dtd nip ;
|
||||||
binary <file-reader>
|
|
||||||
[ drop ] sax
|
|
||||||
] with-variable ] keep ;
|
|
||||||
|
|
||||||
: get-html ( -- table )
|
: get-html ( -- table )
|
||||||
{ "lat1" "special" "symbol" } [
|
{ "lat1" "special" "symbol" } [
|
||||||
|
|
|
@ -49,13 +49,14 @@ SYMBOL: xml-file
|
||||||
[ "foo" ] [ "<boo><![CDATA[foo]]></boo>" string>xml children>string ] unit-test
|
[ "foo" ] [ "<boo><![CDATA[foo]]></boo>" string>xml children>string ] unit-test
|
||||||
[ "<!-- B+, B, or B--->" string>xml ] must-fail
|
[ "<!-- B+, B, or B--->" string>xml ] must-fail
|
||||||
[ ] [ "<?xml version='1.0'?><!-- declarations for <head> & <body> --><foo/>" string>xml drop ] unit-test
|
[ ] [ "<?xml version='1.0'?><!-- declarations for <head> & <body> --><foo/>" string>xml drop ] unit-test
|
||||||
[ T{ element-decl f "br" "EMPTY" } ] [ "<!ELEMENT br EMPTY>" string>xml-chunk first ] unit-test
|
[ T{ element-decl f "br" "EMPTY" } ] [ "<!ELEMENT br EMPTY>" string>dtd drop second ] unit-test
|
||||||
[ T{ element-decl f "p" "(#PCDATA|emph)*" } ] [ "<!ELEMENT p (#PCDATA|emph)*>" string>xml-chunk first ] unit-test
|
[ T{ element-decl f "p" "(#PCDATA|emph)*" } ] [ "<!ELEMENT p (#PCDATA|emph)*>" string>dtd drop second ] unit-test
|
||||||
[ T{ element-decl f "%name.para;" "%content.para;" } ] [ "<!ELEMENT %name.para; %content.para;>" string>xml-chunk first ] unit-test
|
[ T{ element-decl f "%name.para;" "%content.para;" } ] [ "<!ELEMENT %name.para; %content.para;>" string>dtd drop second ] unit-test
|
||||||
[ T{ element-decl f "container" "ANY" } ] [ "<!ELEMENT container ANY>" string>xml-chunk first ] unit-test
|
[ T{ element-decl f "container" "ANY" } ] [ "<!ELEMENT container ANY>" string>dtd drop second ] unit-test
|
||||||
[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo>" string>xml-chunk first ] unit-test
|
[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo>" string>xml-chunk first ] unit-test
|
||||||
[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo >" string>xml-chunk first ] unit-test
|
[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo >" string>xml-chunk first ] unit-test
|
||||||
[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM 'blah.dtd'>" string>xml-chunk first ] unit-test
|
[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM 'blah.dtd'>" string>xml-chunk first ] unit-test
|
||||||
[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM \"blah.dtd\" >" string>xml-chunk first ] unit-test
|
[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM \"blah.dtd\" >" string>xml-chunk first ] unit-test
|
||||||
[ 958 ] [ [ "ξ" string>xml-chunk ] with-html-entities first first ] unit-test
|
[ 958 ] [ [ "ξ" string>xml-chunk ] with-html-entities first first ] unit-test
|
||||||
[ "x" "<" ] [ "<x value='<'/>" string>xml [ name>> main>> ] [ "value" swap at ] bi ] unit-test
|
[ "x" "<" ] [ "<x value='<'/>" string>xml [ name>> main>> ] [ "value" swap at ] bi ] unit-test
|
||||||
|
[ "foo" ] [ "<!DOCTYPE foo [<!ENTITY bar 'foo'>]><x>&bar;</x>" string>xml children>string ] unit-test
|
||||||
|
|
|
@ -4,6 +4,5 @@ USING: xml io.encodings.utf8 io.files kernel tools.test ;
|
||||||
IN: xml.tests
|
IN: xml.tests
|
||||||
|
|
||||||
[ ] [
|
[ ] [
|
||||||
"resource:basis/xmode/xmode.dtd" utf8 <file-reader>
|
"resource:basis/xmode/xmode.dtd" file>dtd 2drop
|
||||||
read-xml-chunk drop
|
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
|
@ -52,7 +52,6 @@ IN: xml.writer.tests
|
||||||
<x>&foo;</x>"} pprint-reprints-as
|
<x>&foo;</x>"} pprint-reprints-as
|
||||||
|
|
||||||
[ t ] [ "<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.1//EN' 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd' >" dup string>xml-chunk xml-chunk>string = ] unit-test
|
[ t ] [ "<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.1//EN' 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd' >" dup string>xml-chunk xml-chunk>string = ] unit-test
|
||||||
[ "foo" ] [ "<!ENTITY bar 'foo'><x>&bar;</x>" string>xml children>string ] unit-test
|
|
||||||
[ V{ "hello" } ] [ "hello" string>xml-chunk ] unit-test
|
[ V{ "hello" } ] [ "hello" string>xml-chunk ] unit-test
|
||||||
[ "<?xml version=\"1.0\" encoding=\"UTF-8\"?><a b=\"c\"/>" ]
|
[ "<?xml version=\"1.0\" encoding=\"UTF-8\"?><a b=\"c\"/>" ]
|
||||||
[ "<a b='c'/>" string>xml xml>string ] unit-test
|
[ "<a b='c'/>" string>xml xml>string ] unit-test
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
! Copyright (C) 2005, 2009 Daniel Ehrenberg
|
! Copyright (C) 2005, 2009 Daniel Ehrenberg
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: accessors arrays io io.encodings.binary io.files
|
USING: accessors arrays io io.encodings.binary io.files
|
||||||
io.streams.string kernel namespaces sequences strings
|
io.streams.string kernel namespaces sequences strings io.encodings.utf8
|
||||||
xml.backend xml.data xml.errors xml.elements ascii xml.entities
|
xml.backend xml.data xml.errors xml.elements ascii xml.entities
|
||||||
xml.writer xml.state xml.autoencoding assocs xml.tokenize xml.name ;
|
xml.writer xml.state xml.autoencoding assocs xml.tokenize xml.name ;
|
||||||
IN: xml
|
IN: xml
|
||||||
|
@ -163,3 +163,22 @@ TUPLE: pull-xml scope ;
|
||||||
|
|
||||||
: file>xml ( filename -- xml )
|
: file>xml ( filename -- xml )
|
||||||
binary <file-reader> read-xml ;
|
binary <file-reader> read-xml ;
|
||||||
|
|
||||||
|
: (read-dtd) ( -- dtd )
|
||||||
|
! should filter out blanks, throw error on non-dtd stuff
|
||||||
|
V{ } clone dup [ push ] curry sax-loop ;
|
||||||
|
|
||||||
|
: read-dtd ( stream -- dtd entities )
|
||||||
|
[
|
||||||
|
t in-dtd? set
|
||||||
|
reset-prolog
|
||||||
|
H{ } clone extra-entities set
|
||||||
|
(read-dtd)
|
||||||
|
extra-entities get
|
||||||
|
] with-state ;
|
||||||
|
|
||||||
|
: file>dtd ( filename -- dtd entities )
|
||||||
|
utf8 <file-reader> read-dtd ;
|
||||||
|
|
||||||
|
: string>dtd ( string -- dtd entities )
|
||||||
|
<string-reader> read-dtd ;
|
||||||
|
|
Loading…
Reference in New Issue