Separating out reading DTDs

db4
Daniel Ehrenberg 2009-01-22 16:31:22 -06:00
parent 934a23e818
commit 0041f26d90
7 changed files with 44 additions and 24 deletions

View File

@ -57,5 +57,13 @@ UNION: dtd-acceptable
pass-blank get-char { pass-blank get-char {
{ CHAR: % [ next pass-blank pe-table take-entity-def t ] } { CHAR: % [ next pass-blank pe-table take-entity-def t ] }
[ drop extra-entities take-entity-def f ] [ drop extra-entities take-entity-def f ]
} case } case close <entity-decl> ;
close <entity-decl> ;
: take-inner-directive ( string -- directive )
{
{ "ELEMENT" [ take-element-decl ] }
{ "ATTLIST" [ take-attlist-decl ] }
{ "ENTITY" [ take-entity-decl ] }
{ "NOTATION" [ take-notation-decl ] }
[ bad-directive ]
} case ;

View File

@ -136,16 +136,13 @@ DEFER: make-tag ! Is this unavoidable?
{ CHAR: > [ f f ] } { CHAR: > [ f f ] }
} case <doctype-decl> ; } case <doctype-decl> ;
: take-directive ( -- doctype )
: take-directive ( -- directive ) take-name dup "DOCTYPE" =
take-name { [ drop take-doctype-decl ] [
{ "ELEMENT" [ take-element-decl ] } in-dtd? get
{ "ATTLIST" [ take-attlist-decl ] } [ take-inner-directive ]
{ "DOCTYPE" [ take-doctype-decl ] } [ misplaced-directive ] if
{ "ENTITY" [ take-entity-decl ] } ] if ;
{ "NOTATION" [ take-notation-decl ] }
[ bad-directive ]
} case ;
: direct ( -- object ) : direct ( -- object )
get-char { get-char {

View File

@ -7,10 +7,7 @@ IN: xml.entities.html
VALUE: html-entities VALUE: html-entities
: read-entities-file ( file -- table ) : read-entities-file ( file -- table )
H{ } clone [ extra-entities [ file>dtd nip ;
binary <file-reader>
[ drop ] sax
] with-variable ] keep ;
: get-html ( -- table ) : get-html ( -- table )
{ "lat1" "special" "symbol" } [ { "lat1" "special" "symbol" } [

View File

@ -49,13 +49,14 @@ SYMBOL: xml-file
[ "foo" ] [ "<boo><![CDATA[foo]]></boo>" string>xml children>string ] unit-test [ "foo" ] [ "<boo><![CDATA[foo]]></boo>" string>xml children>string ] unit-test
[ "<!-- B+, B, or B--->" string>xml ] must-fail [ "<!-- B+, B, or B--->" string>xml ] must-fail
[ ] [ "<?xml version='1.0'?><!-- declarations for <head> & <body> --><foo/>" string>xml drop ] unit-test [ ] [ "<?xml version='1.0'?><!-- declarations for <head> & <body> --><foo/>" string>xml drop ] unit-test
[ T{ element-decl f "br" "EMPTY" } ] [ "<!ELEMENT br EMPTY>" string>xml-chunk first ] unit-test [ T{ element-decl f "br" "EMPTY" } ] [ "<!ELEMENT br EMPTY>" string>dtd drop second ] unit-test
[ T{ element-decl f "p" "(#PCDATA|emph)*" } ] [ "<!ELEMENT p (#PCDATA|emph)*>" string>xml-chunk first ] unit-test [ T{ element-decl f "p" "(#PCDATA|emph)*" } ] [ "<!ELEMENT p (#PCDATA|emph)*>" string>dtd drop second ] unit-test
[ T{ element-decl f "%name.para;" "%content.para;" } ] [ "<!ELEMENT %name.para; %content.para;>" string>xml-chunk first ] unit-test [ T{ element-decl f "%name.para;" "%content.para;" } ] [ "<!ELEMENT %name.para; %content.para;>" string>dtd drop second ] unit-test
[ T{ element-decl f "container" "ANY" } ] [ "<!ELEMENT container ANY>" string>xml-chunk first ] unit-test [ T{ element-decl f "container" "ANY" } ] [ "<!ELEMENT container ANY>" string>dtd drop second ] unit-test
[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo>" string>xml-chunk first ] unit-test [ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo>" string>xml-chunk first ] unit-test
[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo >" string>xml-chunk first ] unit-test [ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo >" string>xml-chunk first ] unit-test
[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM 'blah.dtd'>" string>xml-chunk first ] unit-test [ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM 'blah.dtd'>" string>xml-chunk first ] unit-test
[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM \"blah.dtd\" >" string>xml-chunk first ] unit-test [ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM \"blah.dtd\" >" string>xml-chunk first ] unit-test
[ 958 ] [ [ "&xi;" string>xml-chunk ] with-html-entities first first ] unit-test [ 958 ] [ [ "&xi;" string>xml-chunk ] with-html-entities first first ] unit-test
[ "x" "<" ] [ "<x value='&lt;'/>" string>xml [ name>> main>> ] [ "value" swap at ] bi ] unit-test [ "x" "<" ] [ "<x value='&lt;'/>" string>xml [ name>> main>> ] [ "value" swap at ] bi ] unit-test
[ "foo" ] [ "<!DOCTYPE foo [<!ENTITY bar 'foo'>]><x>&bar;</x>" string>xml children>string ] unit-test

View File

@ -4,6 +4,5 @@ USING: xml io.encodings.utf8 io.files kernel tools.test ;
IN: xml.tests IN: xml.tests
[ ] [ [ ] [
"resource:basis/xmode/xmode.dtd" utf8 <file-reader> "resource:basis/xmode/xmode.dtd" file>dtd 2drop
read-xml-chunk drop
] unit-test ] unit-test

View File

@ -52,7 +52,6 @@ IN: xml.writer.tests
<x>&foo;</x>"} pprint-reprints-as <x>&foo;</x>"} pprint-reprints-as
[ t ] [ "<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.1//EN' 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd' >" dup string>xml-chunk xml-chunk>string = ] unit-test [ t ] [ "<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.1//EN' 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd' >" dup string>xml-chunk xml-chunk>string = ] unit-test
[ "foo" ] [ "<!ENTITY bar 'foo'><x>&bar;</x>" string>xml children>string ] unit-test
[ V{ "hello" } ] [ "hello" string>xml-chunk ] unit-test [ V{ "hello" } ] [ "hello" string>xml-chunk ] unit-test
[ "<?xml version=\"1.0\" encoding=\"UTF-8\"?><a b=\"c\"/>" ] [ "<?xml version=\"1.0\" encoding=\"UTF-8\"?><a b=\"c\"/>" ]
[ "<a b='c'/>" string>xml xml>string ] unit-test [ "<a b='c'/>" string>xml xml>string ] unit-test

View File

@ -1,7 +1,7 @@
! Copyright (C) 2005, 2009 Daniel Ehrenberg ! Copyright (C) 2005, 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays io io.encodings.binary io.files USING: accessors arrays io io.encodings.binary io.files
io.streams.string kernel namespaces sequences strings io.streams.string kernel namespaces sequences strings io.encodings.utf8
xml.backend xml.data xml.errors xml.elements ascii xml.entities xml.backend xml.data xml.errors xml.elements ascii xml.entities
xml.writer xml.state xml.autoencoding assocs xml.tokenize xml.name ; xml.writer xml.state xml.autoencoding assocs xml.tokenize xml.name ;
IN: xml IN: xml
@ -163,3 +163,22 @@ TUPLE: pull-xml scope ;
: file>xml ( filename -- xml ) : file>xml ( filename -- xml )
binary <file-reader> read-xml ; binary <file-reader> read-xml ;
: (read-dtd) ( -- dtd )
! should filter out blanks, throw error on non-dtd stuff
V{ } clone dup [ push ] curry sax-loop ;
: read-dtd ( stream -- dtd entities )
[
t in-dtd? set
reset-prolog
H{ } clone extra-entities set
(read-dtd)
extra-entities get
] with-state ;
: file>dtd ( filename -- dtd entities )
utf8 <file-reader> read-dtd ;
: string>dtd ( string -- dtd entities )
<string-reader> read-dtd ;