Separating out reading DTDs

db4
Daniel Ehrenberg 2009-01-22 16:31:22 -06:00
parent 934a23e818
commit 0041f26d90
7 changed files with 44 additions and 24 deletions

View File

@ -57,5 +57,13 @@ UNION: dtd-acceptable
pass-blank get-char {
{ CHAR: % [ next pass-blank pe-table take-entity-def t ] }
[ drop extra-entities take-entity-def f ]
} case
close <entity-decl> ;
} case close <entity-decl> ;
: take-inner-directive ( string -- directive )
{
{ "ELEMENT" [ take-element-decl ] }
{ "ATTLIST" [ take-attlist-decl ] }
{ "ENTITY" [ take-entity-decl ] }
{ "NOTATION" [ take-notation-decl ] }
[ bad-directive ]
} case ;

View File

@ -136,16 +136,13 @@ DEFER: make-tag ! Is this unavoidable?
{ CHAR: > [ f f ] }
} case <doctype-decl> ;
: take-directive ( -- directive )
take-name {
{ "ELEMENT" [ take-element-decl ] }
{ "ATTLIST" [ take-attlist-decl ] }
{ "DOCTYPE" [ take-doctype-decl ] }
{ "ENTITY" [ take-entity-decl ] }
{ "NOTATION" [ take-notation-decl ] }
[ bad-directive ]
} case ;
: take-directive ( -- doctype )
take-name dup "DOCTYPE" =
[ drop take-doctype-decl ] [
in-dtd? get
[ take-inner-directive ]
[ misplaced-directive ] if
] if ;
: direct ( -- object )
get-char {

View File

@ -7,10 +7,7 @@ IN: xml.entities.html
VALUE: html-entities
: read-entities-file ( file -- table )
H{ } clone [ extra-entities [
binary <file-reader>
[ drop ] sax
] with-variable ] keep ;
file>dtd nip ;
: get-html ( -- table )
{ "lat1" "special" "symbol" } [

View File

@ -49,13 +49,14 @@ SYMBOL: xml-file
[ "foo" ] [ "<boo><![CDATA[foo]]></boo>" string>xml children>string ] unit-test
[ "<!-- B+, B, or B--->" string>xml ] must-fail
[ ] [ "<?xml version='1.0'?><!-- declarations for <head> & <body> --><foo/>" string>xml drop ] unit-test
[ T{ element-decl f "br" "EMPTY" } ] [ "<!ELEMENT br EMPTY>" string>xml-chunk first ] unit-test
[ T{ element-decl f "p" "(#PCDATA|emph)*" } ] [ "<!ELEMENT p (#PCDATA|emph)*>" string>xml-chunk first ] unit-test
[ T{ element-decl f "%name.para;" "%content.para;" } ] [ "<!ELEMENT %name.para; %content.para;>" string>xml-chunk first ] unit-test
[ T{ element-decl f "container" "ANY" } ] [ "<!ELEMENT container ANY>" string>xml-chunk first ] unit-test
[ T{ element-decl f "br" "EMPTY" } ] [ "<!ELEMENT br EMPTY>" string>dtd drop second ] unit-test
[ T{ element-decl f "p" "(#PCDATA|emph)*" } ] [ "<!ELEMENT p (#PCDATA|emph)*>" string>dtd drop second ] unit-test
[ T{ element-decl f "%name.para;" "%content.para;" } ] [ "<!ELEMENT %name.para; %content.para;>" string>dtd drop second ] unit-test
[ T{ element-decl f "container" "ANY" } ] [ "<!ELEMENT container ANY>" string>dtd drop second ] unit-test
[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo>" string>xml-chunk first ] unit-test
[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo >" string>xml-chunk first ] unit-test
[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM 'blah.dtd'>" string>xml-chunk first ] unit-test
[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM \"blah.dtd\" >" string>xml-chunk first ] unit-test
[ 958 ] [ [ "&xi;" string>xml-chunk ] with-html-entities first first ] unit-test
[ "x" "<" ] [ "<x value='&lt;'/>" string>xml [ name>> main>> ] [ "value" swap at ] bi ] unit-test
[ "foo" ] [ "<!DOCTYPE foo [<!ENTITY bar 'foo'>]><x>&bar;</x>" string>xml children>string ] unit-test

View File

@ -4,6 +4,5 @@ USING: xml io.encodings.utf8 io.files kernel tools.test ;
IN: xml.tests
[ ] [
"resource:basis/xmode/xmode.dtd" utf8 <file-reader>
read-xml-chunk drop
"resource:basis/xmode/xmode.dtd" file>dtd 2drop
] unit-test

View File

@ -52,7 +52,6 @@ IN: xml.writer.tests
<x>&foo;</x>"} pprint-reprints-as
[ t ] [ "<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.1//EN' 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd' >" dup string>xml-chunk xml-chunk>string = ] unit-test
[ "foo" ] [ "<!ENTITY bar 'foo'><x>&bar;</x>" string>xml children>string ] unit-test
[ V{ "hello" } ] [ "hello" string>xml-chunk ] unit-test
[ "<?xml version=\"1.0\" encoding=\"UTF-8\"?><a b=\"c\"/>" ]
[ "<a b='c'/>" string>xml xml>string ] unit-test

View File

@ -1,7 +1,7 @@
! Copyright (C) 2005, 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays io io.encodings.binary io.files
io.streams.string kernel namespaces sequences strings
io.streams.string kernel namespaces sequences strings io.encodings.utf8
xml.backend xml.data xml.errors xml.elements ascii xml.entities
xml.writer xml.state xml.autoencoding assocs xml.tokenize xml.name ;
IN: xml
@ -163,3 +163,22 @@ TUPLE: pull-xml scope ;
: file>xml ( filename -- xml )
binary <file-reader> read-xml ;
: (read-dtd) ( -- dtd )
! should filter out blanks, throw error on non-dtd stuff
V{ } clone dup [ push ] curry sax-loop ;
: read-dtd ( stream -- dtd entities )
[
t in-dtd? set
reset-prolog
H{ } clone extra-entities set
(read-dtd)
extra-entities get
] with-state ;
: file>dtd ( filename -- dtd entities )
utf8 <file-reader> read-dtd ;
: string>dtd ( string -- dtd entities )
<string-reader> read-dtd ;