From 0041f26d908551bda4cd3af0508b807e55fca9dd Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Thu, 22 Jan 2009 16:31:22 -0600 Subject: [PATCH] Separating out reading DTDs --- basis/xml/dtd/dtd.factor | 12 ++++++++++-- basis/xml/elements/elements.factor | 17 +++++++---------- basis/xml/entities/html/html.factor | 5 +---- basis/xml/tests/test.factor | 9 +++++---- basis/xml/tests/xmode-dtd.factor | 3 +-- basis/xml/writer/writer-tests.factor | 1 - basis/xml/xml.factor | 21 ++++++++++++++++++++- 7 files changed, 44 insertions(+), 24 deletions(-) diff --git a/basis/xml/dtd/dtd.factor b/basis/xml/dtd/dtd.factor index a1b90a60d7..a668717626 100644 --- a/basis/xml/dtd/dtd.factor +++ b/basis/xml/dtd/dtd.factor @@ -57,5 +57,13 @@ UNION: dtd-acceptable pass-blank get-char { { CHAR: % [ next pass-blank pe-table take-entity-def t ] } [ drop extra-entities take-entity-def f ] - } case - close ; + } case close ; + +: take-inner-directive ( string -- directive ) + { + { "ELEMENT" [ take-element-decl ] } + { "ATTLIST" [ take-attlist-decl ] } + { "ENTITY" [ take-entity-decl ] } + { "NOTATION" [ take-notation-decl ] } + [ bad-directive ] + } case ; diff --git a/basis/xml/elements/elements.factor b/basis/xml/elements/elements.factor index 65b8b66536..947c11e2a8 100644 --- a/basis/xml/elements/elements.factor +++ b/basis/xml/elements/elements.factor @@ -136,16 +136,13 @@ DEFER: make-tag ! Is this unavoidable? { CHAR: > [ f f ] } } case ; - -: take-directive ( -- directive ) - take-name { - { "ELEMENT" [ take-element-decl ] } - { "ATTLIST" [ take-attlist-decl ] } - { "DOCTYPE" [ take-doctype-decl ] } - { "ENTITY" [ take-entity-decl ] } - { "NOTATION" [ take-notation-decl ] } - [ bad-directive ] - } case ; +: take-directive ( -- doctype ) + take-name dup "DOCTYPE" = + [ drop take-doctype-decl ] [ + in-dtd? get + [ take-inner-directive ] + [ misplaced-directive ] if + ] if ; : direct ( -- object ) get-char { diff --git a/basis/xml/entities/html/html.factor b/basis/xml/entities/html/html.factor index 601b95a596..826dccf79d 100644 --- a/basis/xml/entities/html/html.factor +++ b/basis/xml/entities/html/html.factor @@ -7,10 +7,7 @@ IN: xml.entities.html VALUE: html-entities : read-entities-file ( file -- table ) - H{ } clone [ extra-entities [ - binary - [ drop ] sax - ] with-variable ] keep ; + file>dtd nip ; : get-html ( -- table ) { "lat1" "special" "symbol" } [ diff --git a/basis/xml/tests/test.factor b/basis/xml/tests/test.factor index 61873d85bf..794796339e 100644 --- a/basis/xml/tests/test.factor +++ b/basis/xml/tests/test.factor @@ -49,13 +49,14 @@ SYMBOL: xml-file [ "foo" ] [ "" string>xml children>string ] unit-test [ "" string>xml ] must-fail [ ] [ "" string>xml drop ] unit-test -[ T{ element-decl f "br" "EMPTY" } ] [ "" string>xml-chunk first ] unit-test -[ T{ element-decl f "p" "(#PCDATA|emph)*" } ] [ "" string>xml-chunk first ] unit-test -[ T{ element-decl f "%name.para;" "%content.para;" } ] [ "" string>xml-chunk first ] unit-test -[ T{ element-decl f "container" "ANY" } ] [ "" string>xml-chunk first ] unit-test +[ T{ element-decl f "br" "EMPTY" } ] [ "" string>dtd drop second ] unit-test +[ T{ element-decl f "p" "(#PCDATA|emph)*" } ] [ "" string>dtd drop second ] unit-test +[ T{ element-decl f "%name.para;" "%content.para;" } ] [ "" string>dtd drop second ] unit-test +[ T{ element-decl f "container" "ANY" } ] [ "" string>dtd drop second ] unit-test [ T{ doctype-decl f "foo" } ] [ "" string>xml-chunk first ] unit-test [ T{ doctype-decl f "foo" } ] [ "" string>xml-chunk first ] unit-test [ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "" string>xml-chunk first ] unit-test [ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "" string>xml-chunk first ] unit-test [ 958 ] [ [ "ξ" string>xml-chunk ] with-html-entities first first ] unit-test [ "x" "<" ] [ "" string>xml [ name>> main>> ] [ "value" swap at ] bi ] unit-test +[ "foo" ] [ "]>&bar;" string>xml children>string ] unit-test diff --git a/basis/xml/tests/xmode-dtd.factor b/basis/xml/tests/xmode-dtd.factor index c15d3a462e..85e3516444 100644 --- a/basis/xml/tests/xmode-dtd.factor +++ b/basis/xml/tests/xmode-dtd.factor @@ -4,6 +4,5 @@ USING: xml io.encodings.utf8 io.files kernel tools.test ; IN: xml.tests [ ] [ - "resource:basis/xmode/xmode.dtd" utf8 - read-xml-chunk drop + "resource:basis/xmode/xmode.dtd" file>dtd 2drop ] unit-test diff --git a/basis/xml/writer/writer-tests.factor b/basis/xml/writer/writer-tests.factor index 2b00c90344..2d3a90cc15 100644 --- a/basis/xml/writer/writer-tests.factor +++ b/basis/xml/writer/writer-tests.factor @@ -52,7 +52,6 @@ IN: xml.writer.tests &foo;"} pprint-reprints-as [ t ] [ "" dup string>xml-chunk xml-chunk>string = ] unit-test -[ "foo" ] [ "&bar;" string>xml children>string ] unit-test [ V{ "hello" } ] [ "hello" string>xml-chunk ] unit-test [ "" ] [ "" string>xml xml>string ] unit-test diff --git a/basis/xml/xml.factor b/basis/xml/xml.factor index 636aa288b5..29b647eb78 100644 --- a/basis/xml/xml.factor +++ b/basis/xml/xml.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2005, 2009 Daniel Ehrenberg ! See http://factorcode.org/license.txt for BSD license. USING: accessors arrays io io.encodings.binary io.files -io.streams.string kernel namespaces sequences strings +io.streams.string kernel namespaces sequences strings io.encodings.utf8 xml.backend xml.data xml.errors xml.elements ascii xml.entities xml.writer xml.state xml.autoencoding assocs xml.tokenize xml.name ; IN: xml @@ -163,3 +163,22 @@ TUPLE: pull-xml scope ; : file>xml ( filename -- xml ) binary read-xml ; + +: (read-dtd) ( -- dtd ) + ! should filter out blanks, throw error on non-dtd stuff + V{ } clone dup [ push ] curry sax-loop ; + +: read-dtd ( stream -- dtd entities ) + [ + t in-dtd? set + reset-prolog + H{ } clone extra-entities set + (read-dtd) + extra-entities get + ] with-state ; + +: file>dtd ( filename -- dtd entities ) + utf8 read-dtd ; + +: string>dtd ( string -- dtd entities ) + read-dtd ;