Merge branch 'master' of git://factorcode.org/git/factor
commit
f29f625787
|
@ -20,11 +20,11 @@ HELP: <duplex-stream>
|
|||
|
||||
HELP: with-stream
|
||||
{ $values { "stream" duplex-stream } { "quot" quotation } }
|
||||
{ $description "Calls the quotation in a new dynamic scope, with both " { $link input-stream } " and " { $link output-stream } " rebound to " { $snippet "stream" } ". The stream is closed if the quotation returns or throws an error." } ;
|
||||
{ $description "Calls the quotation in a new dynamic scope, with both " { $link input-stream } " and " { $link output-stream } " rebound to " { $snippet "stream" } ", which must be a duplex stream. The stream is closed if the quotation returns or throws an error." } ;
|
||||
|
||||
HELP: with-stream*
|
||||
{ $values { "stream" duplex-stream } { "quot" quotation } }
|
||||
{ $description "Calls the quotation in a new dynamic scope, with both " { $link input-stream } " and " { $link output-stream } " rebound to " { $snippet "stream" } "." }
|
||||
{ $description "Calls the quotation in a new dynamic scope, with both " { $link input-stream } " and " { $link output-stream } " rebound to " { $snippet "stream" } ", which must be a duplex stream." }
|
||||
{ $notes "This word does not close the stream. Compare with " { $link with-stream } "." } ;
|
||||
|
||||
HELP: <encoder-duplex>
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
<?xml version='1.0' encoding='ASCII'?><x>e</x>
|
|
@ -0,0 +1,12 @@
|
|||
USING: xml xml.data xml.utilities tools.test ;
|
||||
|
||||
[ "\u000131" ] [ "resource:basis/xml/tests/latin5.xml" file>xml children>string ] unit-test
|
||||
[ "\u0000e9" ] [ "resource:basis/xml/tests/latin1.xml" file>xml children>string ] unit-test
|
||||
[ "\u0000e9" ] [ "resource:basis/xml/tests/utf8.xml" file>xml children>string ] unit-test
|
||||
[ "\u0000e9" ] [ "resource:basis/xml/tests/utf16.xml" file>xml children>string ] unit-test
|
||||
[ "\u0000e9" ] [ "resource:basis/xml/tests/utf16be.xml" file>xml children>string ] unit-test
|
||||
[ "\u0000e9" ] [ "resource:basis/xml/tests/utf16le.xml" file>xml children>string ] unit-test
|
||||
[ "\u0000e9" ] [ "resource:basis/xml/tests/utf16be-bom.xml" file>xml children>string ] unit-test
|
||||
[ "\u0000e9" ] [ "resource:basis/xml/tests/utf16le-bom.xml" file>xml children>string ] unit-test
|
||||
[ "\u0000e9" ] [ "resource:basis/xml/tests/prologless.xml" file>xml children>string ] unit-test
|
||||
[ "e" ] [ "resource:basis/xml/tests/ascii.xml" file>xml children>string ] unit-test
|
|
@ -0,0 +1 @@
|
|||
<?xml version='1.0' encoding='ISO-8859-1'?><x>é</x>
|
|
@ -0,0 +1 @@
|
|||
<?xml version='1.0' encoding='ISO-8859-9'?><x>ý</x>
|
|
@ -0,0 +1 @@
|
|||
<x>é</x>
|
|
@ -53,12 +53,12 @@ SYMBOL: xml-file
|
|||
[ "<foo> bar </foo>" string>xml pprint-xml>string ] unit-test
|
||||
[ "<!-- B+, B, or B--->" string>xml ] must-fail
|
||||
[ ] [ "<?xml version='1.0'?><!-- declarations for <head> & <body> --><foo/>" string>xml drop ] unit-test
|
||||
[ T{ element-decl f "br" "EMPTY" } ] [ "<!ELEMENT br EMPTY>" string>xml-chunk second ] unit-test
|
||||
[ T{ element-decl f "p" "(#PCDATA|emph)*" } ] [ "<!ELEMENT p (#PCDATA|emph)*>" string>xml-chunk second ] unit-test
|
||||
[ T{ element-decl f "%name.para;" "%content.para;" } ] [ "<!ELEMENT %name.para; %content.para;>" string>xml-chunk second ] unit-test
|
||||
[ T{ element-decl f "container" "ANY" } ] [ "<!ELEMENT container ANY>" string>xml-chunk second ] unit-test
|
||||
[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo>" string>xml-chunk second ] unit-test
|
||||
[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo >" string>xml-chunk second ] unit-test
|
||||
[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM 'blah.dtd'>" string>xml-chunk second ] unit-test
|
||||
[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM \"blah.dtd\" >" string>xml-chunk second ] unit-test
|
||||
[ T{ element-decl f "br" "EMPTY" } ] [ "<!ELEMENT br EMPTY>" string>xml-chunk first ] unit-test
|
||||
[ T{ element-decl f "p" "(#PCDATA|emph)*" } ] [ "<!ELEMENT p (#PCDATA|emph)*>" string>xml-chunk first ] unit-test
|
||||
[ T{ element-decl f "%name.para;" "%content.para;" } ] [ "<!ELEMENT %name.para; %content.para;>" string>xml-chunk first ] unit-test
|
||||
[ T{ element-decl f "container" "ANY" } ] [ "<!ELEMENT container ANY>" string>xml-chunk first ] unit-test
|
||||
[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo>" string>xml-chunk first ] unit-test
|
||||
[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo >" string>xml-chunk first ] unit-test
|
||||
[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM 'blah.dtd'>" string>xml-chunk first ] unit-test
|
||||
[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM \"blah.dtd\" >" string>xml-chunk first ] unit-test
|
||||
[ t ] [ "<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.1//EN' 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd' >" dup string>xml-chunk [ write-xml-chunk ] with-string-writer = ] unit-test
|
||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1 @@
|
|||
<?xml version='1.0' encoding='UTF-8'?><x/>
|
|
@ -0,0 +1 @@
|
|||
<?xml version='1.0' encoding='UTF-8'?><x>é</x>
|
|
@ -1,9 +1,9 @@
|
|||
! Copyright (C) 2005, 2006 Daniel Ehrenberg
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: xml.errors xml.data xml.utilities xml.char-classes sets
|
||||
xml.entities kernel state-parser kernel namespaces make strings
|
||||
math math.parser sequences assocs arrays splitting combinators
|
||||
unicode.case accessors fry ascii ;
|
||||
USING: accessors arrays ascii assocs combinators fry io.encodings
|
||||
io.encodings.iana io.encodings.utf16 io.encodings.utf8 kernel
|
||||
make math.parser namespaces sequences sets splitting state-parser
|
||||
xml.char-classes xml.data xml.entities xml.errors strings ;
|
||||
IN: xml.tokenize
|
||||
|
||||
! XML namespace processing: ns = namespace
|
||||
|
@ -262,9 +262,15 @@ DEFER: direct
|
|||
[ yes/no>bool ] [ f ] if*
|
||||
<prolog> ;
|
||||
|
||||
SYMBOL: string-input?
|
||||
: decode-input-if ( encoding -- )
|
||||
string-input? get [ drop ] [ decode-input ] if ;
|
||||
|
||||
: parse-prolog ( -- prolog )
|
||||
pass-blank middle-tag "?>" expect-string
|
||||
dup assure-no-extra prolog-attrs
|
||||
dup encoding>> dup "UTF-16" =
|
||||
[ drop ] [ name>encoding [ decode-input-if ] when* ] if
|
||||
dup prolog-data set ;
|
||||
|
||||
: instruct ( -- instruction )
|
||||
|
@ -285,3 +291,50 @@ DEFER: direct
|
|||
CHAR: > expect
|
||||
]
|
||||
} cond ;
|
||||
|
||||
! Autodetecting encodings
|
||||
|
||||
: start-utf16le ( -- tag )
|
||||
utf16le decode-input-if
|
||||
CHAR: ? expect
|
||||
0 expect instruct ;
|
||||
|
||||
: start< ( -- tag )
|
||||
get-next {
|
||||
{ 0 [ next next start-utf16le ] }
|
||||
{ CHAR: ? [ next next instruct ] } ! XML prolog parsing sets the encoding
|
||||
[ drop utf8 decode-input-if next make-tag ]
|
||||
! That is a hack. It fails if you have <nonascii
|
||||
} case ;
|
||||
|
||||
: skip-utf8-bom ( -- tag )
|
||||
"\u0000bb\u0000bf" expect utf8 decode-input
|
||||
CHAR: < expect make-tag ;
|
||||
|
||||
: start-utf16be ( -- tag )
|
||||
utf16be decode-input-if
|
||||
next CHAR: < expect make-tag ;
|
||||
|
||||
: skip-utf16le-bom ( -- tag )
|
||||
utf16le decode-input-if
|
||||
next HEX: FE expect
|
||||
CHAR: < expect make-tag ;
|
||||
|
||||
: skip-utf16be-bom ( -- tag )
|
||||
utf16be decode-input-if
|
||||
next HEX: FF expect
|
||||
CHAR: < expect make-tag ;
|
||||
|
||||
: start-document ( -- tag )
|
||||
get-char {
|
||||
{ CHAR: < [ start< ] }
|
||||
{ 0 [ start-utf16be ] }
|
||||
{ HEX: EF [ skip-utf8-bom ] }
|
||||
{ HEX: FF [ skip-utf16le-bom ] }
|
||||
{ HEX: FE [ skip-utf16be-bom ] }
|
||||
{ f [ "" ] }
|
||||
[ dup blank?
|
||||
[ drop pass-blank utf8 decode-input-if CHAR: < expect make-tag ]
|
||||
[ 1string ] if ! Replace with proper error
|
||||
]
|
||||
} case ;
|
||||
|
|
|
@ -1,10 +1,9 @@
|
|||
! Copyright (C) 2005, 2006 Daniel Ehrenberg
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: io io.streams.string io.files kernel math namespaces
|
||||
prettyprint sequences arrays generic strings vectors
|
||||
xml.char-classes xml.data xml.errors xml.tokenize xml.writer
|
||||
xml.utilities state-parser assocs ascii io.encodings.utf8
|
||||
accessors xml.backend ;
|
||||
USING: accessors arrays io io.encodings.binary io.files
|
||||
io.streams.string kernel namespaces sequences state-parser strings
|
||||
xml.backend xml.data xml.errors xml.tokenize ascii
|
||||
xml.writer ;
|
||||
IN: xml
|
||||
|
||||
! -- Overall parser with data tree
|
||||
|
@ -23,7 +22,7 @@ GENERIC: process ( object -- )
|
|||
M: object process add-child ;
|
||||
|
||||
M: prolog process
|
||||
xml-stack get V{ { f V{ "" } } } =
|
||||
xml-stack get V{ { f V{ } } } =
|
||||
[ bad-prolog ] unless drop ;
|
||||
|
||||
M: instruction process
|
||||
|
@ -101,6 +100,7 @@ TUPLE: pull-xml scope ;
|
|||
text-now? on
|
||||
] H{ } make-assoc
|
||||
pull-xml boa ;
|
||||
! pull-xml needs to call start-document somewhere
|
||||
|
||||
: pull-event ( pull -- xml-event/f )
|
||||
scope>> [
|
||||
|
@ -133,11 +133,12 @@ TUPLE: pull-xml scope ;
|
|||
: sax ( stream quot: ( xml-elem -- ) -- )
|
||||
swap [
|
||||
reset-prolog init-ns-stack
|
||||
prolog-data get call-under
|
||||
start-document call-under
|
||||
sax-loop
|
||||
] state-parse ; inline recursive
|
||||
|
||||
: (read-xml) ( -- )
|
||||
start-document process
|
||||
[ process ] sax-loop ; inline
|
||||
|
||||
: (read-xml-chunk) ( stream -- prolog seq )
|
||||
|
@ -159,11 +160,12 @@ TUPLE: pull-xml scope ;
|
|||
<string-reader> read-xml ;
|
||||
|
||||
: string>xml-chunk ( string -- xml )
|
||||
<string-reader> read-xml-chunk ;
|
||||
t string-input?
|
||||
[ <string-reader> read-xml-chunk ] with-variable ;
|
||||
|
||||
: file>xml ( filename -- xml )
|
||||
! Autodetect encoding!
|
||||
utf8 <file-reader> read-xml ;
|
||||
binary <file-reader> read-xml ;
|
||||
|
||||
: xml-reprint ( string -- )
|
||||
string>xml print-xml ;
|
||||
|
|
Loading…
Reference in New Issue