Merge branch 'master' of git://factorcode.org/git/factor
commit
f29f625787
|
@ -20,11 +20,11 @@ HELP: <duplex-stream>
|
||||||
|
|
||||||
HELP: with-stream
|
HELP: with-stream
|
||||||
{ $values { "stream" duplex-stream } { "quot" quotation } }
|
{ $values { "stream" duplex-stream } { "quot" quotation } }
|
||||||
{ $description "Calls the quotation in a new dynamic scope, with both " { $link input-stream } " and " { $link output-stream } " rebound to " { $snippet "stream" } ". The stream is closed if the quotation returns or throws an error." } ;
|
{ $description "Calls the quotation in a new dynamic scope, with both " { $link input-stream } " and " { $link output-stream } " rebound to " { $snippet "stream" } ", which must be a duplex stream. The stream is closed if the quotation returns or throws an error." } ;
|
||||||
|
|
||||||
HELP: with-stream*
|
HELP: with-stream*
|
||||||
{ $values { "stream" duplex-stream } { "quot" quotation } }
|
{ $values { "stream" duplex-stream } { "quot" quotation } }
|
||||||
{ $description "Calls the quotation in a new dynamic scope, with both " { $link input-stream } " and " { $link output-stream } " rebound to " { $snippet "stream" } "." }
|
{ $description "Calls the quotation in a new dynamic scope, with both " { $link input-stream } " and " { $link output-stream } " rebound to " { $snippet "stream" } ", which must be a duplex stream." }
|
||||||
{ $notes "This word does not close the stream. Compare with " { $link with-stream } "." } ;
|
{ $notes "This word does not close the stream. Compare with " { $link with-stream } "." } ;
|
||||||
|
|
||||||
HELP: <encoder-duplex>
|
HELP: <encoder-duplex>
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
<?xml version='1.0' encoding='ASCII'?><x>e</x>
|
|
@ -0,0 +1,12 @@
|
||||||
|
USING: xml xml.data xml.utilities tools.test ;
|
||||||
|
|
||||||
|
[ "\u000131" ] [ "resource:basis/xml/tests/latin5.xml" file>xml children>string ] unit-test
|
||||||
|
[ "\u0000e9" ] [ "resource:basis/xml/tests/latin1.xml" file>xml children>string ] unit-test
|
||||||
|
[ "\u0000e9" ] [ "resource:basis/xml/tests/utf8.xml" file>xml children>string ] unit-test
|
||||||
|
[ "\u0000e9" ] [ "resource:basis/xml/tests/utf16.xml" file>xml children>string ] unit-test
|
||||||
|
[ "\u0000e9" ] [ "resource:basis/xml/tests/utf16be.xml" file>xml children>string ] unit-test
|
||||||
|
[ "\u0000e9" ] [ "resource:basis/xml/tests/utf16le.xml" file>xml children>string ] unit-test
|
||||||
|
[ "\u0000e9" ] [ "resource:basis/xml/tests/utf16be-bom.xml" file>xml children>string ] unit-test
|
||||||
|
[ "\u0000e9" ] [ "resource:basis/xml/tests/utf16le-bom.xml" file>xml children>string ] unit-test
|
||||||
|
[ "\u0000e9" ] [ "resource:basis/xml/tests/prologless.xml" file>xml children>string ] unit-test
|
||||||
|
[ "e" ] [ "resource:basis/xml/tests/ascii.xml" file>xml children>string ] unit-test
|
|
@ -0,0 +1 @@
|
||||||
|
<?xml version='1.0' encoding='ISO-8859-1'?><x>é</x>
|
|
@ -0,0 +1 @@
|
||||||
|
<?xml version='1.0' encoding='ISO-8859-9'?><x>ý</x>
|
|
@ -0,0 +1 @@
|
||||||
|
<x>é</x>
|
|
@ -53,12 +53,12 @@ SYMBOL: xml-file
|
||||||
[ "<foo> bar </foo>" string>xml pprint-xml>string ] unit-test
|
[ "<foo> bar </foo>" string>xml pprint-xml>string ] unit-test
|
||||||
[ "<!-- B+, B, or B--->" string>xml ] must-fail
|
[ "<!-- B+, B, or B--->" string>xml ] must-fail
|
||||||
[ ] [ "<?xml version='1.0'?><!-- declarations for <head> & <body> --><foo/>" string>xml drop ] unit-test
|
[ ] [ "<?xml version='1.0'?><!-- declarations for <head> & <body> --><foo/>" string>xml drop ] unit-test
|
||||||
[ T{ element-decl f "br" "EMPTY" } ] [ "<!ELEMENT br EMPTY>" string>xml-chunk second ] unit-test
|
[ T{ element-decl f "br" "EMPTY" } ] [ "<!ELEMENT br EMPTY>" string>xml-chunk first ] unit-test
|
||||||
[ T{ element-decl f "p" "(#PCDATA|emph)*" } ] [ "<!ELEMENT p (#PCDATA|emph)*>" string>xml-chunk second ] unit-test
|
[ T{ element-decl f "p" "(#PCDATA|emph)*" } ] [ "<!ELEMENT p (#PCDATA|emph)*>" string>xml-chunk first ] unit-test
|
||||||
[ T{ element-decl f "%name.para;" "%content.para;" } ] [ "<!ELEMENT %name.para; %content.para;>" string>xml-chunk second ] unit-test
|
[ T{ element-decl f "%name.para;" "%content.para;" } ] [ "<!ELEMENT %name.para; %content.para;>" string>xml-chunk first ] unit-test
|
||||||
[ T{ element-decl f "container" "ANY" } ] [ "<!ELEMENT container ANY>" string>xml-chunk second ] unit-test
|
[ T{ element-decl f "container" "ANY" } ] [ "<!ELEMENT container ANY>" string>xml-chunk first ] unit-test
|
||||||
[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo>" string>xml-chunk second ] unit-test
|
[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo>" string>xml-chunk first ] unit-test
|
||||||
[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo >" string>xml-chunk second ] unit-test
|
[ T{ doctype-decl f "foo" } ] [ "<!DOCTYPE foo >" string>xml-chunk first ] unit-test
|
||||||
[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM 'blah.dtd'>" string>xml-chunk second ] unit-test
|
[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM 'blah.dtd'>" string>xml-chunk first ] unit-test
|
||||||
[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM \"blah.dtd\" >" string>xml-chunk second ] unit-test
|
[ T{ doctype-decl f "foo" T{ system-id f "blah.dtd" } } ] [ "<!DOCTYPE foo SYSTEM \"blah.dtd\" >" string>xml-chunk first ] unit-test
|
||||||
[ t ] [ "<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.1//EN' 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd' >" dup string>xml-chunk [ write-xml-chunk ] with-string-writer = ] unit-test
|
[ t ] [ "<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.1//EN' 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd' >" dup string>xml-chunk [ write-xml-chunk ] with-string-writer = ] unit-test
|
||||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1 @@
|
||||||
|
<?xml version='1.0' encoding='UTF-8'?><x/>
|
|
@ -0,0 +1 @@
|
||||||
|
<?xml version='1.0' encoding='UTF-8'?><x>é</x>
|
|
@ -1,9 +1,9 @@
|
||||||
! Copyright (C) 2005, 2006 Daniel Ehrenberg
|
! Copyright (C) 2005, 2006 Daniel Ehrenberg
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: xml.errors xml.data xml.utilities xml.char-classes sets
|
USING: accessors arrays ascii assocs combinators fry io.encodings
|
||||||
xml.entities kernel state-parser kernel namespaces make strings
|
io.encodings.iana io.encodings.utf16 io.encodings.utf8 kernel
|
||||||
math math.parser sequences assocs arrays splitting combinators
|
make math.parser namespaces sequences sets splitting state-parser
|
||||||
unicode.case accessors fry ascii ;
|
xml.char-classes xml.data xml.entities xml.errors strings ;
|
||||||
IN: xml.tokenize
|
IN: xml.tokenize
|
||||||
|
|
||||||
! XML namespace processing: ns = namespace
|
! XML namespace processing: ns = namespace
|
||||||
|
@ -262,9 +262,15 @@ DEFER: direct
|
||||||
[ yes/no>bool ] [ f ] if*
|
[ yes/no>bool ] [ f ] if*
|
||||||
<prolog> ;
|
<prolog> ;
|
||||||
|
|
||||||
|
SYMBOL: string-input?
|
||||||
|
: decode-input-if ( encoding -- )
|
||||||
|
string-input? get [ drop ] [ decode-input ] if ;
|
||||||
|
|
||||||
: parse-prolog ( -- prolog )
|
: parse-prolog ( -- prolog )
|
||||||
pass-blank middle-tag "?>" expect-string
|
pass-blank middle-tag "?>" expect-string
|
||||||
dup assure-no-extra prolog-attrs
|
dup assure-no-extra prolog-attrs
|
||||||
|
dup encoding>> dup "UTF-16" =
|
||||||
|
[ drop ] [ name>encoding [ decode-input-if ] when* ] if
|
||||||
dup prolog-data set ;
|
dup prolog-data set ;
|
||||||
|
|
||||||
: instruct ( -- instruction )
|
: instruct ( -- instruction )
|
||||||
|
@ -285,3 +291,50 @@ DEFER: direct
|
||||||
CHAR: > expect
|
CHAR: > expect
|
||||||
]
|
]
|
||||||
} cond ;
|
} cond ;
|
||||||
|
|
||||||
|
! Autodetecting encodings
|
||||||
|
|
||||||
|
: start-utf16le ( -- tag )
|
||||||
|
utf16le decode-input-if
|
||||||
|
CHAR: ? expect
|
||||||
|
0 expect instruct ;
|
||||||
|
|
||||||
|
: start< ( -- tag )
|
||||||
|
get-next {
|
||||||
|
{ 0 [ next next start-utf16le ] }
|
||||||
|
{ CHAR: ? [ next next instruct ] } ! XML prolog parsing sets the encoding
|
||||||
|
[ drop utf8 decode-input-if next make-tag ]
|
||||||
|
! That is a hack. It fails if you have <nonascii
|
||||||
|
} case ;
|
||||||
|
|
||||||
|
: skip-utf8-bom ( -- tag )
|
||||||
|
"\u0000bb\u0000bf" expect utf8 decode-input
|
||||||
|
CHAR: < expect make-tag ;
|
||||||
|
|
||||||
|
: start-utf16be ( -- tag )
|
||||||
|
utf16be decode-input-if
|
||||||
|
next CHAR: < expect make-tag ;
|
||||||
|
|
||||||
|
: skip-utf16le-bom ( -- tag )
|
||||||
|
utf16le decode-input-if
|
||||||
|
next HEX: FE expect
|
||||||
|
CHAR: < expect make-tag ;
|
||||||
|
|
||||||
|
: skip-utf16be-bom ( -- tag )
|
||||||
|
utf16be decode-input-if
|
||||||
|
next HEX: FF expect
|
||||||
|
CHAR: < expect make-tag ;
|
||||||
|
|
||||||
|
: start-document ( -- tag )
|
||||||
|
get-char {
|
||||||
|
{ CHAR: < [ start< ] }
|
||||||
|
{ 0 [ start-utf16be ] }
|
||||||
|
{ HEX: EF [ skip-utf8-bom ] }
|
||||||
|
{ HEX: FF [ skip-utf16le-bom ] }
|
||||||
|
{ HEX: FE [ skip-utf16be-bom ] }
|
||||||
|
{ f [ "" ] }
|
||||||
|
[ dup blank?
|
||||||
|
[ drop pass-blank utf8 decode-input-if CHAR: < expect make-tag ]
|
||||||
|
[ 1string ] if ! Replace with proper error
|
||||||
|
]
|
||||||
|
} case ;
|
||||||
|
|
|
@ -1,10 +1,9 @@
|
||||||
! Copyright (C) 2005, 2006 Daniel Ehrenberg
|
! Copyright (C) 2005, 2006 Daniel Ehrenberg
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: io io.streams.string io.files kernel math namespaces
|
USING: accessors arrays io io.encodings.binary io.files
|
||||||
prettyprint sequences arrays generic strings vectors
|
io.streams.string kernel namespaces sequences state-parser strings
|
||||||
xml.char-classes xml.data xml.errors xml.tokenize xml.writer
|
xml.backend xml.data xml.errors xml.tokenize ascii
|
||||||
xml.utilities state-parser assocs ascii io.encodings.utf8
|
xml.writer ;
|
||||||
accessors xml.backend ;
|
|
||||||
IN: xml
|
IN: xml
|
||||||
|
|
||||||
! -- Overall parser with data tree
|
! -- Overall parser with data tree
|
||||||
|
@ -23,7 +22,7 @@ GENERIC: process ( object -- )
|
||||||
M: object process add-child ;
|
M: object process add-child ;
|
||||||
|
|
||||||
M: prolog process
|
M: prolog process
|
||||||
xml-stack get V{ { f V{ "" } } } =
|
xml-stack get V{ { f V{ } } } =
|
||||||
[ bad-prolog ] unless drop ;
|
[ bad-prolog ] unless drop ;
|
||||||
|
|
||||||
M: instruction process
|
M: instruction process
|
||||||
|
@ -101,6 +100,7 @@ TUPLE: pull-xml scope ;
|
||||||
text-now? on
|
text-now? on
|
||||||
] H{ } make-assoc
|
] H{ } make-assoc
|
||||||
pull-xml boa ;
|
pull-xml boa ;
|
||||||
|
! pull-xml needs to call start-document somewhere
|
||||||
|
|
||||||
: pull-event ( pull -- xml-event/f )
|
: pull-event ( pull -- xml-event/f )
|
||||||
scope>> [
|
scope>> [
|
||||||
|
@ -133,11 +133,12 @@ TUPLE: pull-xml scope ;
|
||||||
: sax ( stream quot: ( xml-elem -- ) -- )
|
: sax ( stream quot: ( xml-elem -- ) -- )
|
||||||
swap [
|
swap [
|
||||||
reset-prolog init-ns-stack
|
reset-prolog init-ns-stack
|
||||||
prolog-data get call-under
|
start-document call-under
|
||||||
sax-loop
|
sax-loop
|
||||||
] state-parse ; inline recursive
|
] state-parse ; inline recursive
|
||||||
|
|
||||||
: (read-xml) ( -- )
|
: (read-xml) ( -- )
|
||||||
|
start-document process
|
||||||
[ process ] sax-loop ; inline
|
[ process ] sax-loop ; inline
|
||||||
|
|
||||||
: (read-xml-chunk) ( stream -- prolog seq )
|
: (read-xml-chunk) ( stream -- prolog seq )
|
||||||
|
@ -159,11 +160,12 @@ TUPLE: pull-xml scope ;
|
||||||
<string-reader> read-xml ;
|
<string-reader> read-xml ;
|
||||||
|
|
||||||
: string>xml-chunk ( string -- xml )
|
: string>xml-chunk ( string -- xml )
|
||||||
<string-reader> read-xml-chunk ;
|
t string-input?
|
||||||
|
[ <string-reader> read-xml-chunk ] with-variable ;
|
||||||
|
|
||||||
: file>xml ( filename -- xml )
|
: file>xml ( filename -- xml )
|
||||||
! Autodetect encoding!
|
! Autodetect encoding!
|
||||||
utf8 <file-reader> read-xml ;
|
binary <file-reader> read-xml ;
|
||||||
|
|
||||||
: xml-reprint ( string -- )
|
: xml-reprint ( string -- )
|
||||||
string>xml print-xml ;
|
string>xml print-xml ;
|
||||||
|
|
Loading…
Reference in New Issue