Fixing encodings/XML
parent
a3e52f283b
commit
246c4bb13c
|
@ -3,31 +3,33 @@
|
||||||
USING: math.parser arrays io.encodings sequences kernel assocs
|
USING: math.parser arrays io.encodings sequences kernel assocs
|
||||||
hashtables io.encodings.ascii generic parser classes.tuple words
|
hashtables io.encodings.ascii generic parser classes.tuple words
|
||||||
words.symbol io io.files splitting namespaces math
|
words.symbol io io.files splitting namespaces math
|
||||||
compiler.units accessors ;
|
compiler.units accessors classes.singleton classes.mixin
|
||||||
|
io.encodings.iana ;
|
||||||
IN: io.encodings.8-bit
|
IN: io.encodings.8-bit
|
||||||
|
|
||||||
<PRIVATE
|
<PRIVATE
|
||||||
|
|
||||||
: mappings {
|
: mappings {
|
||||||
{ "latin1" "8859-1" }
|
! encoding-name iana-name file-name
|
||||||
{ "latin2" "8859-2" }
|
{ "latin1" "ISO_8859-1:1987" "8859-1" }
|
||||||
{ "latin3" "8859-3" }
|
{ "latin2" "ISO_8859-2:1987" "8859-2" }
|
||||||
{ "latin4" "8859-4" }
|
{ "latin3" "ISO_8859-3:1988" "8859-3" }
|
||||||
{ "latin/cyrillic" "8859-5" }
|
{ "latin4" "ISO_8859-4:1988" "8859-4" }
|
||||||
{ "latin/arabic" "8859-6" }
|
{ "latin/cyrillic" "ISO_8859-5:1988" "8859-5" }
|
||||||
{ "latin/greek" "8859-7" }
|
{ "latin/arabic" "ISO_8859-6:1987" "8859-6" }
|
||||||
{ "latin/hebrew" "8859-8" }
|
{ "latin/greek" "ISO_8859-7:1987" "8859-7" }
|
||||||
{ "latin5" "8859-9" }
|
{ "latin/hebrew" "ISO_8859-8:1988" "8859-8" }
|
||||||
{ "latin6" "8859-10" }
|
{ "latin5" "ISO_8859-9:1989" "8859-9" }
|
||||||
{ "latin/thai" "8859-11" }
|
{ "latin6" "ISO-8859-10" "8859-10" }
|
||||||
{ "latin7" "8859-13" }
|
{ "latin/thai" "TIS-620" "8859-11" }
|
||||||
{ "latin8" "8859-14" }
|
{ "latin7" "ISO-8859-13" "8859-13" }
|
||||||
{ "latin9" "8859-15" }
|
{ "latin8" "ISO-8859-14" "8859-14" }
|
||||||
{ "latin10" "8859-16" }
|
{ "latin9" "ISO-8859-15" "8859-15" }
|
||||||
{ "koi8-r" "KOI8-R" }
|
{ "latin10" "ISO-8859-16" "8859-16" }
|
||||||
{ "windows-1252" "CP1252" }
|
{ "koi8-r" "KOI8-R" "KOI8-R" }
|
||||||
{ "ebcdic" "CP037" }
|
{ "windows-1252" "windows-1252" "CP1252" }
|
||||||
{ "mac-roman" "ROMAN" }
|
{ "ebcdic" "IBM037" "CP037" }
|
||||||
|
{ "mac-roman" "macintosh" "ROMAN" }
|
||||||
} ;
|
} ;
|
||||||
|
|
||||||
: encoding-file ( file-name -- stream )
|
: encoding-file ( file-name -- stream )
|
||||||
|
@ -65,8 +67,7 @@ M: 8-bit encode-char encode>> encode-8-bit ;
|
||||||
|
|
||||||
M: 8-bit decode-char decode>> decode-8-bit ;
|
M: 8-bit decode-char decode>> decode-8-bit ;
|
||||||
|
|
||||||
PREDICATE: 8-bit-encoding < word
|
MIXIN: 8-bit-encoding
|
||||||
8-bit-encodings get-global key? ;
|
|
||||||
|
|
||||||
M: 8-bit-encoding <encoder>
|
M: 8-bit-encoding <encoder>
|
||||||
8-bit-encodings get-global at <encoder> ;
|
8-bit-encodings get-global at <encoder> ;
|
||||||
|
@ -74,15 +75,21 @@ M: 8-bit-encoding <encoder>
|
||||||
M: 8-bit-encoding <decoder>
|
M: 8-bit-encoding <decoder>
|
||||||
8-bit-encodings get-global at <decoder> ;
|
8-bit-encodings get-global at <decoder> ;
|
||||||
|
|
||||||
|
: create-encoding ( name -- word )
|
||||||
|
"io.encodings.8-bit" create
|
||||||
|
[ define-singleton-class ]
|
||||||
|
[ 8-bit-encoding add-mixin-instance ]
|
||||||
|
[ ] tri ;
|
||||||
|
|
||||||
PRIVATE>
|
PRIVATE>
|
||||||
|
|
||||||
[
|
[
|
||||||
mappings [
|
mappings [
|
||||||
[ "io.encodings.8-bit" create ]
|
first3
|
||||||
|
[ create-encoding ]
|
||||||
|
[ dupd register-encoding ]
|
||||||
[ encoding-file parse-file 8-bit boa ]
|
[ encoding-file parse-file 8-bit boa ]
|
||||||
bi*
|
tri*
|
||||||
] assoc-map
|
] H{ } map>assoc
|
||||||
[ keys [ define-symbol ] each ]
|
8-bit-encodings set-global
|
||||||
[ 8-bit-encodings set-global ]
|
|
||||||
bi
|
|
||||||
] with-compilation-unit
|
] with-compilation-unit
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
! Copyright (C) 2008 Daniel Ehrenberg.
|
! Copyright (C) 2008 Daniel Ehrenberg.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: io io.encodings kernel math io.encodings.private ;
|
USING: io io.encodings kernel math io.encodings.private io.encodings.iana ;
|
||||||
IN: io.encodings.ascii
|
IN: io.encodings.ascii
|
||||||
|
|
||||||
<PRIVATE
|
<PRIVATE
|
||||||
|
@ -20,3 +20,5 @@ M: ascii encode-char
|
||||||
|
|
||||||
M: ascii decode-char
|
M: ascii decode-char
|
||||||
128 decode-if< ;
|
128 decode-if< ;
|
||||||
|
|
||||||
|
ascii "ANSI_X3.4-1968" register-encoding
|
||||||
|
|
|
@ -3,11 +3,13 @@
|
||||||
USING: xml xml.data kernel io io.encodings interval-maps splitting fry
|
USING: xml xml.data kernel io io.encodings interval-maps splitting fry
|
||||||
math.parser sequences combinators assocs locals accessors math
|
math.parser sequences combinators assocs locals accessors math
|
||||||
arrays values io.encodings.ascii ascii io.files biassocs math.order
|
arrays values io.encodings.ascii ascii io.files biassocs math.order
|
||||||
combinators.short-circuit io.binary ;
|
combinators.short-circuit io.binary io.encodings.iana ;
|
||||||
IN: io.encodings.chinese
|
IN: io.encodings.chinese
|
||||||
|
|
||||||
SINGLETON: gb18030
|
SINGLETON: gb18030
|
||||||
|
|
||||||
|
gb18030 "GB18030" register-encoding
|
||||||
|
|
||||||
<PRIVATE
|
<PRIVATE
|
||||||
|
|
||||||
! GB to mean GB18030 is a terrible abuse of notation
|
! GB to mean GB18030 is a terrible abuse of notation
|
||||||
|
|
|
@ -1,12 +1,35 @@
|
||||||
USING: help.syntax help.markup ;
|
USING: help.syntax help.markup strings ;
|
||||||
IN: io.encodings.iana
|
IN: io.encodings.iana
|
||||||
|
|
||||||
|
ABOUT: "io.encodings.iana"
|
||||||
|
|
||||||
|
ARTICLE: "io.encodings.iana" "IANA-registered encoding names"
|
||||||
|
"The " { $vocab-link "io.encodings.iana" } " vocabulary provides words for accessing the names of encodings and the encoding descriptors corresponding to names." $nl
|
||||||
|
"Most text encodings in common use have been registered with IANA. There is a standard set of names for each encoding. Simple conversion functions:"
|
||||||
|
{ $subsection name>encoding }
|
||||||
|
{ $subsection encoding>name }
|
||||||
|
"To let a new encoding be used with the above words, use the following:"
|
||||||
|
{ $subsection register-encoding }
|
||||||
|
"Exceptions when encodings or names are not found:"
|
||||||
|
{ $subsection missing-encoding }
|
||||||
|
{ $subsection missing-name } ;
|
||||||
|
|
||||||
|
HELP: missing-encoding
|
||||||
|
{ $error-description "The error called from " { $link name>encoding } " when there is no encoding descriptor registered corresponding to the given name." } ;
|
||||||
|
|
||||||
|
HELP: missing-name
|
||||||
|
{ $error-description "The error called from " { $link encoding>name } " when there is no name registered corresponding to the given encoding." } ;
|
||||||
|
|
||||||
HELP: name>encoding
|
HELP: name>encoding
|
||||||
{ $values { "name" "an encoding name" } { "encoding" "an encoding descriptor" } }
|
{ $values { "name" "an encoding name" } { "encoding" "an encoding descriptor" } }
|
||||||
{ "Given an IANA-registered encoding name, find the encoding descriptor that represents it, or " { $code f } " if it is not found (either not implemented in Factor or not registered)." } ;
|
{ $description "Given an IANA-registered encoding name, find the encoding descriptor that represents it, or " { $code f } " if it is not found (either not implemented in Factor or not registered)." } ;
|
||||||
|
|
||||||
HELP: encoding>name
|
HELP: encoding>name
|
||||||
{ $values { "encoding" "an encoding descriptor" } { "name" "an encoding name" } }
|
{ $values { "encoding" "an encoding descriptor" } { "name" "an encoding name" } }
|
||||||
{ "Given an encoding descriptor, return the preferred IANA name." } ;
|
{ $description "Given an encoding descriptor, return the preferred IANA name." } ;
|
||||||
|
|
||||||
{ name>encoding encoding>name } related-words
|
{ name>encoding encoding>name } related-words
|
||||||
|
|
||||||
|
HELP: register-encoding
|
||||||
|
{ $values { "descriptor" "an encoding descriptor" } { "name" string } }
|
||||||
|
{ $description "Registers an encoding descriptor with the given name, available for lookup through " { $link name>encoding } " and " { $link encoding>name } ". IANA-registered aliases are automatically included. The name given must be the first name in the " { $snippet "resources:basis/io/encodings/iana/character-sets" } " file." } ;
|
||||||
|
|
|
@ -1,5 +1,28 @@
|
||||||
USING: io.encodings.iana io.encodings.ascii tools.test ;
|
USING: io.encodings.iana io.encodings.iana.private
|
||||||
|
io.encodings.utf8 tools.test assocs ;
|
||||||
|
IN: io.encodings.iana.tests
|
||||||
|
|
||||||
[ ascii ] [ "US-ASCII" name>encoding ] unit-test
|
[ utf8 ] [ "UTF-8" name>encoding ] unit-test
|
||||||
[ ascii ] [ "ASCII" name>encoding ] unit-test
|
[ utf8 ] [ "utf8" name>encoding ] unit-test
|
||||||
[ "US-ASCII" ] [ ascii encoding>name ] unit-test
|
[ "UTF-8" ] [ utf8 encoding>name ] unit-test
|
||||||
|
|
||||||
|
! We will never implement EBCDIC-FI-SE-A
|
||||||
|
SINGLETON: ebcdic-fisea
|
||||||
|
ebcdic-fisea "EBCDIC-FI-SE-A" register-encoding
|
||||||
|
[ ebcdic-fisea ] [ "EBCDIC-FI-SE-A" name>encoding ] unit-test
|
||||||
|
[ ebcdic-fisea ] [ "csEBCDICFISEA" name>encoding ] unit-test
|
||||||
|
[ "EBCDIC-FI-SE-A" ] [ ebcdic-fisea encoding>name ] unit-test
|
||||||
|
|
||||||
|
! Clean up after myself
|
||||||
|
[ ] [
|
||||||
|
"EBCDIC-FI-SE-A" n>e-table delete-at
|
||||||
|
"csEBCDICFISEA" n>e-table delete-at
|
||||||
|
ebcdic-fisea e>n-table delete-at
|
||||||
|
] unit-test
|
||||||
|
[ "EBCDIC-FI-SE-A" name>encoding ] must-fail
|
||||||
|
[ "csEBCDICFISEA" name>encoding ] must-fail
|
||||||
|
[ ebcdic-fisea encoding>name ] must-fail
|
||||||
|
|
||||||
|
[ ebcdic-fisea "foobar" register-encoding ] must-fail
|
||||||
|
[ "foobar" name>encoding ] must-fail
|
||||||
|
[ ebcdic-fisea encoding>name ] must-fail
|
||||||
|
|
|
@ -1,41 +1,24 @@
|
||||||
! Copyright (C) 2008 Daniel Ehrenberg
|
! Copyright (C) 2008 Daniel Ehrenberg
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: kernel strings values io.files assocs
|
USING: kernel strings values io.files assocs
|
||||||
splitting sequences io namespaces sets io.encodings.8-bit
|
splitting sequences io namespaces sets io.encodings.utf8 ;
|
||||||
io.encodings.ascii io.encodings.utf8 io.encodings.utf16
|
|
||||||
io.encodings.chinese io.encodings.japanese ;
|
|
||||||
IN: io.encodings.iana
|
IN: io.encodings.iana
|
||||||
|
|
||||||
<PRIVATE
|
<PRIVATE
|
||||||
VALUE: n>e-table
|
SYMBOL: n>e-table
|
||||||
|
SYMBOL: e>n-table
|
||||||
: e>n-table H{
|
SYMBOL: aliases
|
||||||
{ ascii "US-ASCII" }
|
|
||||||
{ utf8 "UTF-8" }
|
|
||||||
{ utf16 "UTF-16" }
|
|
||||||
{ utf16be "UTF-16BE" }
|
|
||||||
{ utf16le "UTF-16LE" }
|
|
||||||
{ latin1 "ISO-8859-1" }
|
|
||||||
{ latin2 "ISO-8859-2" }
|
|
||||||
{ latin3 "ISO-8859-3" }
|
|
||||||
{ latin4 "ISO-8859-4" }
|
|
||||||
{ latin/cyrillic "ISO-8859-5" }
|
|
||||||
{ latin/arabic "ISO-8859-6" }
|
|
||||||
{ latin/greek "ISO-8859-7" }
|
|
||||||
{ latin/hebrew "ISO-8859-8" }
|
|
||||||
{ latin5 "ISO-8859-9" }
|
|
||||||
{ latin6 "ISO-8859-10" }
|
|
||||||
{ shift-jis "Shift_JIS" }
|
|
||||||
{ windows-31j "Windows-31J" }
|
|
||||||
{ gb18030 "GB18030" }
|
|
||||||
} ;
|
|
||||||
PRIVATE>
|
PRIVATE>
|
||||||
|
|
||||||
|
ERROR: missing-encoding name ;
|
||||||
|
|
||||||
: name>encoding ( name -- encoding )
|
: name>encoding ( name -- encoding )
|
||||||
n>e-table at ;
|
dup n>e-table get-global at [ ] [ missing-encoding ] ?if ;
|
||||||
|
|
||||||
|
ERROR: missing-name encoding ;
|
||||||
|
|
||||||
: encoding>name ( encoding -- name )
|
: encoding>name ( encoding -- name )
|
||||||
e>n-table at ;
|
dup e>n-table get-global at [ ] [ missing-name ] ?if ;
|
||||||
|
|
||||||
<PRIVATE
|
<PRIVATE
|
||||||
: parse-iana ( stream -- synonym-set )
|
: parse-iana ( stream -- synonym-set )
|
||||||
|
@ -43,24 +26,33 @@ PRIVATE>
|
||||||
[ " " split ] map
|
[ " " split ] map
|
||||||
[ first { "Name:" "Alias:" } member? ] filter
|
[ first { "Name:" "Alias:" } member? ] filter
|
||||||
[ second ] map { "None" } diff
|
[ second ] map { "None" } diff
|
||||||
] map ;
|
] map harvest ;
|
||||||
|
|
||||||
: more-aliases ( -- assoc )
|
: make-aliases ( stream -- n>e )
|
||||||
|
parse-iana [ [ first ] [ ] bi ] H{ } map>assoc ;
|
||||||
|
|
||||||
|
: initial-n>e ( -- assoc )
|
||||||
H{
|
H{
|
||||||
{ "UTF8" utf8 }
|
{ "UTF8" utf8 }
|
||||||
{ "utf8" utf8 }
|
{ "utf8" utf8 }
|
||||||
{ "utf-8" utf8 }
|
{ "utf-8" utf8 }
|
||||||
} ;
|
{ "UTF-8" utf8 }
|
||||||
|
} clone ;
|
||||||
|
|
||||||
|
: initial-e>n ( -- assoc )
|
||||||
|
H{ { utf8 "UTF-8" } } clone ;
|
||||||
|
|
||||||
: make-n>e ( stream -- n>e )
|
|
||||||
parse-iana [ [
|
|
||||||
dup [
|
|
||||||
e>n-table value-at
|
|
||||||
[ swap [ set ] with each ]
|
|
||||||
[ drop ] if*
|
|
||||||
] with each
|
|
||||||
] each ] H{ } make-assoc more-aliases assoc-union ;
|
|
||||||
PRIVATE>
|
PRIVATE>
|
||||||
|
|
||||||
"resource:basis/io/encodings/iana/character-sets"
|
"resource:basis/io/encodings/iana/character-sets"
|
||||||
ascii <file-reader> make-n>e to: n>e-table
|
utf8 <file-reader> make-aliases aliases set-global
|
||||||
|
|
||||||
|
n>e-table global [ initial-n>e or ] change-at
|
||||||
|
e>n-table global [ initial-e>n or ] change-at
|
||||||
|
|
||||||
|
: register-encoding ( descriptor name -- )
|
||||||
|
[
|
||||||
|
aliases get at [
|
||||||
|
[ n>e-table get-global set-at ] with each
|
||||||
|
] [ "Bad encoding registration" throw ] if*
|
||||||
|
] [ swap e>n-table get-global set-at ] 2bi ;
|
||||||
|
|
|
@ -4,7 +4,7 @@ USING: help.markup help.syntax ;
|
||||||
IN: io.encodings.japanese
|
IN: io.encodings.japanese
|
||||||
|
|
||||||
ARTICLE: "io.encodings.japanese" "Japanese text encodings"
|
ARTICLE: "io.encodings.japanese" "Japanese text encodings"
|
||||||
"The " { $vocab-link "io.encodings.japanese" } " vocabulary implements Japanese-specific text encodings. Several encodings are used for Japanese text besides the standard UTF encodings for Unicode strings. These are mostly based on the character set defined in the JIS X 208 standard. Current coverage of encodings is incomplete."
|
"Several encodings are used for Japanese text besides the standard UTF encodings for Unicode strings. These are mostly based on the character set defined in the JIS X 208 standard. Current coverage of encodings is incomplete."
|
||||||
{ $subsection shift-jis }
|
{ $subsection shift-jis }
|
||||||
{ $subsection windows-31j } ;
|
{ $subsection windows-31j } ;
|
||||||
|
|
||||||
|
|
|
@ -3,15 +3,29 @@
|
||||||
USING: sequences kernel io io.files combinators.short-circuit
|
USING: sequences kernel io io.files combinators.short-circuit
|
||||||
math.order values assocs io.encodings io.binary fry strings
|
math.order values assocs io.encodings io.binary fry strings
|
||||||
math io.encodings.ascii arrays accessors splitting math.parser
|
math io.encodings.ascii arrays accessors splitting math.parser
|
||||||
biassocs ;
|
biassocs io.encodings.iana ;
|
||||||
IN: io.encodings.japanese
|
IN: io.encodings.japanese
|
||||||
|
|
||||||
VALUE: shift-jis
|
SINGLETON: shift-jis
|
||||||
|
|
||||||
VALUE: windows-31j
|
shift-jis "Shift_JIS" register-encoding
|
||||||
|
|
||||||
|
SINGLETON: windows-31j
|
||||||
|
|
||||||
|
windows-31j "Windows-31J" register-encoding
|
||||||
|
|
||||||
<PRIVATE
|
<PRIVATE
|
||||||
|
|
||||||
|
VALUE: shift-jis-table
|
||||||
|
|
||||||
|
M: shift-jis <encoder> drop shift-jis-table <encoder> ;
|
||||||
|
M: shift-jis <decoder> drop shift-jis-table <decoder> ;
|
||||||
|
|
||||||
|
VALUE: windows-31j-table
|
||||||
|
|
||||||
|
M: windows-31j <encoder> drop windows-31j-table <encoder> ;
|
||||||
|
M: windows-31j <decoder> drop windows-31j-table <decoder> ;
|
||||||
|
|
||||||
TUPLE: jis assoc ;
|
TUPLE: jis assoc ;
|
||||||
|
|
||||||
: <jis> ( assoc -- jis )
|
: <jis> ( assoc -- jis )
|
||||||
|
@ -31,10 +45,10 @@ TUPLE: jis assoc ;
|
||||||
ascii file-lines process-jis <jis> ;
|
ascii file-lines process-jis <jis> ;
|
||||||
|
|
||||||
"resource:basis/io/encodings/japanese/CP932.txt"
|
"resource:basis/io/encodings/japanese/CP932.txt"
|
||||||
make-jis to: windows-31j
|
make-jis to: windows-31j-table
|
||||||
|
|
||||||
"resource:basis/io/encodings/japanese/sjis-0208-1997-std.txt"
|
"resource:basis/io/encodings/japanese/sjis-0208-1997-std.txt"
|
||||||
make-jis to: shift-jis
|
make-jis to: shift-jis-table
|
||||||
|
|
||||||
: small? ( char -- ? )
|
: small? ( char -- ? )
|
||||||
! ASCII range or single-byte halfwidth katakana
|
! ASCII range or single-byte halfwidth katakana
|
||||||
|
|
|
@ -1,15 +1,21 @@
|
||||||
! Copyright (C) 2006, 2008 Daniel Ehrenberg.
|
! Copyright (C) 2006, 2009 Daniel Ehrenberg.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: math kernel sequences sbufs vectors namespaces io.binary
|
USING: math kernel sequences sbufs vectors namespaces io.binary
|
||||||
io.encodings combinators splitting io byte-arrays ;
|
io.encodings combinators splitting io byte-arrays io.encodings.iana ;
|
||||||
IN: io.encodings.utf16
|
IN: io.encodings.utf16
|
||||||
|
|
||||||
SINGLETON: utf16be
|
SINGLETON: utf16be
|
||||||
|
|
||||||
|
utf16be "UTF-16BE" register-encoding
|
||||||
|
|
||||||
SINGLETON: utf16le
|
SINGLETON: utf16le
|
||||||
|
|
||||||
|
utf16le "UTF-16LE" register-encoding
|
||||||
|
|
||||||
SINGLETON: utf16
|
SINGLETON: utf16
|
||||||
|
|
||||||
|
utf16 "UTF-16" register-encoding
|
||||||
|
|
||||||
ERROR: missing-bom ;
|
ERROR: missing-bom ;
|
||||||
|
|
||||||
<PRIVATE
|
<PRIVATE
|
||||||
|
|
|
@ -1,15 +1,21 @@
|
||||||
! Copyright (C) 2009 Daniel Ehrenberg.
|
! Copyright (C) 2009 Daniel Ehrenberg.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: math kernel io.encodings combinators io io.encodings.utf16
|
USING: math kernel io.encodings combinators io io.encodings.utf16
|
||||||
sequences io.binary ;
|
sequences io.binary io.encodings.iana ;
|
||||||
IN: io.encodings.utf32
|
IN: io.encodings.utf32
|
||||||
|
|
||||||
SINGLETON: utf32be
|
SINGLETON: utf32be
|
||||||
|
|
||||||
|
utf32be "UTF-32BE" register-encoding
|
||||||
|
|
||||||
SINGLETON: utf32le
|
SINGLETON: utf32le
|
||||||
|
|
||||||
|
utf32le "UTF-32LE" register-encoding
|
||||||
|
|
||||||
SINGLETON: utf32
|
SINGLETON: utf32
|
||||||
|
|
||||||
|
utf32 "UTF-32" register-encoding
|
||||||
|
|
||||||
<PRIVATE
|
<PRIVATE
|
||||||
|
|
||||||
! Decoding
|
! Decoding
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
USING: xml xml.data xml.utilities tools.test accessors kernel ;
|
USING: xml xml.data xml.utilities tools.test accessors kernel
|
||||||
|
io.encodings.8-bit ;
|
||||||
|
|
||||||
[ "\u000131" ] [ "resource:basis/xml/tests/latin5.xml" file>xml children>string ] unit-test
|
[ "\u000131" ] [ "resource:basis/xml/tests/latin5.xml" file>xml children>string ] unit-test
|
||||||
[ "\u0000e9" ] [ "resource:basis/xml/tests/latin1.xml" file>xml children>string ] unit-test
|
[ "\u0000e9" ] [ "resource:basis/xml/tests/latin1.xml" file>xml children>string ] unit-test
|
||||||
|
@ -11,4 +12,4 @@ USING: xml xml.data xml.utilities tools.test accessors kernel ;
|
||||||
[ "\u0000e9" ] [ "resource:basis/xml/tests/utf16le-bom.xml" file>xml children>string ] unit-test
|
[ "\u0000e9" ] [ "resource:basis/xml/tests/utf16le-bom.xml" file>xml children>string ] unit-test
|
||||||
[ "\u0000e9" ] [ "resource:basis/xml/tests/prologless.xml" file>xml children>string ] unit-test
|
[ "\u0000e9" ] [ "resource:basis/xml/tests/prologless.xml" file>xml children>string ] unit-test
|
||||||
[ "e" ] [ "resource:basis/xml/tests/ascii.xml" file>xml children>string ] unit-test
|
[ "e" ] [ "resource:basis/xml/tests/ascii.xml" file>xml children>string ] unit-test
|
||||||
[ "\u0000e9" "x" ] [ "resource:basis/xml/tests/unitag.xml" file>xml [ name>> main>> ] [ children>string ] bi ] unit-test
|
[ "\u0000e9" "x" ] [ "resource:basis/xml/tests/unitag.xml" file>xml [ name>> main>> ] [ children>string ] bi ] unit-test
|
||||||
|
|
Loading…
Reference in New Issue