2009-02-01 16:36:07 -05:00
|
|
|
! Copyright (C) 2009 Daniel Ehrenberg.
|
|
|
|
! See http://factorcode.org/license.txt for BSD license.
|
|
|
|
USING: math kernel io.encodings combinators io io.encodings.utf16
|
2009-02-03 18:32:05 -05:00
|
|
|
sequences io.binary io.encodings.iana ;
|
2009-02-01 16:36:07 -05:00
|
|
|
IN: io.encodings.utf32
|
|
|
|
|
|
|
|
SINGLETON: utf32be
|
|
|
|
|
2009-02-03 18:32:05 -05:00
|
|
|
utf32be "UTF-32BE" register-encoding
|
|
|
|
|
2009-02-01 16:36:07 -05:00
|
|
|
SINGLETON: utf32le
|
|
|
|
|
2009-02-03 18:32:05 -05:00
|
|
|
utf32le "UTF-32LE" register-encoding
|
|
|
|
|
2009-02-01 16:36:07 -05:00
|
|
|
SINGLETON: utf32
|
|
|
|
|
2009-02-03 18:32:05 -05:00
|
|
|
utf32 "UTF-32" register-encoding
|
|
|
|
|
2009-02-01 16:36:07 -05:00
|
|
|
<PRIVATE
|
|
|
|
|
|
|
|
! Decoding
|
|
|
|
|
2009-02-01 19:54:06 -05:00
|
|
|
: char> ( stream encoding quot -- ch )
|
|
|
|
nip swap 4 swap stream-read dup length {
|
|
|
|
{ 0 [ 2drop f ] }
|
|
|
|
{ 4 [ swap call ] }
|
|
|
|
[ 3drop replacement-char ]
|
|
|
|
} case ; inline
|
2009-02-01 16:36:07 -05:00
|
|
|
|
|
|
|
M: utf32be decode-char
|
2009-02-01 19:54:06 -05:00
|
|
|
[ be> ] char> ;
|
2009-02-01 16:36:07 -05:00
|
|
|
|
|
|
|
M: utf32le decode-char
|
2009-02-01 19:54:06 -05:00
|
|
|
[ le> ] char> ;
|
2009-02-01 16:36:07 -05:00
|
|
|
|
|
|
|
! Encoding
|
|
|
|
|
2009-02-01 19:54:06 -05:00
|
|
|
: >char ( char stream encoding quot -- )
|
|
|
|
nip 4 swap curry dip stream-write ; inline
|
2009-02-01 16:36:07 -05:00
|
|
|
|
|
|
|
M: utf32be encode-char
|
2009-02-01 19:54:06 -05:00
|
|
|
[ >be ] >char ;
|
2009-02-01 16:36:07 -05:00
|
|
|
|
|
|
|
M: utf32le encode-char
|
2009-02-01 19:54:06 -05:00
|
|
|
[ >le ] >char ;
|
2009-02-01 16:36:07 -05:00
|
|
|
|
|
|
|
! UTF-32
|
|
|
|
|
2011-11-23 21:49:33 -05:00
|
|
|
CONSTANT: bom-le B{ 0xff 0xfe 0 0 }
|
2009-02-01 16:36:07 -05:00
|
|
|
|
2011-11-23 21:49:33 -05:00
|
|
|
CONSTANT: bom-be B{ 0 0 0xfe 0xff }
|
2009-02-01 16:36:07 -05:00
|
|
|
|
|
|
|
: bom>le/be ( bom -- le/be )
|
|
|
|
dup bom-le sequence= [ drop utf32le ] [
|
|
|
|
bom-be sequence= [ utf32be ] [ missing-bom ] if
|
|
|
|
] if ;
|
|
|
|
|
|
|
|
M: utf32 <decoder> ( stream utf32 -- decoder )
|
|
|
|
drop 4 over stream-read bom>le/be <decoder> ;
|
|
|
|
|
|
|
|
M: utf32 <encoder> ( stream utf32 -- encoder )
|
|
|
|
drop bom-le over stream-write utf32le <encoder> ;
|
2013-03-10 20:57:03 -04:00
|
|
|
|
|
|
|
PRIVATE>
|