factor/basis/io/encodings/utf32/utf32.factor

63 lines
1.3 KiB
Factor
Raw Normal View History

2009-02-01 16:36:07 -05:00
! Copyright (C) 2009 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: math kernel io.encodings combinators io io.encodings.utf16
2009-02-03 18:32:05 -05:00
sequences io.binary io.encodings.iana ;
2009-02-01 16:36:07 -05:00
IN: io.encodings.utf32
SINGLETON: utf32be
2009-02-03 18:32:05 -05:00
utf32be "UTF-32BE" register-encoding
2009-02-01 16:36:07 -05:00
SINGLETON: utf32le
2009-02-03 18:32:05 -05:00
utf32le "UTF-32LE" register-encoding
2009-02-01 16:36:07 -05:00
SINGLETON: utf32
2009-02-03 18:32:05 -05:00
utf32 "UTF-32" register-encoding
2009-02-01 16:36:07 -05:00
<PRIVATE
! Decoding
2009-02-01 19:54:06 -05:00
: char> ( stream encoding quot -- ch )
nip swap 4 swap stream-read dup length {
{ 0 [ 2drop f ] }
{ 4 [ swap call ] }
[ 3drop replacement-char ]
} case ; inline
2009-02-01 16:36:07 -05:00
M: utf32be decode-char
2009-02-01 19:54:06 -05:00
[ be> ] char> ;
2009-02-01 16:36:07 -05:00
M: utf32le decode-char
2009-02-01 19:54:06 -05:00
[ le> ] char> ;
2009-02-01 16:36:07 -05:00
! Encoding
2009-02-01 19:54:06 -05:00
: >char ( char stream encoding quot -- )
nip 4 swap curry dip stream-write ; inline
2009-02-01 16:36:07 -05:00
M: utf32be encode-char
2009-02-01 19:54:06 -05:00
[ >be ] >char ;
2009-02-01 16:36:07 -05:00
M: utf32le encode-char
2009-02-01 19:54:06 -05:00
[ >le ] >char ;
2009-02-01 16:36:07 -05:00
! UTF-32
CONSTANT: bom-le B{ HEX: ff HEX: fe 0 0 }
2009-02-01 16:36:07 -05:00
CONSTANT: bom-be B{ 0 0 HEX: fe HEX: ff }
2009-02-01 16:36:07 -05:00
: bom>le/be ( bom -- le/be )
dup bom-le sequence= [ drop utf32le ] [
bom-be sequence= [ utf32be ] [ missing-bom ] if
] if ;
M: utf32 <decoder> ( stream utf32 -- decoder )
drop 4 over stream-read bom>le/be <decoder> ;
M: utf32 <encoder> ( stream utf32 -- encoder )
drop bom-le over stream-write utf32le <encoder> ;