Unicode encoding changes
parent
f378122dc5
commit
56afb67bfc
|
@ -1,3 +1,8 @@
|
|||
! Copyright (C) 2008 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
IN: io.encodings.binary SYMBOL: binary
|
||||
USING: io.encodings kernel ;
|
||||
IN: io.encodings.binary
|
||||
|
||||
TUPLE: binary ;
|
||||
M: binary <encoder> drop ;
|
||||
M: binary <decoder> drop ;
|
||||
|
|
|
@ -2,62 +2,36 @@
|
|||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: math kernel sequences sbufs vectors namespaces
|
||||
growable strings io classes continuations combinators
|
||||
io.styles io.streams.plain io.encodings.binary splitting
|
||||
io.streams.duplex byte-arrays ;
|
||||
io.styles io.streams.plain splitting
|
||||
io.streams.duplex byte-arrays sequences.private ;
|
||||
IN: io.encodings
|
||||
|
||||
! The encoding descriptor protocol
|
||||
|
||||
GENERIC: decode-step ( buf char encoding -- )
|
||||
M: object decode-step drop swap push ;
|
||||
GENERIC: decode-char ( stream encoding -- char/f )
|
||||
|
||||
GENERIC: init-decoder ( stream encoding -- encoding )
|
||||
M: tuple-class init-decoder construct-empty init-decoder ;
|
||||
M: object init-decoder nip ;
|
||||
GENERIC: encode-char ( char stream encoding -- )
|
||||
|
||||
GENERIC: stream-write-encoded ( string stream encoding -- byte-array )
|
||||
M: object stream-write-encoded drop stream-write ;
|
||||
GENERIC: <decoder> ( stream decoding -- newstream )
|
||||
|
||||
GENERIC: <encoder> ( stream encoding -- newstream )
|
||||
|
||||
: replacement-char HEX: fffd ;
|
||||
|
||||
! Decoding
|
||||
|
||||
<PRIVATE
|
||||
|
||||
TUPLE: decode-error ;
|
||||
|
||||
: decode-error ( -- * ) \ decode-error construct-empty throw ;
|
||||
|
||||
SYMBOL: begin
|
||||
TUPLE: decoder stream code cr ;
|
||||
M: tuple-class <decoder> construct-empty <decoder> ;
|
||||
M: tuple <decoder> f decoder construct-boa ;
|
||||
|
||||
: push-decoded ( buf ch -- buf ch state )
|
||||
over push 0 begin ;
|
||||
|
||||
: push-replacement ( buf -- buf ch state )
|
||||
! This is the replacement character
|
||||
HEX: fffd push-decoded ;
|
||||
|
||||
: space ( resizable -- room-left )
|
||||
dup underlying swap [ length ] 2apply - ;
|
||||
|
||||
: full? ( resizable -- ? ) space zero? ;
|
||||
|
||||
: end-read-loop ( buf ch state stream quot -- string/f )
|
||||
2drop 2drop >string f like ;
|
||||
|
||||
: decode-read-loop ( buf stream encoding -- string/f )
|
||||
pick full? [ 2drop >string ] [
|
||||
over stream-read1 [
|
||||
-rot tuck >r >r >r dupd r> decode-step r> r>
|
||||
decode-read-loop
|
||||
] [ 2drop >string f like ] if*
|
||||
] if ;
|
||||
|
||||
: decode-read ( length stream encoding -- string )
|
||||
rot <sbuf> -rot decode-read-loop ;
|
||||
|
||||
TUPLE: decoder code cr ;
|
||||
: <decoder> ( stream encoding -- newstream )
|
||||
dup binary eq? [ drop ] [
|
||||
dupd init-decoder { set-delegate set-decoder-code }
|
||||
decoder construct
|
||||
] if ;
|
||||
: >decoder< ( decoder -- stream encoding )
|
||||
{ decoder-stream decoder-code } get-slots ;
|
||||
|
||||
: cr+ t swap set-decoder-cr ; inline
|
||||
|
||||
|
@ -82,72 +56,78 @@ TUPLE: decoder code cr ;
|
|||
over decoder-cr [
|
||||
over cr-
|
||||
"\n" ?head [
|
||||
swap stream-read1 [ add ] when*
|
||||
] [ nip ] if
|
||||
] [ nip ] if ;
|
||||
over stream-read1 [ add ] when*
|
||||
] when
|
||||
] when nip ;
|
||||
|
||||
: read-loop ( n stream -- string )
|
||||
over 0 <string> [
|
||||
[
|
||||
>r stream-read1 dup
|
||||
[ swap r> set-nth-unsafe f ] [ r> 3drop t ] if
|
||||
] 2curry find-integer
|
||||
] keep swap [ head ] when* ;
|
||||
|
||||
M: decoder stream-read
|
||||
tuck { delegate decoder-code } get-slots decode-read fix-read ;
|
||||
tuck read-loop fix-read ;
|
||||
|
||||
M: decoder stream-read-partial stream-read ;
|
||||
|
||||
: decoder-read-until ( stream delim -- ch )
|
||||
! Copied from { c-reader stream-read-until }!!!
|
||||
over stream-read1 dup [
|
||||
dup pick memq? [ 2nip ] [ , decoder-read-until ] if
|
||||
] [
|
||||
2nip
|
||||
] if ;
|
||||
: (read-until) ( buf quot -- string/f sep/f )
|
||||
! quot: -- char keep-going?
|
||||
dup call
|
||||
[ >r drop "" like r> ]
|
||||
[ pick push (read-until) ] if ; inline
|
||||
|
||||
M: decoder stream-read-until
|
||||
! Copied from { c-reader stream-read-until }!!!
|
||||
[ swap decoder-read-until ] "" make
|
||||
swap over empty? over not and [ 2drop f f ] when ;
|
||||
SBUF" " clone -rot >decoder<
|
||||
[ decode-char dup rot memq? ] 3curry (read-until) ;
|
||||
|
||||
: fix-read1 ( stream char -- char )
|
||||
over decoder-cr [
|
||||
over cr-
|
||||
dup CHAR: \n = [
|
||||
drop stream-read1
|
||||
] [ nip ] if
|
||||
] [ nip ] if ;
|
||||
drop dup stream-read1
|
||||
] when
|
||||
] when nip ;
|
||||
|
||||
M: decoder stream-read1
|
||||
1 swap stream-read f like [ first ] [ f ] if* ;
|
||||
dup >decoder< decode-char fix-read1 ;
|
||||
|
||||
M: decoder stream-readln ( stream -- str )
|
||||
"\r\n" over stream-read-until handle-readln ;
|
||||
|
||||
M: decoder dispose decoder-stream dispose ;
|
||||
|
||||
! Encoding
|
||||
|
||||
TUPLE: encode-error ;
|
||||
|
||||
: encode-error ( -- * ) \ encode-error construct-empty throw ;
|
||||
|
||||
TUPLE: encoder code ;
|
||||
: <encoder> ( stream encoding -- newstream )
|
||||
dup binary eq? [ drop ] [
|
||||
construct-empty { set-delegate set-encoder-code }
|
||||
encoder construct
|
||||
] if ;
|
||||
TUPLE: encoder stream code ;
|
||||
M: tuple-class <encoder> construct-empty <encoder> ;
|
||||
M: tuple <encoder> encoder construct-boa ;
|
||||
|
||||
: >encoder< ( encoder -- stream encoding )
|
||||
{ encoder-stream encoder-code } get-slots ;
|
||||
|
||||
M: encoder stream-write1
|
||||
>r 1string r> stream-write ;
|
||||
>encoder< encode-char ;
|
||||
|
||||
M: encoder stream-write
|
||||
{ delegate encoder-code } get-slots stream-write-encoded ;
|
||||
>encoder< [ encode-char ] 2curry each ;
|
||||
|
||||
M: encoder dispose delegate dispose ;
|
||||
M: encoder dispose encoder-stream dispose ;
|
||||
|
||||
INSTANCE: encoder plain-writer
|
||||
|
||||
! Rebinding duplex streams which have not read anything yet
|
||||
|
||||
: reencode ( stream encoding -- newstream )
|
||||
over encoder? [ >r delegate r> ] when <encoder> ;
|
||||
over encoder? [ >r encoder-stream r> ] when <encoder> ;
|
||||
|
||||
: redecode ( stream encoding -- newstream )
|
||||
over decoder? [ >r delegate r> ] when <decoder> ;
|
||||
over decoder? [ >r decoder-stream r> ] when <decoder> ;
|
||||
PRIVATE>
|
||||
|
||||
: <encoder-duplex> ( stream-in stream-out encoding -- duplex )
|
||||
tuck reencode >r redecode r> <duplex-stream> ;
|
||||
|
|
|
@ -6,82 +6,68 @@ IN: io.encodings.utf8
|
|||
|
||||
! Decoding UTF-8
|
||||
|
||||
TUPLE: utf8 ch state ;
|
||||
TUPLE: utf8 ;
|
||||
|
||||
SYMBOL: double
|
||||
SYMBOL: triple
|
||||
SYMBOL: triple2
|
||||
SYMBOL: quad
|
||||
SYMBOL: quad2
|
||||
SYMBOL: quad3
|
||||
<PRIVATE
|
||||
|
||||
: starts-2? ( char -- ? )
|
||||
-6 shift BIN: 10 number= ;
|
||||
dup [ -6 shift BIN: 10 number= ] when ;
|
||||
|
||||
: append-nums ( buf bottom top state-out -- buf num state )
|
||||
>r over starts-2?
|
||||
[ 6 shift swap BIN: 111111 bitand bitor r> ]
|
||||
[ r> 3drop push-replacement ] if ;
|
||||
: append-nums ( stream byte -- stream char )
|
||||
over stream-read1 dup starts-2?
|
||||
[ 6 shift swap BIN: 111111 bitand bitor ]
|
||||
[ 2drop replacement-char ] if ;
|
||||
|
||||
: begin-utf8 ( buf byte -- buf ch state )
|
||||
: double ( stream byte -- stream char )
|
||||
BIN: 11111 bitand append-nums ;
|
||||
|
||||
: triple ( stream byte -- stream char )
|
||||
BIN: 1111 bitand append-nums append-nums ;
|
||||
|
||||
: quad ( stream byte -- stream char )
|
||||
BIN: 111 bitand append-nums append-nums append-nums ;
|
||||
|
||||
: begin-utf8 ( stream byte -- stream char )
|
||||
{
|
||||
{ [ dup -7 shift zero? ] [ push-decoded ] }
|
||||
{ [ dup -5 shift BIN: 110 number= ] [ BIN: 11111 bitand double ] }
|
||||
{ [ dup -4 shift BIN: 1110 number= ] [ BIN: 1111 bitand triple ] }
|
||||
{ [ dup -3 shift BIN: 11110 number= ] [ BIN: 111 bitand quad ] }
|
||||
{ [ t ] [ drop push-replacement ] }
|
||||
{ [ dup -7 shift zero? ] [ ] }
|
||||
{ [ dup -5 shift BIN: 110 number= ] [ double ] }
|
||||
{ [ dup -4 shift BIN: 1110 number= ] [ triple ] }
|
||||
{ [ dup -3 shift BIN: 11110 number= ] [ quad ] }
|
||||
{ [ t ] [ drop replacement-char ] }
|
||||
} cond ;
|
||||
|
||||
: end-multibyte ( buf byte ch -- buf ch state )
|
||||
f append-nums [ push-decoded ] unless* ;
|
||||
: decode-utf8 ( stream -- char/f )
|
||||
dup stream-read1 dup [ begin-utf8 ] when nip ;
|
||||
|
||||
: decode-utf8-step ( buf byte ch state -- buf ch state )
|
||||
{
|
||||
{ begin [ drop begin-utf8 ] }
|
||||
{ double [ end-multibyte ] }
|
||||
{ triple [ triple2 append-nums ] }
|
||||
{ triple2 [ end-multibyte ] }
|
||||
{ quad [ quad2 append-nums ] }
|
||||
{ quad2 [ quad3 append-nums ] }
|
||||
{ quad3 [ end-multibyte ] }
|
||||
} case ;
|
||||
|
||||
: unpack-state ( encoding -- ch state )
|
||||
{ utf8-ch utf8-state } get-slots ;
|
||||
|
||||
: pack-state ( ch state encoding -- )
|
||||
{ set-utf8-ch set-utf8-state } set-slots ;
|
||||
|
||||
M: utf8 decode-step ( buf char encoding -- )
|
||||
[ unpack-state decode-utf8-step ] keep pack-state drop ;
|
||||
|
||||
M: utf8 init-decoder nip begin over set-utf8-state ;
|
||||
M: utf8 decode-char
|
||||
drop decode-utf8 ;
|
||||
|
||||
! Encoding UTF-8
|
||||
|
||||
: encoded ( char -- )
|
||||
BIN: 111111 bitand BIN: 10000000 bitor write1 ;
|
||||
: encoded ( stream char -- )
|
||||
BIN: 111111 bitand BIN: 10000000 bitor swap stream-write1 ;
|
||||
|
||||
: char>utf8 ( char -- )
|
||||
: char>utf8 ( stream char -- )
|
||||
{
|
||||
{ [ dup -7 shift zero? ] [ write1 ] }
|
||||
{ [ dup -7 shift zero? ] [ swap stream-write1 ] }
|
||||
{ [ dup -11 shift zero? ] [
|
||||
dup -6 shift BIN: 11000000 bitor write1
|
||||
2dup -6 shift BIN: 11000000 bitor swap stream-write1
|
||||
encoded
|
||||
] }
|
||||
{ [ dup -16 shift zero? ] [
|
||||
dup -12 shift BIN: 11100000 bitor write1
|
||||
dup -6 shift encoded
|
||||
2dup -12 shift BIN: 11100000 bitor swap stream-write1
|
||||
2dup -6 shift encoded
|
||||
encoded
|
||||
] }
|
||||
{ [ t ] [
|
||||
dup -18 shift BIN: 11110000 bitor write1
|
||||
dup -12 shift encoded
|
||||
dup -6 shift encoded
|
||||
2dup -18 shift BIN: 11110000 bitor swap stream-write1
|
||||
2dup -12 shift encoded
|
||||
2dup -6 shift encoded
|
||||
encoded
|
||||
] }
|
||||
} cond ;
|
||||
|
||||
M: utf8 stream-write-encoded
|
||||
! For efficiency, this should be modified to avoid variable reads
|
||||
drop [ [ char>utf8 ] each ] with-stream* ;
|
||||
M: utf8 encode-char
|
||||
drop swap char>utf8 ;
|
||||
|
||||
PRIVATE>
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
! Copyright (C) 2003, 2007 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
IN: io.streams.string
|
||||
USING: io kernel math namespaces sequences sbufs strings
|
||||
generic splitting growable continuations io.streams.plain
|
||||
io.encodings ;
|
||||
io.encodings io.encodings.private ;
|
||||
IN: io.streams.string
|
||||
|
||||
M: growable dispose drop ;
|
||||
|
||||
|
|
|
@ -1,18 +1,20 @@
|
|||
! Copyright (C) 2008 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: io io.encodings strings kernel math sequences byte-arrays io.encodings ;
|
||||
USING: io io.encodings kernel math ;
|
||||
IN: io.encodings.ascii
|
||||
|
||||
: encode-check< ( string stream max -- )
|
||||
[ pick <= [ encode-error ] [ stream-write1 ] if ] 2curry each ;
|
||||
<PRIVATE
|
||||
: encode-if< ( char stream encoding max -- )
|
||||
nip pick > [ encode-error ] [ stream-write1 ] if ;
|
||||
|
||||
: push-if< ( sbuf character max -- )
|
||||
over <= [ drop HEX: fffd ] when swap push ;
|
||||
: decode-if< ( stream encoding max -- character )
|
||||
nip swap stream-read1 tuck > [ drop replacement-character ] unless ;
|
||||
PRIVATE>
|
||||
|
||||
TUPLE: ascii ;
|
||||
|
||||
M: ascii stream-write-encoded ( string stream encoding -- )
|
||||
drop 128 encode-check< ;
|
||||
M: ascii encode-char
|
||||
128 encode-if< ;
|
||||
|
||||
M: ascii decode-step
|
||||
drop 128 push-if< ;
|
||||
M: ascii decode-char
|
||||
128 decode-if< ;
|
||||
|
|
|
@ -1,12 +1,12 @@
|
|||
! Copyright (C) 2008 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: io io.encodings strings kernel io.encodings.ascii sequences math ;
|
||||
USING: io io.encodings kernel io.encodings.ascii.private ;
|
||||
IN: io.encodings.latin1
|
||||
|
||||
TUPLE: latin1 ;
|
||||
|
||||
M: latin1 stream-write-encoded
|
||||
drop 256 encode-check< ;
|
||||
M: latin1 encode-char
|
||||
256 encode-if< ;
|
||||
|
||||
M: latin1 decode-step
|
||||
drop swap push ;
|
||||
M: latin1 decode-char
|
||||
drop stream-read1 ;
|
||||
|
|
|
@ -4,92 +4,71 @@ USING: math kernel sequences sbufs vectors namespaces io.binary
|
|||
io.encodings combinators splitting io byte-arrays ;
|
||||
IN: io.encodings.utf16
|
||||
|
||||
! UTF-16BE decoding
|
||||
|
||||
TUPLE: utf16be ch state ;
|
||||
|
||||
SYMBOL: double
|
||||
SYMBOL: quad1
|
||||
SYMBOL: quad2
|
||||
SYMBOL: quad3
|
||||
SYMBOL: ignore
|
||||
|
||||
: do-ignore ( -- ch state ) 0 ignore ;
|
||||
|
||||
: append-nums ( byte ch -- ch )
|
||||
8 shift bitor ;
|
||||
|
||||
: end-multibyte ( buf byte ch -- buf ch state )
|
||||
append-nums push-decoded ;
|
||||
|
||||
: begin-utf16be ( buf byte -- buf ch state )
|
||||
dup -3 shift BIN: 11011 number= [
|
||||
dup BIN: 00000100 bitand zero?
|
||||
[ BIN: 11 bitand quad1 ]
|
||||
[ drop do-ignore ] if
|
||||
] [ double ] if ;
|
||||
|
||||
: handle-quad2be ( byte ch -- ch state )
|
||||
swap dup -2 shift BIN: 110111 number= [
|
||||
>r 2 shift r> BIN: 11 bitand bitor quad3
|
||||
] [ 2drop do-ignore ] if ;
|
||||
|
||||
: decode-utf16be-step ( buf byte ch state -- buf ch state )
|
||||
{
|
||||
{ begin [ drop begin-utf16be ] }
|
||||
{ double [ end-multibyte ] }
|
||||
{ quad1 [ append-nums quad2 ] }
|
||||
{ quad2 [ handle-quad2be ] }
|
||||
{ quad3 [ append-nums HEX: 10000 + push-decoded ] }
|
||||
{ ignore [ 2drop push-replacement ] }
|
||||
} case ;
|
||||
|
||||
: unpack-state-be ( encoding -- ch state )
|
||||
{ utf16be-ch utf16be-state } get-slots ;
|
||||
|
||||
: pack-state-be ( ch state encoding -- )
|
||||
{ set-utf16be-ch set-utf16be-state } set-slots ;
|
||||
|
||||
M: utf16be decode-step
|
||||
[ unpack-state-be decode-utf16be-step ] keep pack-state-be drop ;
|
||||
|
||||
M: utf16be init-decoder nip begin over set-utf16be-state ;
|
||||
|
||||
! UTF-16LE decoding
|
||||
TUPLE: utf16be ;
|
||||
|
||||
TUPLE: utf16le ch state ;
|
||||
|
||||
: handle-double ( buf byte ch -- buf ch state )
|
||||
swap dup -3 shift BIN: 11011 = [
|
||||
dup BIN: 100 bitand 0 number=
|
||||
[ BIN: 11 bitand 8 shift bitor quad2 ]
|
||||
[ 2drop push-replacement ] if
|
||||
] [ end-multibyte ] if ;
|
||||
TUPLE: utf16 started? ;
|
||||
|
||||
: handle-quad3le ( buf byte ch -- buf ch state )
|
||||
swap dup -2 shift BIN: 110111 = [
|
||||
BIN: 11 bitand append-nums HEX: 10000 + push-decoded
|
||||
] [ 2drop push-replacement ] if ;
|
||||
<PRIVATE
|
||||
|
||||
! UTF-16BE decoding
|
||||
|
||||
: append-nums ( byte ch -- ch )
|
||||
over [ 8 shift bitor ] [ 2drop replacement-char ] if ;
|
||||
|
||||
: double-be ( stream byte -- stream char )
|
||||
over stream-read1 swap append-nums ;
|
||||
|
||||
: quad-be ( stream byte -- stream char )
|
||||
double-be over stream-read1 dup [
|
||||
dup -2 shift BIN: 110111 number= [
|
||||
>r 2 shift r> BIN: 11 bitand bitor
|
||||
over stream-read1 swap append-nums HEX: 10000 +
|
||||
] [ 2drop replacement-char ] if
|
||||
] when ;
|
||||
|
||||
: ignore ( stream -- stream char )
|
||||
dup stream-read1 drop replacement-char ;
|
||||
|
||||
: begin-utf16be ( stream byte -- stream char )
|
||||
dup -3 shift BIN: 11011 number= [
|
||||
dup BIN: 00000100 bitand zero?
|
||||
[ BIN: 11 bitand quad-be ]
|
||||
[ drop ignore ] if
|
||||
] [ double-be ] if ;
|
||||
|
||||
M: decode-char
|
||||
drop dup stream-read1 dup [ begin-utf16be ] when nip ;
|
||||
|
||||
! UTF-16LE decoding
|
||||
|
||||
: quad-le ( stream ch -- stream char )
|
||||
over stream-read1 swap 10 shift bitor
|
||||
over stream-read1 dup -2 shift BIN: 110111 = [
|
||||
BIN: 11 bitand append-nums HEX: 10000 +
|
||||
] [ 2drop replacement-char ] if ;
|
||||
|
||||
: double-le ( stream byte1 byte2 -- stream char )
|
||||
dup -3 shift BIN: 11011 = [
|
||||
dup BIN: 100 bitand 0 number=
|
||||
[ BIN: 11 bitand 8 shift bitor quad-le ]
|
||||
[ 2drop replacement-char ] if
|
||||
] [ swap append-nums ] if ;
|
||||
|
||||
: decode-utf16le-step ( buf byte ch state -- buf ch state )
|
||||
{
|
||||
{ begin [ drop double ] }
|
||||
{ double [ handle-double ] }
|
||||
{ quad1 [ append-nums quad2 ] }
|
||||
{ quad2 [ 10 shift bitor quad3 ] }
|
||||
{ quad3 [ handle-quad3le ] }
|
||||
} case ;
|
||||
|
||||
: unpack-state-le ( encoding -- ch state )
|
||||
{ utf16le-ch utf16le-state } get-slots ;
|
||||
: begin-utf16le ( stream byte -- stream char )
|
||||
over stream-read1 [ double-le ] [ drop replacement-char ] if*
|
||||
|
||||
: pack-state-le ( ch state encoding -- )
|
||||
{ set-utf16le-ch set-utf16le-state } set-slots ;
|
||||
|
||||
M: utf16le decode-step
|
||||
[ unpack-state-le decode-utf16le-step ] keep pack-state-le drop ;
|
||||
|
||||
M: utf16le init-decoder nip begin over set-utf16le-state ;
|
||||
M: decode-char
|
||||
drop dup stream-read1 dup [ begin-utf16le ] when nip ;
|
||||
|
||||
! UTF-16LE/BE encoding
|
||||
|
||||
|
@ -103,25 +82,25 @@ M: utf16le init-decoder nip begin over set-utf16le-state ;
|
|||
dup -8 shift BIN: 11011100 bitor
|
||||
swap BIN: 11111111 bitand ;
|
||||
|
||||
: char>utf16be ( char -- )
|
||||
: stream-write2 ( stream char1 char2 -- )
|
||||
rot [ stream-write1 ] 2apply ;
|
||||
|
||||
: char>utf16be ( stream char -- )
|
||||
dup HEX: FFFF > [
|
||||
HEX: 10000 -
|
||||
dup encode-first swap write1 write1
|
||||
encode-second swap write1 write1
|
||||
] [ h>b/b write1 write1 ] if ;
|
||||
dup encode-first stream-write2
|
||||
encode-second stream-write2
|
||||
] [ h>b/b swap stream-write2 ] if ;
|
||||
|
||||
: stream-write-utf16be ( string stream -- )
|
||||
[ [ char>utf16be ] each ] with-stream* ;
|
||||
|
||||
M: utf16be stream-write-encoded ( string stream encoding -- )
|
||||
drop stream-write-utf16be ;
|
||||
M: utf16be encode-char ( char stream encoding -- )
|
||||
drop char>utf16be ;
|
||||
|
||||
: char>utf16le ( char -- )
|
||||
dup HEX: FFFF > [
|
||||
HEX: 10000 -
|
||||
dup encode-first write1 write1
|
||||
encode-second write1 write1
|
||||
] [ h>b/b swap write1 write1 ] if ;
|
||||
dup encode-first swap stream-write2
|
||||
encode-second swap stream-write2
|
||||
] [ h>b/b stream-write2 ] if ;
|
||||
|
||||
: stream-write-utf16le ( string stream -- )
|
||||
[ [ char>utf16le ] each ] with-stream* ;
|
||||
|
@ -139,17 +118,15 @@ M: utf16le stream-write-encoded ( string stream encoding -- )
|
|||
|
||||
: start-utf16be? ( seq1 -- seq2 ? ) bom-be ?head ;
|
||||
|
||||
TUPLE: utf16 started? ;
|
||||
|
||||
M: utf16 stream-write-encoded
|
||||
dup utf16-started? [ drop ]
|
||||
[ t swap set-utf16-started? bom-le over stream-write ] if
|
||||
stream-write-utf16le ;
|
||||
|
||||
: bom>le/be ( bom -- le/be )
|
||||
dup bom-le sequence= [ drop utf16le ] [
|
||||
bom-be sequence= [ utf16be ] [ decode-error ] if
|
||||
] if ;
|
||||
|
||||
M: utf16 init-decoder ( stream encoding -- newencoding )
|
||||
2 rot stream-read bom>le/be construct-empty init-decoder ;
|
||||
M: utf16 <decoder> ( stream utf16 -- decoder )
|
||||
2 rot stream-read bom>le/be <decoder> ;
|
||||
|
||||
M: utf16 <encoder> ( stream utf16 -- encoder )
|
||||
drop bom-le over stream-write utf16le <encoder> ;
|
||||
|
||||
PRIVATE>
|
||||
|
|
Loading…
Reference in New Issue