Merge branch 'master' of git://factorcode.org/git/littledan
commit
e826f1fe4a
|
@ -1,3 +1,8 @@
|
|||
! Copyright (C) 2008 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
IN: io.encodings.binary SYMBOL: binary
|
||||
USING: io.encodings kernel ;
|
||||
IN: io.encodings.binary
|
||||
|
||||
TUPLE: binary ;
|
||||
M: binary <encoder> drop ;
|
||||
M: binary <decoder> drop ;
|
||||
|
|
|
@ -44,25 +44,21 @@ $nl { $vocab-link "io.encodings.utf16" } ;
|
|||
|
||||
ARTICLE: "encodings-protocol" "Encoding protocol"
|
||||
"An encoding descriptor must implement the following methods. The methods are implemented on tuple classes by instantiating the class and calling the method again."
|
||||
{ $subsection decode-step }
|
||||
{ $subsection init-decoder }
|
||||
{ $subsection stream-write-encoded } ;
|
||||
{ $subsection decode-char }
|
||||
{ $subsection encode-char }
|
||||
"The following methods are optional:"
|
||||
{ $subsection <encoder> }
|
||||
{ $subsection <decoder> } ;
|
||||
|
||||
HELP: decode-step ( buf char encoding -- )
|
||||
{ $values { "buf" "A string buffer which characters can be pushed to" }
|
||||
{ "char" "An octet which is read from a stream" }
|
||||
HELP: decode-char ( stream encoding -- char/f )
|
||||
{ $values { "stream" "an underlying input stream" }
|
||||
{ "encoding" "An encoding descriptor tuple" } }
|
||||
{ $description "A single step in the decoding process must be defined for the decoding descriptor. When each octet is read, this word is called, and depending on the decoder's internal state, something may be pushed to the buffer or the state may change. This should not be used directly." } ;
|
||||
{ $description "Reads a single code point from the underlying stream, interpreting it by the encoding. This should not be used directly." } ;
|
||||
|
||||
HELP: stream-write-encoded ( string stream encoding -- )
|
||||
{ $values { "string" "a string" }
|
||||
{ "stream" "an output stream" }
|
||||
HELP: encode-char ( char stream encoding -- )
|
||||
{ $values { "char" "a character" }
|
||||
{ "stream" "an underlying output stream" }
|
||||
{ "encoding" "an encoding descriptor" } }
|
||||
{ $description "Encodes the string with the given encoding descriptor, outputing the result to the given stream. This should not be used directly." } ;
|
||||
{ $description "Writes the code point in the encoding to the underlying stream given. This should not be used directly." } ;
|
||||
|
||||
HELP: init-decoder ( stream encoding -- encoding )
|
||||
{ $values { "stream" "an input stream" }
|
||||
{ "encoding" "an encoding descriptor" } }
|
||||
{ $description "Initializes the decoder tuple's state. The stream is exposed so that it can be read, eg for a BOM. This should not be used directly." } ;
|
||||
|
||||
{ init-decoder decode-step stream-write-encoded } related-words
|
||||
{ encode-char decode-char } related-words
|
||||
|
|
|
@ -2,62 +2,36 @@
|
|||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: math kernel sequences sbufs vectors namespaces
|
||||
growable strings io classes continuations combinators
|
||||
io.styles io.streams.plain io.encodings.binary splitting
|
||||
io.streams.duplex byte-arrays ;
|
||||
io.styles io.streams.plain splitting
|
||||
io.streams.duplex byte-arrays sequences.private ;
|
||||
IN: io.encodings
|
||||
|
||||
! The encoding descriptor protocol
|
||||
|
||||
GENERIC: decode-step ( buf char encoding -- )
|
||||
M: object decode-step drop swap push ;
|
||||
GENERIC: decode-char ( stream encoding -- char/f )
|
||||
|
||||
GENERIC: init-decoder ( stream encoding -- encoding )
|
||||
M: tuple-class init-decoder construct-empty init-decoder ;
|
||||
M: object init-decoder nip ;
|
||||
GENERIC: encode-char ( char stream encoding -- )
|
||||
|
||||
GENERIC: stream-write-encoded ( string stream encoding -- byte-array )
|
||||
M: object stream-write-encoded drop stream-write ;
|
||||
GENERIC: <decoder> ( stream decoding -- newstream )
|
||||
|
||||
GENERIC: <encoder> ( stream encoding -- newstream )
|
||||
|
||||
: replacement-char HEX: fffd ;
|
||||
|
||||
! Decoding
|
||||
|
||||
<PRIVATE
|
||||
|
||||
TUPLE: decode-error ;
|
||||
|
||||
: decode-error ( -- * ) \ decode-error construct-empty throw ;
|
||||
|
||||
SYMBOL: begin
|
||||
TUPLE: decoder stream code cr ;
|
||||
M: tuple-class <decoder> construct-empty <decoder> ;
|
||||
M: tuple <decoder> f decoder construct-boa ;
|
||||
|
||||
: push-decoded ( buf ch -- buf ch state )
|
||||
over push 0 begin ;
|
||||
|
||||
: push-replacement ( buf -- buf ch state )
|
||||
! This is the replacement character
|
||||
HEX: fffd push-decoded ;
|
||||
|
||||
: space ( resizable -- room-left )
|
||||
dup underlying swap [ length ] 2apply - ;
|
||||
|
||||
: full? ( resizable -- ? ) space zero? ;
|
||||
|
||||
: end-read-loop ( buf ch state stream quot -- string/f )
|
||||
2drop 2drop >string f like ;
|
||||
|
||||
: decode-read-loop ( buf stream encoding -- string/f )
|
||||
pick full? [ 2drop >string ] [
|
||||
over stream-read1 [
|
||||
-rot tuck >r >r >r dupd r> decode-step r> r>
|
||||
decode-read-loop
|
||||
] [ 2drop >string f like ] if*
|
||||
] if ;
|
||||
|
||||
: decode-read ( length stream encoding -- string )
|
||||
rot <sbuf> -rot decode-read-loop ;
|
||||
|
||||
TUPLE: decoder code cr ;
|
||||
: <decoder> ( stream encoding -- newstream )
|
||||
dup binary eq? [ drop ] [
|
||||
dupd init-decoder { set-delegate set-decoder-code }
|
||||
decoder construct
|
||||
] if ;
|
||||
: >decoder< ( decoder -- stream encoding )
|
||||
{ decoder-stream decoder-code } get-slots ;
|
||||
|
||||
: cr+ t swap set-decoder-cr ; inline
|
||||
|
||||
|
@ -82,72 +56,83 @@ TUPLE: decoder code cr ;
|
|||
over decoder-cr [
|
||||
over cr-
|
||||
"\n" ?head [
|
||||
swap stream-read1 [ add ] when*
|
||||
] [ nip ] if
|
||||
] [ nip ] if ;
|
||||
over stream-read1 [ add ] when*
|
||||
] when
|
||||
] when nip ;
|
||||
|
||||
: read-loop ( n stream -- string )
|
||||
SBUF" " clone [
|
||||
[
|
||||
>r nip stream-read1 dup
|
||||
[ r> push f ] [ r> 2drop t ] if
|
||||
] 2curry find-integer drop
|
||||
] keep "" like f like ;
|
||||
|
||||
M: decoder stream-read
|
||||
tuck { delegate decoder-code } get-slots decode-read fix-read ;
|
||||
tuck read-loop fix-read ;
|
||||
|
||||
M: decoder stream-read-partial stream-read ;
|
||||
|
||||
: decoder-read-until ( stream delim -- ch )
|
||||
! Copied from { c-reader stream-read-until }!!!
|
||||
over stream-read1 dup [
|
||||
dup pick memq? [ 2nip ] [ , decoder-read-until ] if
|
||||
] [
|
||||
2nip
|
||||
] if ;
|
||||
: (read-until) ( buf quot -- string/f sep/f )
|
||||
! quot: -- char stop?
|
||||
dup call
|
||||
[ >r drop "" like r> ]
|
||||
[ pick push (read-until) ] if ; inline
|
||||
|
||||
M: decoder stream-read-until
|
||||
! Copied from { c-reader stream-read-until }!!!
|
||||
[ swap decoder-read-until ] "" make
|
||||
swap over empty? over not and [ 2drop f f ] when ;
|
||||
SBUF" " clone -rot >decoder<
|
||||
[ decode-char [ dup rot memq? ] [ drop f t ] if* ] 3curry
|
||||
(read-until) ;
|
||||
|
||||
: fix-read1 ( stream char -- char )
|
||||
over decoder-cr [
|
||||
over cr-
|
||||
dup CHAR: \n = [
|
||||
drop stream-read1
|
||||
] [ nip ] if
|
||||
] [ nip ] if ;
|
||||
drop dup stream-read1
|
||||
] when
|
||||
] when nip ;
|
||||
|
||||
M: decoder stream-read1
|
||||
1 swap stream-read f like [ first ] [ f ] if* ;
|
||||
dup >decoder< decode-char fix-read1 ;
|
||||
|
||||
M: decoder stream-readln ( stream -- str )
|
||||
"\r\n" over stream-read-until handle-readln ;
|
||||
|
||||
M: decoder dispose decoder-stream dispose ;
|
||||
|
||||
! Encoding
|
||||
|
||||
TUPLE: encode-error ;
|
||||
|
||||
: encode-error ( -- * ) \ encode-error construct-empty throw ;
|
||||
|
||||
TUPLE: encoder code ;
|
||||
: <encoder> ( stream encoding -- newstream )
|
||||
dup binary eq? [ drop ] [
|
||||
construct-empty { set-delegate set-encoder-code }
|
||||
encoder construct
|
||||
] if ;
|
||||
TUPLE: encoder stream code ;
|
||||
M: tuple-class <encoder> construct-empty <encoder> ;
|
||||
M: tuple <encoder> encoder construct-boa ;
|
||||
|
||||
: >encoder< ( encoder -- stream encoding )
|
||||
{ encoder-stream encoder-code } get-slots ;
|
||||
|
||||
M: encoder stream-write1
|
||||
>r 1string r> stream-write ;
|
||||
>encoder< encode-char ;
|
||||
|
||||
M: encoder stream-write
|
||||
{ delegate encoder-code } get-slots stream-write-encoded ;
|
||||
>encoder< [ encode-char ] 2curry each ;
|
||||
|
||||
M: encoder dispose delegate dispose ;
|
||||
M: encoder dispose encoder-stream dispose ;
|
||||
|
||||
M: encoder stream-flush encoder-stream stream-flush ;
|
||||
|
||||
INSTANCE: encoder plain-writer
|
||||
|
||||
! Rebinding duplex streams which have not read anything yet
|
||||
|
||||
: reencode ( stream encoding -- newstream )
|
||||
over encoder? [ >r delegate r> ] when <encoder> ;
|
||||
over encoder? [ >r encoder-stream r> ] when <encoder> ;
|
||||
|
||||
: redecode ( stream encoding -- newstream )
|
||||
over decoder? [ >r delegate r> ] when <decoder> ;
|
||||
over decoder? [ >r decoder-stream r> ] when <decoder> ;
|
||||
PRIVATE>
|
||||
|
||||
: <encoder-duplex> ( stream-in stream-out encoding -- duplex )
|
||||
tuck reencode >r redecode r> <duplex-stream> ;
|
||||
|
|
|
@ -6,82 +6,68 @@ IN: io.encodings.utf8
|
|||
|
||||
! Decoding UTF-8
|
||||
|
||||
TUPLE: utf8 ch state ;
|
||||
TUPLE: utf8 ;
|
||||
|
||||
SYMBOL: double
|
||||
SYMBOL: triple
|
||||
SYMBOL: triple2
|
||||
SYMBOL: quad
|
||||
SYMBOL: quad2
|
||||
SYMBOL: quad3
|
||||
<PRIVATE
|
||||
|
||||
: starts-2? ( char -- ? )
|
||||
-6 shift BIN: 10 number= ;
|
||||
dup [ -6 shift BIN: 10 number= ] when ;
|
||||
|
||||
: append-nums ( buf bottom top state-out -- buf num state )
|
||||
>r over starts-2?
|
||||
[ 6 shift swap BIN: 111111 bitand bitor r> ]
|
||||
[ r> 3drop push-replacement ] if ;
|
||||
: append-nums ( stream byte -- stream char )
|
||||
over stream-read1 dup starts-2?
|
||||
[ swap 6 shift swap BIN: 111111 bitand bitor ]
|
||||
[ 2drop replacement-char ] if ;
|
||||
|
||||
: begin-utf8 ( buf byte -- buf ch state )
|
||||
: double ( stream byte -- stream char )
|
||||
BIN: 11111 bitand append-nums ;
|
||||
|
||||
: triple ( stream byte -- stream char )
|
||||
BIN: 1111 bitand append-nums append-nums ;
|
||||
|
||||
: quad ( stream byte -- stream char )
|
||||
BIN: 111 bitand append-nums append-nums append-nums ;
|
||||
|
||||
: begin-utf8 ( stream byte -- stream char )
|
||||
{
|
||||
{ [ dup -7 shift zero? ] [ push-decoded ] }
|
||||
{ [ dup -5 shift BIN: 110 number= ] [ BIN: 11111 bitand double ] }
|
||||
{ [ dup -4 shift BIN: 1110 number= ] [ BIN: 1111 bitand triple ] }
|
||||
{ [ dup -3 shift BIN: 11110 number= ] [ BIN: 111 bitand quad ] }
|
||||
{ [ t ] [ drop push-replacement ] }
|
||||
{ [ dup -7 shift zero? ] [ ] }
|
||||
{ [ dup -5 shift BIN: 110 number= ] [ double ] }
|
||||
{ [ dup -4 shift BIN: 1110 number= ] [ triple ] }
|
||||
{ [ dup -3 shift BIN: 11110 number= ] [ quad ] }
|
||||
{ [ t ] [ drop replacement-char ] }
|
||||
} cond ;
|
||||
|
||||
: end-multibyte ( buf byte ch -- buf ch state )
|
||||
f append-nums [ push-decoded ] unless* ;
|
||||
: decode-utf8 ( stream -- char/f )
|
||||
dup stream-read1 dup [ begin-utf8 ] when nip ;
|
||||
|
||||
: decode-utf8-step ( buf byte ch state -- buf ch state )
|
||||
{
|
||||
{ begin [ drop begin-utf8 ] }
|
||||
{ double [ end-multibyte ] }
|
||||
{ triple [ triple2 append-nums ] }
|
||||
{ triple2 [ end-multibyte ] }
|
||||
{ quad [ quad2 append-nums ] }
|
||||
{ quad2 [ quad3 append-nums ] }
|
||||
{ quad3 [ end-multibyte ] }
|
||||
} case ;
|
||||
|
||||
: unpack-state ( encoding -- ch state )
|
||||
{ utf8-ch utf8-state } get-slots ;
|
||||
|
||||
: pack-state ( ch state encoding -- )
|
||||
{ set-utf8-ch set-utf8-state } set-slots ;
|
||||
|
||||
M: utf8 decode-step ( buf char encoding -- )
|
||||
[ unpack-state decode-utf8-step ] keep pack-state drop ;
|
||||
|
||||
M: utf8 init-decoder nip begin over set-utf8-state ;
|
||||
M: utf8 decode-char
|
||||
drop decode-utf8 ;
|
||||
|
||||
! Encoding UTF-8
|
||||
|
||||
: encoded ( char -- )
|
||||
BIN: 111111 bitand BIN: 10000000 bitor write1 ;
|
||||
: encoded ( stream char -- )
|
||||
BIN: 111111 bitand BIN: 10000000 bitor swap stream-write1 ;
|
||||
|
||||
: char>utf8 ( char -- )
|
||||
: char>utf8 ( stream char -- )
|
||||
{
|
||||
{ [ dup -7 shift zero? ] [ write1 ] }
|
||||
{ [ dup -7 shift zero? ] [ swap stream-write1 ] }
|
||||
{ [ dup -11 shift zero? ] [
|
||||
dup -6 shift BIN: 11000000 bitor write1
|
||||
2dup -6 shift BIN: 11000000 bitor swap stream-write1
|
||||
encoded
|
||||
] }
|
||||
{ [ dup -16 shift zero? ] [
|
||||
dup -12 shift BIN: 11100000 bitor write1
|
||||
dup -6 shift encoded
|
||||
2dup -12 shift BIN: 11100000 bitor swap stream-write1
|
||||
2dup -6 shift encoded
|
||||
encoded
|
||||
] }
|
||||
{ [ t ] [
|
||||
dup -18 shift BIN: 11110000 bitor write1
|
||||
dup -12 shift encoded
|
||||
dup -6 shift encoded
|
||||
2dup -18 shift BIN: 11110000 bitor swap stream-write1
|
||||
2dup -12 shift encoded
|
||||
2dup -6 shift encoded
|
||||
encoded
|
||||
] }
|
||||
} cond ;
|
||||
|
||||
M: utf8 stream-write-encoded
|
||||
! For efficiency, this should be modified to avoid variable reads
|
||||
drop [ [ char>utf8 ] each ] with-stream* ;
|
||||
M: utf8 encode-char
|
||||
drop swap char>utf8 ;
|
||||
|
||||
PRIVATE>
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
USING: byte-arrays byte-vectors kernel io.encodings io.streams.string
|
||||
sequences io namespaces ;
|
||||
sequences io namespaces io.encodings.private ;
|
||||
IN: io.streams.byte-array
|
||||
|
||||
: <byte-writer> ( encoding -- stream )
|
||||
|
@ -7,7 +7,7 @@ IN: io.streams.byte-array
|
|||
|
||||
: with-byte-writer ( encoding quot -- byte-array )
|
||||
>r <byte-writer> r> [ stdio get ] compose with-stream*
|
||||
>byte-array ; inline
|
||||
dup encoder? [ encoder-stream ] when >byte-array ; inline
|
||||
|
||||
: <byte-reader> ( byte-array encoding -- stream )
|
||||
>r >byte-vector dup reverse-here r> <decoder> ;
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
! Copyright (C) 2003, 2007 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
IN: io.streams.string
|
||||
USING: io kernel math namespaces sequences sbufs strings
|
||||
generic splitting growable continuations io.streams.plain
|
||||
io.encodings ;
|
||||
io.encodings io.encodings.private ;
|
||||
IN: io.streams.string
|
||||
|
||||
M: growable dispose drop ;
|
||||
|
||||
|
@ -49,8 +49,11 @@ M: growable stream-read
|
|||
M: growable stream-read-partial
|
||||
stream-read ;
|
||||
|
||||
TUPLE: null ;
|
||||
M: null decode-char drop stream-read1 ;
|
||||
|
||||
: <string-reader> ( str -- stream )
|
||||
>sbuf dup reverse-here f <decoder> ;
|
||||
>sbuf dup reverse-here null <decoder> ;
|
||||
|
||||
: with-string-reader ( str quot -- )
|
||||
>r <string-reader> r> with-stream ; inline
|
||||
|
|
|
@ -1,18 +1,22 @@
|
|||
! Copyright (C) 2008 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: io io.encodings strings kernel math sequences byte-arrays io.encodings ;
|
||||
USING: io io.encodings kernel math io.encodings.private ;
|
||||
IN: io.encodings.ascii
|
||||
|
||||
: encode-check< ( string stream max -- )
|
||||
[ pick <= [ encode-error ] [ stream-write1 ] if ] 2curry each ;
|
||||
<PRIVATE
|
||||
: encode-if< ( char stream encoding max -- )
|
||||
nip 1- pick < [ encode-error ] [ stream-write1 ] if ;
|
||||
|
||||
: push-if< ( sbuf character max -- )
|
||||
over <= [ drop HEX: fffd ] when swap push ;
|
||||
: decode-if< ( stream encoding max -- character )
|
||||
nip swap stream-read1
|
||||
[ tuck > [ drop replacement-char ] unless ]
|
||||
[ drop f ] if* ;
|
||||
PRIVATE>
|
||||
|
||||
TUPLE: ascii ;
|
||||
|
||||
M: ascii stream-write-encoded ( string stream encoding -- )
|
||||
drop 128 encode-check< ;
|
||||
M: ascii encode-char
|
||||
128 encode-if< ;
|
||||
|
||||
M: ascii decode-step
|
||||
drop 128 push-if< ;
|
||||
M: ascii decode-char
|
||||
128 decode-if< ;
|
||||
|
|
|
@ -1,12 +1,12 @@
|
|||
! Copyright (C) 2008 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: io io.encodings strings kernel io.encodings.ascii sequences math ;
|
||||
USING: io io.encodings kernel io.encodings.ascii.private ;
|
||||
IN: io.encodings.latin1
|
||||
|
||||
TUPLE: latin1 ;
|
||||
|
||||
M: latin1 stream-write-encoded
|
||||
drop 256 encode-check< ;
|
||||
M: latin1 encode-char
|
||||
256 encode-if< ;
|
||||
|
||||
M: latin1 decode-step
|
||||
drop swap push ;
|
||||
M: latin1 decode-char
|
||||
drop stream-read1 ;
|
||||
|
|
|
@ -1,133 +1,101 @@
|
|||
! Copyright (C) 2006, 2008 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: math kernel sequences sbufs vectors namespaces io.binary
|
||||
io.encodings combinators splitting io byte-arrays ;
|
||||
io.encodings combinators splitting io byte-arrays inspector ;
|
||||
IN: io.encodings.utf16
|
||||
|
||||
TUPLE: utf16be ;
|
||||
|
||||
TUPLE: utf16le ;
|
||||
|
||||
TUPLE: utf16 ;
|
||||
|
||||
<PRIVATE
|
||||
|
||||
! UTF-16BE decoding
|
||||
|
||||
TUPLE: utf16be ch state ;
|
||||
|
||||
SYMBOL: double
|
||||
SYMBOL: quad1
|
||||
SYMBOL: quad2
|
||||
SYMBOL: quad3
|
||||
SYMBOL: ignore
|
||||
|
||||
: do-ignore ( -- ch state ) 0 ignore ;
|
||||
|
||||
: append-nums ( byte ch -- ch )
|
||||
8 shift bitor ;
|
||||
over [ 8 shift bitor ] [ 2drop replacement-char ] if ;
|
||||
|
||||
: end-multibyte ( buf byte ch -- buf ch state )
|
||||
append-nums push-decoded ;
|
||||
: double-be ( stream byte -- stream char )
|
||||
over stream-read1 swap append-nums ;
|
||||
|
||||
: begin-utf16be ( buf byte -- buf ch state )
|
||||
: quad-be ( stream byte -- stream char )
|
||||
double-be over stream-read1 [
|
||||
dup -2 shift BIN: 110111 number= [
|
||||
>r 2 shift r> BIN: 11 bitand bitor
|
||||
over stream-read1 swap append-nums HEX: 10000 +
|
||||
] [ 2drop dup stream-read1 drop replacement-char ] if
|
||||
] when* ;
|
||||
|
||||
: ignore ( stream -- stream char )
|
||||
dup stream-read1 drop replacement-char ;
|
||||
|
||||
: begin-utf16be ( stream byte -- stream char )
|
||||
dup -3 shift BIN: 11011 number= [
|
||||
dup BIN: 00000100 bitand zero?
|
||||
[ BIN: 11 bitand quad1 ]
|
||||
[ drop do-ignore ] if
|
||||
] [ double ] if ;
|
||||
|
||||
: handle-quad2be ( byte ch -- ch state )
|
||||
swap dup -2 shift BIN: 110111 number= [
|
||||
>r 2 shift r> BIN: 11 bitand bitor quad3
|
||||
] [ 2drop do-ignore ] if ;
|
||||
|
||||
: decode-utf16be-step ( buf byte ch state -- buf ch state )
|
||||
{
|
||||
{ begin [ drop begin-utf16be ] }
|
||||
{ double [ end-multibyte ] }
|
||||
{ quad1 [ append-nums quad2 ] }
|
||||
{ quad2 [ handle-quad2be ] }
|
||||
{ quad3 [ append-nums HEX: 10000 + push-decoded ] }
|
||||
{ ignore [ 2drop push-replacement ] }
|
||||
} case ;
|
||||
|
||||
: unpack-state-be ( encoding -- ch state )
|
||||
{ utf16be-ch utf16be-state } get-slots ;
|
||||
|
||||
: pack-state-be ( ch state encoding -- )
|
||||
{ set-utf16be-ch set-utf16be-state } set-slots ;
|
||||
|
||||
M: utf16be decode-step
|
||||
[ unpack-state-be decode-utf16be-step ] keep pack-state-be drop ;
|
||||
|
||||
M: utf16be init-decoder nip begin over set-utf16be-state ;
|
||||
[ BIN: 11 bitand quad-be ]
|
||||
[ drop ignore ] if
|
||||
] [ double-be ] if ;
|
||||
|
||||
M: utf16be decode-char
|
||||
drop dup stream-read1 dup [ begin-utf16be ] when nip ;
|
||||
|
||||
! UTF-16LE decoding
|
||||
|
||||
TUPLE: utf16le ch state ;
|
||||
: quad-le ( stream ch -- stream char )
|
||||
over stream-read1 swap 10 shift bitor
|
||||
over stream-read1 dup -2 shift BIN: 110111 = [
|
||||
BIN: 11 bitand append-nums HEX: 10000 +
|
||||
] [ 2drop replacement-char ] if ;
|
||||
|
||||
: handle-double ( buf byte ch -- buf ch state )
|
||||
swap dup -3 shift BIN: 11011 = [
|
||||
: double-le ( stream byte1 byte2 -- stream char )
|
||||
dup -3 shift BIN: 11011 = [
|
||||
dup BIN: 100 bitand 0 number=
|
||||
[ BIN: 11 bitand 8 shift bitor quad2 ]
|
||||
[ 2drop push-replacement ] if
|
||||
] [ end-multibyte ] if ;
|
||||
[ BIN: 11 bitand 8 shift bitor quad-le ]
|
||||
[ 2drop replacement-char ] if
|
||||
] [ append-nums ] if ;
|
||||
|
||||
: handle-quad3le ( buf byte ch -- buf ch state )
|
||||
swap dup -2 shift BIN: 110111 = [
|
||||
BIN: 11 bitand append-nums HEX: 10000 + push-decoded
|
||||
] [ 2drop push-replacement ] if ;
|
||||
: begin-utf16le ( stream byte -- stream char )
|
||||
over stream-read1 [ double-le ] [ drop replacement-char ] if* ;
|
||||
|
||||
: decode-utf16le-step ( buf byte ch state -- buf ch state )
|
||||
{
|
||||
{ begin [ drop double ] }
|
||||
{ double [ handle-double ] }
|
||||
{ quad1 [ append-nums quad2 ] }
|
||||
{ quad2 [ 10 shift bitor quad3 ] }
|
||||
{ quad3 [ handle-quad3le ] }
|
||||
} case ;
|
||||
|
||||
: unpack-state-le ( encoding -- ch state )
|
||||
{ utf16le-ch utf16le-state } get-slots ;
|
||||
|
||||
: pack-state-le ( ch state encoding -- )
|
||||
{ set-utf16le-ch set-utf16le-state } set-slots ;
|
||||
|
||||
M: utf16le decode-step
|
||||
[ unpack-state-le decode-utf16le-step ] keep pack-state-le drop ;
|
||||
|
||||
M: utf16le init-decoder nip begin over set-utf16le-state ;
|
||||
M: utf16le decode-char
|
||||
drop dup stream-read1 dup [ begin-utf16le ] when nip ;
|
||||
|
||||
! UTF-16LE/BE encoding
|
||||
|
||||
: encode-first
|
||||
: encode-first ( char -- byte1 byte2 )
|
||||
-10 shift
|
||||
dup -8 shift BIN: 11011000 bitor
|
||||
swap HEX: FF bitand ;
|
||||
|
||||
: encode-second
|
||||
: encode-second ( char -- byte3 byte4 )
|
||||
BIN: 1111111111 bitand
|
||||
dup -8 shift BIN: 11011100 bitor
|
||||
swap BIN: 11111111 bitand ;
|
||||
|
||||
: char>utf16be ( char -- )
|
||||
: stream-write2 ( stream char1 char2 -- )
|
||||
rot [ stream-write1 ] curry 2apply ;
|
||||
|
||||
: char>utf16be ( stream char -- )
|
||||
dup HEX: FFFF > [
|
||||
HEX: 10000 -
|
||||
dup encode-first swap write1 write1
|
||||
encode-second swap write1 write1
|
||||
] [ h>b/b write1 write1 ] if ;
|
||||
2dup encode-first stream-write2
|
||||
encode-second stream-write2
|
||||
] [ h>b/b swap stream-write2 ] if ;
|
||||
|
||||
: stream-write-utf16be ( string stream -- )
|
||||
[ [ char>utf16be ] each ] with-stream* ;
|
||||
M: utf16be encode-char ( char stream encoding -- )
|
||||
drop swap char>utf16be ;
|
||||
|
||||
M: utf16be stream-write-encoded ( string stream encoding -- )
|
||||
drop stream-write-utf16be ;
|
||||
|
||||
: char>utf16le ( char -- )
|
||||
: char>utf16le ( char stream -- )
|
||||
dup HEX: FFFF > [
|
||||
HEX: 10000 -
|
||||
dup encode-first write1 write1
|
||||
encode-second write1 write1
|
||||
] [ h>b/b swap write1 write1 ] if ;
|
||||
2dup encode-first swap stream-write2
|
||||
encode-second swap stream-write2
|
||||
] [ h>b/b stream-write2 ] if ;
|
||||
|
||||
: stream-write-utf16le ( string stream -- )
|
||||
[ [ char>utf16le ] each ] with-stream* ;
|
||||
|
||||
M: utf16le stream-write-encoded ( string stream encoding -- )
|
||||
drop stream-write-utf16le ;
|
||||
M: utf16le encode-char ( char stream encoding -- )
|
||||
drop swap char>utf16le ;
|
||||
|
||||
! UTF-16
|
||||
|
||||
|
@ -139,17 +107,18 @@ M: utf16le stream-write-encoded ( string stream encoding -- )
|
|||
|
||||
: start-utf16be? ( seq1 -- seq2 ? ) bom-be ?head ;
|
||||
|
||||
TUPLE: utf16 started? ;
|
||||
|
||||
M: utf16 stream-write-encoded
|
||||
dup utf16-started? [ drop ]
|
||||
[ t swap set-utf16-started? bom-le over stream-write ] if
|
||||
stream-write-utf16le ;
|
||||
TUPLE: missing-bom ;
|
||||
M: missing-bom summary drop "The BOM for a UTF-16 stream was missing" ;
|
||||
|
||||
: bom>le/be ( bom -- le/be )
|
||||
dup bom-le sequence= [ drop utf16le ] [
|
||||
bom-be sequence= [ utf16be ] [ decode-error ] if
|
||||
bom-be sequence= [ utf16be ] [ missing-bom ] if
|
||||
] if ;
|
||||
|
||||
M: utf16 init-decoder ( stream encoding -- newencoding )
|
||||
2 rot stream-read bom>le/be construct-empty init-decoder ;
|
||||
M: utf16 <decoder> ( stream utf16 -- decoder )
|
||||
drop 2 over stream-read bom>le/be <decoder> ;
|
||||
|
||||
M: utf16 <encoder> ( stream utf16 -- encoder )
|
||||
drop bom-le over stream-write utf16le <encoder> ;
|
||||
|
||||
PRIVATE>
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
IN: io.unix.launcher.tests
|
||||
USING: io.files tools.test io.launcher arrays io namespaces
|
||||
continuations math io.encodings.ascii io.encodings.latin1
|
||||
continuations math io.encodings.binary io.encodings.ascii
|
||||
accessors kernel sequences ;
|
||||
|
||||
[ ] [
|
||||
|
@ -64,7 +64,7 @@ accessors kernel sequences ;
|
|||
|
||||
[ ] [
|
||||
2 [
|
||||
"launcher-test-1" temp-file ascii <file-appender> [
|
||||
"launcher-test-1" temp-file binary <file-appender> [
|
||||
<process>
|
||||
swap >>stdout
|
||||
"echo Hello" >>command
|
||||
|
@ -84,7 +84,7 @@ accessors kernel sequences ;
|
|||
<process>
|
||||
"env" >>command
|
||||
{ { "A" "B" } } >>environment
|
||||
latin1 <process-stream> lines
|
||||
ascii <process-stream> lines
|
||||
"A=B" swap member?
|
||||
] unit-test
|
||||
|
||||
|
@ -93,5 +93,5 @@ accessors kernel sequences ;
|
|||
"env" >>command
|
||||
{ { "A" "B" } } >>environment
|
||||
+replace-environment+ >>environment-mode
|
||||
latin1 <process-stream> lines
|
||||
ascii <process-stream> lines
|
||||
] unit-test
|
||||
|
|
Loading…
Reference in New Issue