Merge branch 'master' of git://factorcode.org/git/littledan

db4
Slava Pestov 2008-03-19 13:23:55 -05:00
commit e826f1fe4a
10 changed files with 216 additions and 268 deletions

View File

@ -1,3 +1,8 @@
! Copyright (C) 2008 Daniel Ehrenberg. ! Copyright (C) 2008 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
IN: io.encodings.binary SYMBOL: binary USING: io.encodings kernel ;
IN: io.encodings.binary
TUPLE: binary ;
M: binary <encoder> drop ;
M: binary <decoder> drop ;

View File

@ -44,25 +44,21 @@ $nl { $vocab-link "io.encodings.utf16" } ;
ARTICLE: "encodings-protocol" "Encoding protocol" ARTICLE: "encodings-protocol" "Encoding protocol"
"An encoding descriptor must implement the following methods. The methods are implemented on tuple classes by instantiating the class and calling the method again." "An encoding descriptor must implement the following methods. The methods are implemented on tuple classes by instantiating the class and calling the method again."
{ $subsection decode-step } { $subsection decode-char }
{ $subsection init-decoder } { $subsection encode-char }
{ $subsection stream-write-encoded } ; "The following methods are optional:"
{ $subsection <encoder> }
{ $subsection <decoder> } ;
HELP: decode-step ( buf char encoding -- ) HELP: decode-char ( stream encoding -- char/f )
{ $values { "buf" "A string buffer which characters can be pushed to" } { $values { "stream" "an underlying input stream" }
{ "char" "An octet which is read from a stream" }
{ "encoding" "An encoding descriptor tuple" } } { "encoding" "An encoding descriptor tuple" } }
{ $description "A single step in the decoding process must be defined for the decoding descriptor. When each octet is read, this word is called, and depending on the decoder's internal state, something may be pushed to the buffer or the state may change. This should not be used directly." } ; { $description "Reads a single code point from the underlying stream, interpreting it by the encoding. This should not be used directly." } ;
HELP: stream-write-encoded ( string stream encoding -- ) HELP: encode-char ( char stream encoding -- )
{ $values { "string" "a string" } { $values { "char" "a character" }
{ "stream" "an output stream" } { "stream" "an underlying output stream" }
{ "encoding" "an encoding descriptor" } } { "encoding" "an encoding descriptor" } }
{ $description "Encodes the string with the given encoding descriptor, outputing the result to the given stream. This should not be used directly." } ; { $description "Writes the code point in the encoding to the underlying stream given. This should not be used directly." } ;
HELP: init-decoder ( stream encoding -- encoding ) { encode-char decode-char } related-words
{ $values { "stream" "an input stream" }
{ "encoding" "an encoding descriptor" } }
{ $description "Initializes the decoder tuple's state. The stream is exposed so that it can be read, eg for a BOM. This should not be used directly." } ;
{ init-decoder decode-step stream-write-encoded } related-words

View File

@ -2,62 +2,36 @@
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: math kernel sequences sbufs vectors namespaces USING: math kernel sequences sbufs vectors namespaces
growable strings io classes continuations combinators growable strings io classes continuations combinators
io.styles io.streams.plain io.encodings.binary splitting io.styles io.streams.plain splitting
io.streams.duplex byte-arrays ; io.streams.duplex byte-arrays sequences.private ;
IN: io.encodings IN: io.encodings
! The encoding descriptor protocol ! The encoding descriptor protocol
GENERIC: decode-step ( buf char encoding -- ) GENERIC: decode-char ( stream encoding -- char/f )
M: object decode-step drop swap push ;
GENERIC: init-decoder ( stream encoding -- encoding ) GENERIC: encode-char ( char stream encoding -- )
M: tuple-class init-decoder construct-empty init-decoder ;
M: object init-decoder nip ;
GENERIC: stream-write-encoded ( string stream encoding -- byte-array ) GENERIC: <decoder> ( stream decoding -- newstream )
M: object stream-write-encoded drop stream-write ;
GENERIC: <encoder> ( stream encoding -- newstream )
: replacement-char HEX: fffd ;
! Decoding ! Decoding
<PRIVATE
TUPLE: decode-error ; TUPLE: decode-error ;
: decode-error ( -- * ) \ decode-error construct-empty throw ; : decode-error ( -- * ) \ decode-error construct-empty throw ;
SYMBOL: begin TUPLE: decoder stream code cr ;
M: tuple-class <decoder> construct-empty <decoder> ;
M: tuple <decoder> f decoder construct-boa ;
: push-decoded ( buf ch -- buf ch state ) : >decoder< ( decoder -- stream encoding )
over push 0 begin ; { decoder-stream decoder-code } get-slots ;
: push-replacement ( buf -- buf ch state )
! This is the replacement character
HEX: fffd push-decoded ;
: space ( resizable -- room-left )
dup underlying swap [ length ] 2apply - ;
: full? ( resizable -- ? ) space zero? ;
: end-read-loop ( buf ch state stream quot -- string/f )
2drop 2drop >string f like ;
: decode-read-loop ( buf stream encoding -- string/f )
pick full? [ 2drop >string ] [
over stream-read1 [
-rot tuck >r >r >r dupd r> decode-step r> r>
decode-read-loop
] [ 2drop >string f like ] if*
] if ;
: decode-read ( length stream encoding -- string )
rot <sbuf> -rot decode-read-loop ;
TUPLE: decoder code cr ;
: <decoder> ( stream encoding -- newstream )
dup binary eq? [ drop ] [
dupd init-decoder { set-delegate set-decoder-code }
decoder construct
] if ;
: cr+ t swap set-decoder-cr ; inline : cr+ t swap set-decoder-cr ; inline
@ -82,72 +56,83 @@ TUPLE: decoder code cr ;
over decoder-cr [ over decoder-cr [
over cr- over cr-
"\n" ?head [ "\n" ?head [
swap stream-read1 [ add ] when* over stream-read1 [ add ] when*
] [ nip ] if ] when
] [ nip ] if ; ] when nip ;
: read-loop ( n stream -- string )
SBUF" " clone [
[
>r nip stream-read1 dup
[ r> push f ] [ r> 2drop t ] if
] 2curry find-integer drop
] keep "" like f like ;
M: decoder stream-read M: decoder stream-read
tuck { delegate decoder-code } get-slots decode-read fix-read ; tuck read-loop fix-read ;
M: decoder stream-read-partial stream-read ; M: decoder stream-read-partial stream-read ;
: decoder-read-until ( stream delim -- ch ) : (read-until) ( buf quot -- string/f sep/f )
! Copied from { c-reader stream-read-until }!!! ! quot: -- char stop?
over stream-read1 dup [ dup call
dup pick memq? [ 2nip ] [ , decoder-read-until ] if [ >r drop "" like r> ]
] [ [ pick push (read-until) ] if ; inline
2nip
] if ;
M: decoder stream-read-until M: decoder stream-read-until
! Copied from { c-reader stream-read-until }!!! SBUF" " clone -rot >decoder<
[ swap decoder-read-until ] "" make [ decode-char [ dup rot memq? ] [ drop f t ] if* ] 3curry
swap over empty? over not and [ 2drop f f ] when ; (read-until) ;
: fix-read1 ( stream char -- char ) : fix-read1 ( stream char -- char )
over decoder-cr [ over decoder-cr [
over cr- over cr-
dup CHAR: \n = [ dup CHAR: \n = [
drop stream-read1 drop dup stream-read1
] [ nip ] if ] when
] [ nip ] if ; ] when nip ;
M: decoder stream-read1 M: decoder stream-read1
1 swap stream-read f like [ first ] [ f ] if* ; dup >decoder< decode-char fix-read1 ;
M: decoder stream-readln ( stream -- str ) M: decoder stream-readln ( stream -- str )
"\r\n" over stream-read-until handle-readln ; "\r\n" over stream-read-until handle-readln ;
M: decoder dispose decoder-stream dispose ;
! Encoding ! Encoding
TUPLE: encode-error ; TUPLE: encode-error ;
: encode-error ( -- * ) \ encode-error construct-empty throw ; : encode-error ( -- * ) \ encode-error construct-empty throw ;
TUPLE: encoder code ; TUPLE: encoder stream code ;
: <encoder> ( stream encoding -- newstream ) M: tuple-class <encoder> construct-empty <encoder> ;
dup binary eq? [ drop ] [ M: tuple <encoder> encoder construct-boa ;
construct-empty { set-delegate set-encoder-code }
encoder construct : >encoder< ( encoder -- stream encoding )
] if ; { encoder-stream encoder-code } get-slots ;
M: encoder stream-write1 M: encoder stream-write1
>r 1string r> stream-write ; >encoder< encode-char ;
M: encoder stream-write M: encoder stream-write
{ delegate encoder-code } get-slots stream-write-encoded ; >encoder< [ encode-char ] 2curry each ;
M: encoder dispose delegate dispose ; M: encoder dispose encoder-stream dispose ;
M: encoder stream-flush encoder-stream stream-flush ;
INSTANCE: encoder plain-writer INSTANCE: encoder plain-writer
! Rebinding duplex streams which have not read anything yet ! Rebinding duplex streams which have not read anything yet
: reencode ( stream encoding -- newstream ) : reencode ( stream encoding -- newstream )
over encoder? [ >r delegate r> ] when <encoder> ; over encoder? [ >r encoder-stream r> ] when <encoder> ;
: redecode ( stream encoding -- newstream ) : redecode ( stream encoding -- newstream )
over decoder? [ >r delegate r> ] when <decoder> ; over decoder? [ >r decoder-stream r> ] when <decoder> ;
PRIVATE>
: <encoder-duplex> ( stream-in stream-out encoding -- duplex ) : <encoder-duplex> ( stream-in stream-out encoding -- duplex )
tuck reencode >r redecode r> <duplex-stream> ; tuck reencode >r redecode r> <duplex-stream> ;

View File

@ -6,82 +6,68 @@ IN: io.encodings.utf8
! Decoding UTF-8 ! Decoding UTF-8
TUPLE: utf8 ch state ; TUPLE: utf8 ;
SYMBOL: double <PRIVATE
SYMBOL: triple
SYMBOL: triple2
SYMBOL: quad
SYMBOL: quad2
SYMBOL: quad3
: starts-2? ( char -- ? ) : starts-2? ( char -- ? )
-6 shift BIN: 10 number= ; dup [ -6 shift BIN: 10 number= ] when ;
: append-nums ( buf bottom top state-out -- buf num state ) : append-nums ( stream byte -- stream char )
>r over starts-2? over stream-read1 dup starts-2?
[ 6 shift swap BIN: 111111 bitand bitor r> ] [ swap 6 shift swap BIN: 111111 bitand bitor ]
[ r> 3drop push-replacement ] if ; [ 2drop replacement-char ] if ;
: begin-utf8 ( buf byte -- buf ch state ) : double ( stream byte -- stream char )
BIN: 11111 bitand append-nums ;
: triple ( stream byte -- stream char )
BIN: 1111 bitand append-nums append-nums ;
: quad ( stream byte -- stream char )
BIN: 111 bitand append-nums append-nums append-nums ;
: begin-utf8 ( stream byte -- stream char )
{ {
{ [ dup -7 shift zero? ] [ push-decoded ] } { [ dup -7 shift zero? ] [ ] }
{ [ dup -5 shift BIN: 110 number= ] [ BIN: 11111 bitand double ] } { [ dup -5 shift BIN: 110 number= ] [ double ] }
{ [ dup -4 shift BIN: 1110 number= ] [ BIN: 1111 bitand triple ] } { [ dup -4 shift BIN: 1110 number= ] [ triple ] }
{ [ dup -3 shift BIN: 11110 number= ] [ BIN: 111 bitand quad ] } { [ dup -3 shift BIN: 11110 number= ] [ quad ] }
{ [ t ] [ drop push-replacement ] } { [ t ] [ drop replacement-char ] }
} cond ; } cond ;
: end-multibyte ( buf byte ch -- buf ch state ) : decode-utf8 ( stream -- char/f )
f append-nums [ push-decoded ] unless* ; dup stream-read1 dup [ begin-utf8 ] when nip ;
: decode-utf8-step ( buf byte ch state -- buf ch state ) M: utf8 decode-char
{ drop decode-utf8 ;
{ begin [ drop begin-utf8 ] }
{ double [ end-multibyte ] }
{ triple [ triple2 append-nums ] }
{ triple2 [ end-multibyte ] }
{ quad [ quad2 append-nums ] }
{ quad2 [ quad3 append-nums ] }
{ quad3 [ end-multibyte ] }
} case ;
: unpack-state ( encoding -- ch state )
{ utf8-ch utf8-state } get-slots ;
: pack-state ( ch state encoding -- )
{ set-utf8-ch set-utf8-state } set-slots ;
M: utf8 decode-step ( buf char encoding -- )
[ unpack-state decode-utf8-step ] keep pack-state drop ;
M: utf8 init-decoder nip begin over set-utf8-state ;
! Encoding UTF-8 ! Encoding UTF-8
: encoded ( char -- ) : encoded ( stream char -- )
BIN: 111111 bitand BIN: 10000000 bitor write1 ; BIN: 111111 bitand BIN: 10000000 bitor swap stream-write1 ;
: char>utf8 ( char -- ) : char>utf8 ( stream char -- )
{ {
{ [ dup -7 shift zero? ] [ write1 ] } { [ dup -7 shift zero? ] [ swap stream-write1 ] }
{ [ dup -11 shift zero? ] [ { [ dup -11 shift zero? ] [
dup -6 shift BIN: 11000000 bitor write1 2dup -6 shift BIN: 11000000 bitor swap stream-write1
encoded encoded
] } ] }
{ [ dup -16 shift zero? ] [ { [ dup -16 shift zero? ] [
dup -12 shift BIN: 11100000 bitor write1 2dup -12 shift BIN: 11100000 bitor swap stream-write1
dup -6 shift encoded 2dup -6 shift encoded
encoded encoded
] } ] }
{ [ t ] [ { [ t ] [
dup -18 shift BIN: 11110000 bitor write1 2dup -18 shift BIN: 11110000 bitor swap stream-write1
dup -12 shift encoded 2dup -12 shift encoded
dup -6 shift encoded 2dup -6 shift encoded
encoded encoded
] } ] }
} cond ; } cond ;
M: utf8 stream-write-encoded M: utf8 encode-char
! For efficiency, this should be modified to avoid variable reads drop swap char>utf8 ;
drop [ [ char>utf8 ] each ] with-stream* ;
PRIVATE>

View File

@ -1,5 +1,5 @@
USING: byte-arrays byte-vectors kernel io.encodings io.streams.string USING: byte-arrays byte-vectors kernel io.encodings io.streams.string
sequences io namespaces ; sequences io namespaces io.encodings.private ;
IN: io.streams.byte-array IN: io.streams.byte-array
: <byte-writer> ( encoding -- stream ) : <byte-writer> ( encoding -- stream )
@ -7,7 +7,7 @@ IN: io.streams.byte-array
: with-byte-writer ( encoding quot -- byte-array ) : with-byte-writer ( encoding quot -- byte-array )
>r <byte-writer> r> [ stdio get ] compose with-stream* >r <byte-writer> r> [ stdio get ] compose with-stream*
>byte-array ; inline dup encoder? [ encoder-stream ] when >byte-array ; inline
: <byte-reader> ( byte-array encoding -- stream ) : <byte-reader> ( byte-array encoding -- stream )
>r >byte-vector dup reverse-here r> <decoder> ; >r >byte-vector dup reverse-here r> <decoder> ;

View File

@ -1,9 +1,9 @@
! Copyright (C) 2003, 2007 Slava Pestov. ! Copyright (C) 2003, 2007 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
IN: io.streams.string
USING: io kernel math namespaces sequences sbufs strings USING: io kernel math namespaces sequences sbufs strings
generic splitting growable continuations io.streams.plain generic splitting growable continuations io.streams.plain
io.encodings ; io.encodings io.encodings.private ;
IN: io.streams.string
M: growable dispose drop ; M: growable dispose drop ;
@ -49,8 +49,11 @@ M: growable stream-read
M: growable stream-read-partial M: growable stream-read-partial
stream-read ; stream-read ;
TUPLE: null ;
M: null decode-char drop stream-read1 ;
: <string-reader> ( str -- stream ) : <string-reader> ( str -- stream )
>sbuf dup reverse-here f <decoder> ; >sbuf dup reverse-here null <decoder> ;
: with-string-reader ( str quot -- ) : with-string-reader ( str quot -- )
>r <string-reader> r> with-stream ; inline >r <string-reader> r> with-stream ; inline

View File

@ -1,18 +1,22 @@
! Copyright (C) 2008 Daniel Ehrenberg. ! Copyright (C) 2008 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: io io.encodings strings kernel math sequences byte-arrays io.encodings ; USING: io io.encodings kernel math io.encodings.private ;
IN: io.encodings.ascii IN: io.encodings.ascii
: encode-check< ( string stream max -- ) <PRIVATE
[ pick <= [ encode-error ] [ stream-write1 ] if ] 2curry each ; : encode-if< ( char stream encoding max -- )
nip 1- pick < [ encode-error ] [ stream-write1 ] if ;
: push-if< ( sbuf character max -- ) : decode-if< ( stream encoding max -- character )
over <= [ drop HEX: fffd ] when swap push ; nip swap stream-read1
[ tuck > [ drop replacement-char ] unless ]
[ drop f ] if* ;
PRIVATE>
TUPLE: ascii ; TUPLE: ascii ;
M: ascii stream-write-encoded ( string stream encoding -- ) M: ascii encode-char
drop 128 encode-check< ; 128 encode-if< ;
M: ascii decode-step M: ascii decode-char
drop 128 push-if< ; 128 decode-if< ;

View File

@ -1,12 +1,12 @@
! Copyright (C) 2008 Daniel Ehrenberg. ! Copyright (C) 2008 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: io io.encodings strings kernel io.encodings.ascii sequences math ; USING: io io.encodings kernel io.encodings.ascii.private ;
IN: io.encodings.latin1 IN: io.encodings.latin1
TUPLE: latin1 ; TUPLE: latin1 ;
M: latin1 stream-write-encoded M: latin1 encode-char
drop 256 encode-check< ; 256 encode-if< ;
M: latin1 decode-step M: latin1 decode-char
drop swap push ; drop stream-read1 ;

View File

@ -1,133 +1,101 @@
! Copyright (C) 2006, 2008 Daniel Ehrenberg. ! Copyright (C) 2006, 2008 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: math kernel sequences sbufs vectors namespaces io.binary USING: math kernel sequences sbufs vectors namespaces io.binary
io.encodings combinators splitting io byte-arrays ; io.encodings combinators splitting io byte-arrays inspector ;
IN: io.encodings.utf16 IN: io.encodings.utf16
TUPLE: utf16be ;
TUPLE: utf16le ;
TUPLE: utf16 ;
<PRIVATE
! UTF-16BE decoding ! UTF-16BE decoding
TUPLE: utf16be ch state ;
SYMBOL: double
SYMBOL: quad1
SYMBOL: quad2
SYMBOL: quad3
SYMBOL: ignore
: do-ignore ( -- ch state ) 0 ignore ;
: append-nums ( byte ch -- ch ) : append-nums ( byte ch -- ch )
8 shift bitor ; over [ 8 shift bitor ] [ 2drop replacement-char ] if ;
: end-multibyte ( buf byte ch -- buf ch state ) : double-be ( stream byte -- stream char )
append-nums push-decoded ; over stream-read1 swap append-nums ;
: begin-utf16be ( buf byte -- buf ch state ) : quad-be ( stream byte -- stream char )
double-be over stream-read1 [
dup -2 shift BIN: 110111 number= [
>r 2 shift r> BIN: 11 bitand bitor
over stream-read1 swap append-nums HEX: 10000 +
] [ 2drop dup stream-read1 drop replacement-char ] if
] when* ;
: ignore ( stream -- stream char )
dup stream-read1 drop replacement-char ;
: begin-utf16be ( stream byte -- stream char )
dup -3 shift BIN: 11011 number= [ dup -3 shift BIN: 11011 number= [
dup BIN: 00000100 bitand zero? dup BIN: 00000100 bitand zero?
[ BIN: 11 bitand quad1 ] [ BIN: 11 bitand quad-be ]
[ drop do-ignore ] if [ drop ignore ] if
] [ double ] if ; ] [ double-be ] if ;
: handle-quad2be ( byte ch -- ch state ) M: utf16be decode-char
swap dup -2 shift BIN: 110111 number= [ drop dup stream-read1 dup [ begin-utf16be ] when nip ;
>r 2 shift r> BIN: 11 bitand bitor quad3
] [ 2drop do-ignore ] if ;
: decode-utf16be-step ( buf byte ch state -- buf ch state )
{
{ begin [ drop begin-utf16be ] }
{ double [ end-multibyte ] }
{ quad1 [ append-nums quad2 ] }
{ quad2 [ handle-quad2be ] }
{ quad3 [ append-nums HEX: 10000 + push-decoded ] }
{ ignore [ 2drop push-replacement ] }
} case ;
: unpack-state-be ( encoding -- ch state )
{ utf16be-ch utf16be-state } get-slots ;
: pack-state-be ( ch state encoding -- )
{ set-utf16be-ch set-utf16be-state } set-slots ;
M: utf16be decode-step
[ unpack-state-be decode-utf16be-step ] keep pack-state-be drop ;
M: utf16be init-decoder nip begin over set-utf16be-state ;
! UTF-16LE decoding ! UTF-16LE decoding
TUPLE: utf16le ch state ; : quad-le ( stream ch -- stream char )
over stream-read1 swap 10 shift bitor
over stream-read1 dup -2 shift BIN: 110111 = [
BIN: 11 bitand append-nums HEX: 10000 +
] [ 2drop replacement-char ] if ;
: handle-double ( buf byte ch -- buf ch state ) : double-le ( stream byte1 byte2 -- stream char )
swap dup -3 shift BIN: 11011 = [ dup -3 shift BIN: 11011 = [
dup BIN: 100 bitand 0 number= dup BIN: 100 bitand 0 number=
[ BIN: 11 bitand 8 shift bitor quad2 ] [ BIN: 11 bitand 8 shift bitor quad-le ]
[ 2drop push-replacement ] if [ 2drop replacement-char ] if
] [ end-multibyte ] if ; ] [ append-nums ] if ;
: handle-quad3le ( buf byte ch -- buf ch state ) : begin-utf16le ( stream byte -- stream char )
swap dup -2 shift BIN: 110111 = [ over stream-read1 [ double-le ] [ drop replacement-char ] if* ;
BIN: 11 bitand append-nums HEX: 10000 + push-decoded
] [ 2drop push-replacement ] if ;
: decode-utf16le-step ( buf byte ch state -- buf ch state ) M: utf16le decode-char
{ drop dup stream-read1 dup [ begin-utf16le ] when nip ;
{ begin [ drop double ] }
{ double [ handle-double ] }
{ quad1 [ append-nums quad2 ] }
{ quad2 [ 10 shift bitor quad3 ] }
{ quad3 [ handle-quad3le ] }
} case ;
: unpack-state-le ( encoding -- ch state )
{ utf16le-ch utf16le-state } get-slots ;
: pack-state-le ( ch state encoding -- )
{ set-utf16le-ch set-utf16le-state } set-slots ;
M: utf16le decode-step
[ unpack-state-le decode-utf16le-step ] keep pack-state-le drop ;
M: utf16le init-decoder nip begin over set-utf16le-state ;
! UTF-16LE/BE encoding ! UTF-16LE/BE encoding
: encode-first : encode-first ( char -- byte1 byte2 )
-10 shift -10 shift
dup -8 shift BIN: 11011000 bitor dup -8 shift BIN: 11011000 bitor
swap HEX: FF bitand ; swap HEX: FF bitand ;
: encode-second : encode-second ( char -- byte3 byte4 )
BIN: 1111111111 bitand BIN: 1111111111 bitand
dup -8 shift BIN: 11011100 bitor dup -8 shift BIN: 11011100 bitor
swap BIN: 11111111 bitand ; swap BIN: 11111111 bitand ;
: char>utf16be ( char -- ) : stream-write2 ( stream char1 char2 -- )
rot [ stream-write1 ] curry 2apply ;
: char>utf16be ( stream char -- )
dup HEX: FFFF > [ dup HEX: FFFF > [
HEX: 10000 - HEX: 10000 -
dup encode-first swap write1 write1 2dup encode-first stream-write2
encode-second swap write1 write1 encode-second stream-write2
] [ h>b/b write1 write1 ] if ; ] [ h>b/b swap stream-write2 ] if ;
: stream-write-utf16be ( string stream -- ) M: utf16be encode-char ( char stream encoding -- )
[ [ char>utf16be ] each ] with-stream* ; drop swap char>utf16be ;
M: utf16be stream-write-encoded ( string stream encoding -- ) : char>utf16le ( char stream -- )
drop stream-write-utf16be ;
: char>utf16le ( char -- )
dup HEX: FFFF > [ dup HEX: FFFF > [
HEX: 10000 - HEX: 10000 -
dup encode-first write1 write1 2dup encode-first swap stream-write2
encode-second write1 write1 encode-second swap stream-write2
] [ h>b/b swap write1 write1 ] if ; ] [ h>b/b stream-write2 ] if ;
: stream-write-utf16le ( string stream -- ) M: utf16le encode-char ( char stream encoding -- )
[ [ char>utf16le ] each ] with-stream* ; drop swap char>utf16le ;
M: utf16le stream-write-encoded ( string stream encoding -- )
drop stream-write-utf16le ;
! UTF-16 ! UTF-16
@ -139,17 +107,18 @@ M: utf16le stream-write-encoded ( string stream encoding -- )
: start-utf16be? ( seq1 -- seq2 ? ) bom-be ?head ; : start-utf16be? ( seq1 -- seq2 ? ) bom-be ?head ;
TUPLE: utf16 started? ; TUPLE: missing-bom ;
M: missing-bom summary drop "The BOM for a UTF-16 stream was missing" ;
M: utf16 stream-write-encoded
dup utf16-started? [ drop ]
[ t swap set-utf16-started? bom-le over stream-write ] if
stream-write-utf16le ;
: bom>le/be ( bom -- le/be ) : bom>le/be ( bom -- le/be )
dup bom-le sequence= [ drop utf16le ] [ dup bom-le sequence= [ drop utf16le ] [
bom-be sequence= [ utf16be ] [ decode-error ] if bom-be sequence= [ utf16be ] [ missing-bom ] if
] if ; ] if ;
M: utf16 init-decoder ( stream encoding -- newencoding ) M: utf16 <decoder> ( stream utf16 -- decoder )
2 rot stream-read bom>le/be construct-empty init-decoder ; drop 2 over stream-read bom>le/be <decoder> ;
M: utf16 <encoder> ( stream utf16 -- encoder )
drop bom-le over stream-write utf16le <encoder> ;
PRIVATE>

View File

@ -1,6 +1,6 @@
IN: io.unix.launcher.tests IN: io.unix.launcher.tests
USING: io.files tools.test io.launcher arrays io namespaces USING: io.files tools.test io.launcher arrays io namespaces
continuations math io.encodings.ascii io.encodings.latin1 continuations math io.encodings.binary io.encodings.ascii
accessors kernel sequences ; accessors kernel sequences ;
[ ] [ [ ] [
@ -64,7 +64,7 @@ accessors kernel sequences ;
[ ] [ [ ] [
2 [ 2 [
"launcher-test-1" temp-file ascii <file-appender> [ "launcher-test-1" temp-file binary <file-appender> [
<process> <process>
swap >>stdout swap >>stdout
"echo Hello" >>command "echo Hello" >>command
@ -84,7 +84,7 @@ accessors kernel sequences ;
<process> <process>
"env" >>command "env" >>command
{ { "A" "B" } } >>environment { { "A" "B" } } >>environment
latin1 <process-stream> lines ascii <process-stream> lines
"A=B" swap member? "A=B" swap member?
] unit-test ] unit-test
@ -93,5 +93,5 @@ accessors kernel sequences ;
"env" >>command "env" >>command
{ { "A" "B" } } >>environment { { "A" "B" } } >>environment
+replace-environment+ >>environment-mode +replace-environment+ >>environment-mode
latin1 <process-stream> lines ascii <process-stream> lines
] unit-test ] unit-test