Merge branch 'master' of git://factorcode.org/git/littledan
commit
e826f1fe4a
|
@ -1,3 +1,8 @@
|
||||||
! Copyright (C) 2008 Daniel Ehrenberg.
|
! Copyright (C) 2008 Daniel Ehrenberg.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
IN: io.encodings.binary SYMBOL: binary
|
USING: io.encodings kernel ;
|
||||||
|
IN: io.encodings.binary
|
||||||
|
|
||||||
|
TUPLE: binary ;
|
||||||
|
M: binary <encoder> drop ;
|
||||||
|
M: binary <decoder> drop ;
|
||||||
|
|
|
@ -44,25 +44,21 @@ $nl { $vocab-link "io.encodings.utf16" } ;
|
||||||
|
|
||||||
ARTICLE: "encodings-protocol" "Encoding protocol"
|
ARTICLE: "encodings-protocol" "Encoding protocol"
|
||||||
"An encoding descriptor must implement the following methods. The methods are implemented on tuple classes by instantiating the class and calling the method again."
|
"An encoding descriptor must implement the following methods. The methods are implemented on tuple classes by instantiating the class and calling the method again."
|
||||||
{ $subsection decode-step }
|
{ $subsection decode-char }
|
||||||
{ $subsection init-decoder }
|
{ $subsection encode-char }
|
||||||
{ $subsection stream-write-encoded } ;
|
"The following methods are optional:"
|
||||||
|
{ $subsection <encoder> }
|
||||||
|
{ $subsection <decoder> } ;
|
||||||
|
|
||||||
HELP: decode-step ( buf char encoding -- )
|
HELP: decode-char ( stream encoding -- char/f )
|
||||||
{ $values { "buf" "A string buffer which characters can be pushed to" }
|
{ $values { "stream" "an underlying input stream" }
|
||||||
{ "char" "An octet which is read from a stream" }
|
|
||||||
{ "encoding" "An encoding descriptor tuple" } }
|
{ "encoding" "An encoding descriptor tuple" } }
|
||||||
{ $description "A single step in the decoding process must be defined for the decoding descriptor. When each octet is read, this word is called, and depending on the decoder's internal state, something may be pushed to the buffer or the state may change. This should not be used directly." } ;
|
{ $description "Reads a single code point from the underlying stream, interpreting it by the encoding. This should not be used directly." } ;
|
||||||
|
|
||||||
HELP: stream-write-encoded ( string stream encoding -- )
|
HELP: encode-char ( char stream encoding -- )
|
||||||
{ $values { "string" "a string" }
|
{ $values { "char" "a character" }
|
||||||
{ "stream" "an output stream" }
|
{ "stream" "an underlying output stream" }
|
||||||
{ "encoding" "an encoding descriptor" } }
|
{ "encoding" "an encoding descriptor" } }
|
||||||
{ $description "Encodes the string with the given encoding descriptor, outputing the result to the given stream. This should not be used directly." } ;
|
{ $description "Writes the code point in the encoding to the underlying stream given. This should not be used directly." } ;
|
||||||
|
|
||||||
HELP: init-decoder ( stream encoding -- encoding )
|
{ encode-char decode-char } related-words
|
||||||
{ $values { "stream" "an input stream" }
|
|
||||||
{ "encoding" "an encoding descriptor" } }
|
|
||||||
{ $description "Initializes the decoder tuple's state. The stream is exposed so that it can be read, eg for a BOM. This should not be used directly." } ;
|
|
||||||
|
|
||||||
{ init-decoder decode-step stream-write-encoded } related-words
|
|
||||||
|
|
|
@ -2,62 +2,36 @@
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: math kernel sequences sbufs vectors namespaces
|
USING: math kernel sequences sbufs vectors namespaces
|
||||||
growable strings io classes continuations combinators
|
growable strings io classes continuations combinators
|
||||||
io.styles io.streams.plain io.encodings.binary splitting
|
io.styles io.streams.plain splitting
|
||||||
io.streams.duplex byte-arrays ;
|
io.streams.duplex byte-arrays sequences.private ;
|
||||||
IN: io.encodings
|
IN: io.encodings
|
||||||
|
|
||||||
! The encoding descriptor protocol
|
! The encoding descriptor protocol
|
||||||
|
|
||||||
GENERIC: decode-step ( buf char encoding -- )
|
GENERIC: decode-char ( stream encoding -- char/f )
|
||||||
M: object decode-step drop swap push ;
|
|
||||||
|
|
||||||
GENERIC: init-decoder ( stream encoding -- encoding )
|
GENERIC: encode-char ( char stream encoding -- )
|
||||||
M: tuple-class init-decoder construct-empty init-decoder ;
|
|
||||||
M: object init-decoder nip ;
|
|
||||||
|
|
||||||
GENERIC: stream-write-encoded ( string stream encoding -- byte-array )
|
GENERIC: <decoder> ( stream decoding -- newstream )
|
||||||
M: object stream-write-encoded drop stream-write ;
|
|
||||||
|
GENERIC: <encoder> ( stream encoding -- newstream )
|
||||||
|
|
||||||
|
: replacement-char HEX: fffd ;
|
||||||
|
|
||||||
! Decoding
|
! Decoding
|
||||||
|
|
||||||
|
<PRIVATE
|
||||||
|
|
||||||
TUPLE: decode-error ;
|
TUPLE: decode-error ;
|
||||||
|
|
||||||
: decode-error ( -- * ) \ decode-error construct-empty throw ;
|
: decode-error ( -- * ) \ decode-error construct-empty throw ;
|
||||||
|
|
||||||
SYMBOL: begin
|
TUPLE: decoder stream code cr ;
|
||||||
|
M: tuple-class <decoder> construct-empty <decoder> ;
|
||||||
|
M: tuple <decoder> f decoder construct-boa ;
|
||||||
|
|
||||||
: push-decoded ( buf ch -- buf ch state )
|
: >decoder< ( decoder -- stream encoding )
|
||||||
over push 0 begin ;
|
{ decoder-stream decoder-code } get-slots ;
|
||||||
|
|
||||||
: push-replacement ( buf -- buf ch state )
|
|
||||||
! This is the replacement character
|
|
||||||
HEX: fffd push-decoded ;
|
|
||||||
|
|
||||||
: space ( resizable -- room-left )
|
|
||||||
dup underlying swap [ length ] 2apply - ;
|
|
||||||
|
|
||||||
: full? ( resizable -- ? ) space zero? ;
|
|
||||||
|
|
||||||
: end-read-loop ( buf ch state stream quot -- string/f )
|
|
||||||
2drop 2drop >string f like ;
|
|
||||||
|
|
||||||
: decode-read-loop ( buf stream encoding -- string/f )
|
|
||||||
pick full? [ 2drop >string ] [
|
|
||||||
over stream-read1 [
|
|
||||||
-rot tuck >r >r >r dupd r> decode-step r> r>
|
|
||||||
decode-read-loop
|
|
||||||
] [ 2drop >string f like ] if*
|
|
||||||
] if ;
|
|
||||||
|
|
||||||
: decode-read ( length stream encoding -- string )
|
|
||||||
rot <sbuf> -rot decode-read-loop ;
|
|
||||||
|
|
||||||
TUPLE: decoder code cr ;
|
|
||||||
: <decoder> ( stream encoding -- newstream )
|
|
||||||
dup binary eq? [ drop ] [
|
|
||||||
dupd init-decoder { set-delegate set-decoder-code }
|
|
||||||
decoder construct
|
|
||||||
] if ;
|
|
||||||
|
|
||||||
: cr+ t swap set-decoder-cr ; inline
|
: cr+ t swap set-decoder-cr ; inline
|
||||||
|
|
||||||
|
@ -82,72 +56,83 @@ TUPLE: decoder code cr ;
|
||||||
over decoder-cr [
|
over decoder-cr [
|
||||||
over cr-
|
over cr-
|
||||||
"\n" ?head [
|
"\n" ?head [
|
||||||
swap stream-read1 [ add ] when*
|
over stream-read1 [ add ] when*
|
||||||
] [ nip ] if
|
] when
|
||||||
] [ nip ] if ;
|
] when nip ;
|
||||||
|
|
||||||
|
: read-loop ( n stream -- string )
|
||||||
|
SBUF" " clone [
|
||||||
|
[
|
||||||
|
>r nip stream-read1 dup
|
||||||
|
[ r> push f ] [ r> 2drop t ] if
|
||||||
|
] 2curry find-integer drop
|
||||||
|
] keep "" like f like ;
|
||||||
|
|
||||||
M: decoder stream-read
|
M: decoder stream-read
|
||||||
tuck { delegate decoder-code } get-slots decode-read fix-read ;
|
tuck read-loop fix-read ;
|
||||||
|
|
||||||
M: decoder stream-read-partial stream-read ;
|
M: decoder stream-read-partial stream-read ;
|
||||||
|
|
||||||
: decoder-read-until ( stream delim -- ch )
|
: (read-until) ( buf quot -- string/f sep/f )
|
||||||
! Copied from { c-reader stream-read-until }!!!
|
! quot: -- char stop?
|
||||||
over stream-read1 dup [
|
dup call
|
||||||
dup pick memq? [ 2nip ] [ , decoder-read-until ] if
|
[ >r drop "" like r> ]
|
||||||
] [
|
[ pick push (read-until) ] if ; inline
|
||||||
2nip
|
|
||||||
] if ;
|
|
||||||
|
|
||||||
M: decoder stream-read-until
|
M: decoder stream-read-until
|
||||||
! Copied from { c-reader stream-read-until }!!!
|
SBUF" " clone -rot >decoder<
|
||||||
[ swap decoder-read-until ] "" make
|
[ decode-char [ dup rot memq? ] [ drop f t ] if* ] 3curry
|
||||||
swap over empty? over not and [ 2drop f f ] when ;
|
(read-until) ;
|
||||||
|
|
||||||
: fix-read1 ( stream char -- char )
|
: fix-read1 ( stream char -- char )
|
||||||
over decoder-cr [
|
over decoder-cr [
|
||||||
over cr-
|
over cr-
|
||||||
dup CHAR: \n = [
|
dup CHAR: \n = [
|
||||||
drop stream-read1
|
drop dup stream-read1
|
||||||
] [ nip ] if
|
] when
|
||||||
] [ nip ] if ;
|
] when nip ;
|
||||||
|
|
||||||
M: decoder stream-read1
|
M: decoder stream-read1
|
||||||
1 swap stream-read f like [ first ] [ f ] if* ;
|
dup >decoder< decode-char fix-read1 ;
|
||||||
|
|
||||||
M: decoder stream-readln ( stream -- str )
|
M: decoder stream-readln ( stream -- str )
|
||||||
"\r\n" over stream-read-until handle-readln ;
|
"\r\n" over stream-read-until handle-readln ;
|
||||||
|
|
||||||
|
M: decoder dispose decoder-stream dispose ;
|
||||||
|
|
||||||
! Encoding
|
! Encoding
|
||||||
|
|
||||||
TUPLE: encode-error ;
|
TUPLE: encode-error ;
|
||||||
|
|
||||||
: encode-error ( -- * ) \ encode-error construct-empty throw ;
|
: encode-error ( -- * ) \ encode-error construct-empty throw ;
|
||||||
|
|
||||||
TUPLE: encoder code ;
|
TUPLE: encoder stream code ;
|
||||||
: <encoder> ( stream encoding -- newstream )
|
M: tuple-class <encoder> construct-empty <encoder> ;
|
||||||
dup binary eq? [ drop ] [
|
M: tuple <encoder> encoder construct-boa ;
|
||||||
construct-empty { set-delegate set-encoder-code }
|
|
||||||
encoder construct
|
: >encoder< ( encoder -- stream encoding )
|
||||||
] if ;
|
{ encoder-stream encoder-code } get-slots ;
|
||||||
|
|
||||||
M: encoder stream-write1
|
M: encoder stream-write1
|
||||||
>r 1string r> stream-write ;
|
>encoder< encode-char ;
|
||||||
|
|
||||||
M: encoder stream-write
|
M: encoder stream-write
|
||||||
{ delegate encoder-code } get-slots stream-write-encoded ;
|
>encoder< [ encode-char ] 2curry each ;
|
||||||
|
|
||||||
M: encoder dispose delegate dispose ;
|
M: encoder dispose encoder-stream dispose ;
|
||||||
|
|
||||||
|
M: encoder stream-flush encoder-stream stream-flush ;
|
||||||
|
|
||||||
INSTANCE: encoder plain-writer
|
INSTANCE: encoder plain-writer
|
||||||
|
|
||||||
! Rebinding duplex streams which have not read anything yet
|
! Rebinding duplex streams which have not read anything yet
|
||||||
|
|
||||||
: reencode ( stream encoding -- newstream )
|
: reencode ( stream encoding -- newstream )
|
||||||
over encoder? [ >r delegate r> ] when <encoder> ;
|
over encoder? [ >r encoder-stream r> ] when <encoder> ;
|
||||||
|
|
||||||
: redecode ( stream encoding -- newstream )
|
: redecode ( stream encoding -- newstream )
|
||||||
over decoder? [ >r delegate r> ] when <decoder> ;
|
over decoder? [ >r decoder-stream r> ] when <decoder> ;
|
||||||
|
PRIVATE>
|
||||||
|
|
||||||
: <encoder-duplex> ( stream-in stream-out encoding -- duplex )
|
: <encoder-duplex> ( stream-in stream-out encoding -- duplex )
|
||||||
tuck reencode >r redecode r> <duplex-stream> ;
|
tuck reencode >r redecode r> <duplex-stream> ;
|
||||||
|
|
|
@ -6,82 +6,68 @@ IN: io.encodings.utf8
|
||||||
|
|
||||||
! Decoding UTF-8
|
! Decoding UTF-8
|
||||||
|
|
||||||
TUPLE: utf8 ch state ;
|
TUPLE: utf8 ;
|
||||||
|
|
||||||
SYMBOL: double
|
<PRIVATE
|
||||||
SYMBOL: triple
|
|
||||||
SYMBOL: triple2
|
|
||||||
SYMBOL: quad
|
|
||||||
SYMBOL: quad2
|
|
||||||
SYMBOL: quad3
|
|
||||||
|
|
||||||
: starts-2? ( char -- ? )
|
: starts-2? ( char -- ? )
|
||||||
-6 shift BIN: 10 number= ;
|
dup [ -6 shift BIN: 10 number= ] when ;
|
||||||
|
|
||||||
: append-nums ( buf bottom top state-out -- buf num state )
|
: append-nums ( stream byte -- stream char )
|
||||||
>r over starts-2?
|
over stream-read1 dup starts-2?
|
||||||
[ 6 shift swap BIN: 111111 bitand bitor r> ]
|
[ swap 6 shift swap BIN: 111111 bitand bitor ]
|
||||||
[ r> 3drop push-replacement ] if ;
|
[ 2drop replacement-char ] if ;
|
||||||
|
|
||||||
: begin-utf8 ( buf byte -- buf ch state )
|
: double ( stream byte -- stream char )
|
||||||
|
BIN: 11111 bitand append-nums ;
|
||||||
|
|
||||||
|
: triple ( stream byte -- stream char )
|
||||||
|
BIN: 1111 bitand append-nums append-nums ;
|
||||||
|
|
||||||
|
: quad ( stream byte -- stream char )
|
||||||
|
BIN: 111 bitand append-nums append-nums append-nums ;
|
||||||
|
|
||||||
|
: begin-utf8 ( stream byte -- stream char )
|
||||||
{
|
{
|
||||||
{ [ dup -7 shift zero? ] [ push-decoded ] }
|
{ [ dup -7 shift zero? ] [ ] }
|
||||||
{ [ dup -5 shift BIN: 110 number= ] [ BIN: 11111 bitand double ] }
|
{ [ dup -5 shift BIN: 110 number= ] [ double ] }
|
||||||
{ [ dup -4 shift BIN: 1110 number= ] [ BIN: 1111 bitand triple ] }
|
{ [ dup -4 shift BIN: 1110 number= ] [ triple ] }
|
||||||
{ [ dup -3 shift BIN: 11110 number= ] [ BIN: 111 bitand quad ] }
|
{ [ dup -3 shift BIN: 11110 number= ] [ quad ] }
|
||||||
{ [ t ] [ drop push-replacement ] }
|
{ [ t ] [ drop replacement-char ] }
|
||||||
} cond ;
|
} cond ;
|
||||||
|
|
||||||
: end-multibyte ( buf byte ch -- buf ch state )
|
: decode-utf8 ( stream -- char/f )
|
||||||
f append-nums [ push-decoded ] unless* ;
|
dup stream-read1 dup [ begin-utf8 ] when nip ;
|
||||||
|
|
||||||
: decode-utf8-step ( buf byte ch state -- buf ch state )
|
M: utf8 decode-char
|
||||||
{
|
drop decode-utf8 ;
|
||||||
{ begin [ drop begin-utf8 ] }
|
|
||||||
{ double [ end-multibyte ] }
|
|
||||||
{ triple [ triple2 append-nums ] }
|
|
||||||
{ triple2 [ end-multibyte ] }
|
|
||||||
{ quad [ quad2 append-nums ] }
|
|
||||||
{ quad2 [ quad3 append-nums ] }
|
|
||||||
{ quad3 [ end-multibyte ] }
|
|
||||||
} case ;
|
|
||||||
|
|
||||||
: unpack-state ( encoding -- ch state )
|
|
||||||
{ utf8-ch utf8-state } get-slots ;
|
|
||||||
|
|
||||||
: pack-state ( ch state encoding -- )
|
|
||||||
{ set-utf8-ch set-utf8-state } set-slots ;
|
|
||||||
|
|
||||||
M: utf8 decode-step ( buf char encoding -- )
|
|
||||||
[ unpack-state decode-utf8-step ] keep pack-state drop ;
|
|
||||||
|
|
||||||
M: utf8 init-decoder nip begin over set-utf8-state ;
|
|
||||||
|
|
||||||
! Encoding UTF-8
|
! Encoding UTF-8
|
||||||
|
|
||||||
: encoded ( char -- )
|
: encoded ( stream char -- )
|
||||||
BIN: 111111 bitand BIN: 10000000 bitor write1 ;
|
BIN: 111111 bitand BIN: 10000000 bitor swap stream-write1 ;
|
||||||
|
|
||||||
: char>utf8 ( char -- )
|
: char>utf8 ( stream char -- )
|
||||||
{
|
{
|
||||||
{ [ dup -7 shift zero? ] [ write1 ] }
|
{ [ dup -7 shift zero? ] [ swap stream-write1 ] }
|
||||||
{ [ dup -11 shift zero? ] [
|
{ [ dup -11 shift zero? ] [
|
||||||
dup -6 shift BIN: 11000000 bitor write1
|
2dup -6 shift BIN: 11000000 bitor swap stream-write1
|
||||||
encoded
|
encoded
|
||||||
] }
|
] }
|
||||||
{ [ dup -16 shift zero? ] [
|
{ [ dup -16 shift zero? ] [
|
||||||
dup -12 shift BIN: 11100000 bitor write1
|
2dup -12 shift BIN: 11100000 bitor swap stream-write1
|
||||||
dup -6 shift encoded
|
2dup -6 shift encoded
|
||||||
encoded
|
encoded
|
||||||
] }
|
] }
|
||||||
{ [ t ] [
|
{ [ t ] [
|
||||||
dup -18 shift BIN: 11110000 bitor write1
|
2dup -18 shift BIN: 11110000 bitor swap stream-write1
|
||||||
dup -12 shift encoded
|
2dup -12 shift encoded
|
||||||
dup -6 shift encoded
|
2dup -6 shift encoded
|
||||||
encoded
|
encoded
|
||||||
] }
|
] }
|
||||||
} cond ;
|
} cond ;
|
||||||
|
|
||||||
M: utf8 stream-write-encoded
|
M: utf8 encode-char
|
||||||
! For efficiency, this should be modified to avoid variable reads
|
drop swap char>utf8 ;
|
||||||
drop [ [ char>utf8 ] each ] with-stream* ;
|
|
||||||
|
PRIVATE>
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
USING: byte-arrays byte-vectors kernel io.encodings io.streams.string
|
USING: byte-arrays byte-vectors kernel io.encodings io.streams.string
|
||||||
sequences io namespaces ;
|
sequences io namespaces io.encodings.private ;
|
||||||
IN: io.streams.byte-array
|
IN: io.streams.byte-array
|
||||||
|
|
||||||
: <byte-writer> ( encoding -- stream )
|
: <byte-writer> ( encoding -- stream )
|
||||||
|
@ -7,7 +7,7 @@ IN: io.streams.byte-array
|
||||||
|
|
||||||
: with-byte-writer ( encoding quot -- byte-array )
|
: with-byte-writer ( encoding quot -- byte-array )
|
||||||
>r <byte-writer> r> [ stdio get ] compose with-stream*
|
>r <byte-writer> r> [ stdio get ] compose with-stream*
|
||||||
>byte-array ; inline
|
dup encoder? [ encoder-stream ] when >byte-array ; inline
|
||||||
|
|
||||||
: <byte-reader> ( byte-array encoding -- stream )
|
: <byte-reader> ( byte-array encoding -- stream )
|
||||||
>r >byte-vector dup reverse-here r> <decoder> ;
|
>r >byte-vector dup reverse-here r> <decoder> ;
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
! Copyright (C) 2003, 2007 Slava Pestov.
|
! Copyright (C) 2003, 2007 Slava Pestov.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
IN: io.streams.string
|
|
||||||
USING: io kernel math namespaces sequences sbufs strings
|
USING: io kernel math namespaces sequences sbufs strings
|
||||||
generic splitting growable continuations io.streams.plain
|
generic splitting growable continuations io.streams.plain
|
||||||
io.encodings ;
|
io.encodings io.encodings.private ;
|
||||||
|
IN: io.streams.string
|
||||||
|
|
||||||
M: growable dispose drop ;
|
M: growable dispose drop ;
|
||||||
|
|
||||||
|
@ -49,8 +49,11 @@ M: growable stream-read
|
||||||
M: growable stream-read-partial
|
M: growable stream-read-partial
|
||||||
stream-read ;
|
stream-read ;
|
||||||
|
|
||||||
|
TUPLE: null ;
|
||||||
|
M: null decode-char drop stream-read1 ;
|
||||||
|
|
||||||
: <string-reader> ( str -- stream )
|
: <string-reader> ( str -- stream )
|
||||||
>sbuf dup reverse-here f <decoder> ;
|
>sbuf dup reverse-here null <decoder> ;
|
||||||
|
|
||||||
: with-string-reader ( str quot -- )
|
: with-string-reader ( str quot -- )
|
||||||
>r <string-reader> r> with-stream ; inline
|
>r <string-reader> r> with-stream ; inline
|
||||||
|
|
|
@ -1,18 +1,22 @@
|
||||||
! Copyright (C) 2008 Daniel Ehrenberg.
|
! Copyright (C) 2008 Daniel Ehrenberg.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: io io.encodings strings kernel math sequences byte-arrays io.encodings ;
|
USING: io io.encodings kernel math io.encodings.private ;
|
||||||
IN: io.encodings.ascii
|
IN: io.encodings.ascii
|
||||||
|
|
||||||
: encode-check< ( string stream max -- )
|
<PRIVATE
|
||||||
[ pick <= [ encode-error ] [ stream-write1 ] if ] 2curry each ;
|
: encode-if< ( char stream encoding max -- )
|
||||||
|
nip 1- pick < [ encode-error ] [ stream-write1 ] if ;
|
||||||
|
|
||||||
: push-if< ( sbuf character max -- )
|
: decode-if< ( stream encoding max -- character )
|
||||||
over <= [ drop HEX: fffd ] when swap push ;
|
nip swap stream-read1
|
||||||
|
[ tuck > [ drop replacement-char ] unless ]
|
||||||
|
[ drop f ] if* ;
|
||||||
|
PRIVATE>
|
||||||
|
|
||||||
TUPLE: ascii ;
|
TUPLE: ascii ;
|
||||||
|
|
||||||
M: ascii stream-write-encoded ( string stream encoding -- )
|
M: ascii encode-char
|
||||||
drop 128 encode-check< ;
|
128 encode-if< ;
|
||||||
|
|
||||||
M: ascii decode-step
|
M: ascii decode-char
|
||||||
drop 128 push-if< ;
|
128 decode-if< ;
|
||||||
|
|
|
@ -1,12 +1,12 @@
|
||||||
! Copyright (C) 2008 Daniel Ehrenberg.
|
! Copyright (C) 2008 Daniel Ehrenberg.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: io io.encodings strings kernel io.encodings.ascii sequences math ;
|
USING: io io.encodings kernel io.encodings.ascii.private ;
|
||||||
IN: io.encodings.latin1
|
IN: io.encodings.latin1
|
||||||
|
|
||||||
TUPLE: latin1 ;
|
TUPLE: latin1 ;
|
||||||
|
|
||||||
M: latin1 stream-write-encoded
|
M: latin1 encode-char
|
||||||
drop 256 encode-check< ;
|
256 encode-if< ;
|
||||||
|
|
||||||
M: latin1 decode-step
|
M: latin1 decode-char
|
||||||
drop swap push ;
|
drop stream-read1 ;
|
||||||
|
|
|
@ -1,133 +1,101 @@
|
||||||
! Copyright (C) 2006, 2008 Daniel Ehrenberg.
|
! Copyright (C) 2006, 2008 Daniel Ehrenberg.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: math kernel sequences sbufs vectors namespaces io.binary
|
USING: math kernel sequences sbufs vectors namespaces io.binary
|
||||||
io.encodings combinators splitting io byte-arrays ;
|
io.encodings combinators splitting io byte-arrays inspector ;
|
||||||
IN: io.encodings.utf16
|
IN: io.encodings.utf16
|
||||||
|
|
||||||
|
TUPLE: utf16be ;
|
||||||
|
|
||||||
|
TUPLE: utf16le ;
|
||||||
|
|
||||||
|
TUPLE: utf16 ;
|
||||||
|
|
||||||
|
<PRIVATE
|
||||||
|
|
||||||
! UTF-16BE decoding
|
! UTF-16BE decoding
|
||||||
|
|
||||||
TUPLE: utf16be ch state ;
|
|
||||||
|
|
||||||
SYMBOL: double
|
|
||||||
SYMBOL: quad1
|
|
||||||
SYMBOL: quad2
|
|
||||||
SYMBOL: quad3
|
|
||||||
SYMBOL: ignore
|
|
||||||
|
|
||||||
: do-ignore ( -- ch state ) 0 ignore ;
|
|
||||||
|
|
||||||
: append-nums ( byte ch -- ch )
|
: append-nums ( byte ch -- ch )
|
||||||
8 shift bitor ;
|
over [ 8 shift bitor ] [ 2drop replacement-char ] if ;
|
||||||
|
|
||||||
: end-multibyte ( buf byte ch -- buf ch state )
|
: double-be ( stream byte -- stream char )
|
||||||
append-nums push-decoded ;
|
over stream-read1 swap append-nums ;
|
||||||
|
|
||||||
: begin-utf16be ( buf byte -- buf ch state )
|
: quad-be ( stream byte -- stream char )
|
||||||
|
double-be over stream-read1 [
|
||||||
|
dup -2 shift BIN: 110111 number= [
|
||||||
|
>r 2 shift r> BIN: 11 bitand bitor
|
||||||
|
over stream-read1 swap append-nums HEX: 10000 +
|
||||||
|
] [ 2drop dup stream-read1 drop replacement-char ] if
|
||||||
|
] when* ;
|
||||||
|
|
||||||
|
: ignore ( stream -- stream char )
|
||||||
|
dup stream-read1 drop replacement-char ;
|
||||||
|
|
||||||
|
: begin-utf16be ( stream byte -- stream char )
|
||||||
dup -3 shift BIN: 11011 number= [
|
dup -3 shift BIN: 11011 number= [
|
||||||
dup BIN: 00000100 bitand zero?
|
dup BIN: 00000100 bitand zero?
|
||||||
[ BIN: 11 bitand quad1 ]
|
[ BIN: 11 bitand quad-be ]
|
||||||
[ drop do-ignore ] if
|
[ drop ignore ] if
|
||||||
] [ double ] if ;
|
] [ double-be ] if ;
|
||||||
|
|
||||||
: handle-quad2be ( byte ch -- ch state )
|
M: utf16be decode-char
|
||||||
swap dup -2 shift BIN: 110111 number= [
|
drop dup stream-read1 dup [ begin-utf16be ] when nip ;
|
||||||
>r 2 shift r> BIN: 11 bitand bitor quad3
|
|
||||||
] [ 2drop do-ignore ] if ;
|
|
||||||
|
|
||||||
: decode-utf16be-step ( buf byte ch state -- buf ch state )
|
|
||||||
{
|
|
||||||
{ begin [ drop begin-utf16be ] }
|
|
||||||
{ double [ end-multibyte ] }
|
|
||||||
{ quad1 [ append-nums quad2 ] }
|
|
||||||
{ quad2 [ handle-quad2be ] }
|
|
||||||
{ quad3 [ append-nums HEX: 10000 + push-decoded ] }
|
|
||||||
{ ignore [ 2drop push-replacement ] }
|
|
||||||
} case ;
|
|
||||||
|
|
||||||
: unpack-state-be ( encoding -- ch state )
|
|
||||||
{ utf16be-ch utf16be-state } get-slots ;
|
|
||||||
|
|
||||||
: pack-state-be ( ch state encoding -- )
|
|
||||||
{ set-utf16be-ch set-utf16be-state } set-slots ;
|
|
||||||
|
|
||||||
M: utf16be decode-step
|
|
||||||
[ unpack-state-be decode-utf16be-step ] keep pack-state-be drop ;
|
|
||||||
|
|
||||||
M: utf16be init-decoder nip begin over set-utf16be-state ;
|
|
||||||
|
|
||||||
! UTF-16LE decoding
|
! UTF-16LE decoding
|
||||||
|
|
||||||
TUPLE: utf16le ch state ;
|
: quad-le ( stream ch -- stream char )
|
||||||
|
over stream-read1 swap 10 shift bitor
|
||||||
|
over stream-read1 dup -2 shift BIN: 110111 = [
|
||||||
|
BIN: 11 bitand append-nums HEX: 10000 +
|
||||||
|
] [ 2drop replacement-char ] if ;
|
||||||
|
|
||||||
: handle-double ( buf byte ch -- buf ch state )
|
: double-le ( stream byte1 byte2 -- stream char )
|
||||||
swap dup -3 shift BIN: 11011 = [
|
dup -3 shift BIN: 11011 = [
|
||||||
dup BIN: 100 bitand 0 number=
|
dup BIN: 100 bitand 0 number=
|
||||||
[ BIN: 11 bitand 8 shift bitor quad2 ]
|
[ BIN: 11 bitand 8 shift bitor quad-le ]
|
||||||
[ 2drop push-replacement ] if
|
[ 2drop replacement-char ] if
|
||||||
] [ end-multibyte ] if ;
|
] [ append-nums ] if ;
|
||||||
|
|
||||||
: handle-quad3le ( buf byte ch -- buf ch state )
|
: begin-utf16le ( stream byte -- stream char )
|
||||||
swap dup -2 shift BIN: 110111 = [
|
over stream-read1 [ double-le ] [ drop replacement-char ] if* ;
|
||||||
BIN: 11 bitand append-nums HEX: 10000 + push-decoded
|
|
||||||
] [ 2drop push-replacement ] if ;
|
|
||||||
|
|
||||||
: decode-utf16le-step ( buf byte ch state -- buf ch state )
|
M: utf16le decode-char
|
||||||
{
|
drop dup stream-read1 dup [ begin-utf16le ] when nip ;
|
||||||
{ begin [ drop double ] }
|
|
||||||
{ double [ handle-double ] }
|
|
||||||
{ quad1 [ append-nums quad2 ] }
|
|
||||||
{ quad2 [ 10 shift bitor quad3 ] }
|
|
||||||
{ quad3 [ handle-quad3le ] }
|
|
||||||
} case ;
|
|
||||||
|
|
||||||
: unpack-state-le ( encoding -- ch state )
|
|
||||||
{ utf16le-ch utf16le-state } get-slots ;
|
|
||||||
|
|
||||||
: pack-state-le ( ch state encoding -- )
|
|
||||||
{ set-utf16le-ch set-utf16le-state } set-slots ;
|
|
||||||
|
|
||||||
M: utf16le decode-step
|
|
||||||
[ unpack-state-le decode-utf16le-step ] keep pack-state-le drop ;
|
|
||||||
|
|
||||||
M: utf16le init-decoder nip begin over set-utf16le-state ;
|
|
||||||
|
|
||||||
! UTF-16LE/BE encoding
|
! UTF-16LE/BE encoding
|
||||||
|
|
||||||
: encode-first
|
: encode-first ( char -- byte1 byte2 )
|
||||||
-10 shift
|
-10 shift
|
||||||
dup -8 shift BIN: 11011000 bitor
|
dup -8 shift BIN: 11011000 bitor
|
||||||
swap HEX: FF bitand ;
|
swap HEX: FF bitand ;
|
||||||
|
|
||||||
: encode-second
|
: encode-second ( char -- byte3 byte4 )
|
||||||
BIN: 1111111111 bitand
|
BIN: 1111111111 bitand
|
||||||
dup -8 shift BIN: 11011100 bitor
|
dup -8 shift BIN: 11011100 bitor
|
||||||
swap BIN: 11111111 bitand ;
|
swap BIN: 11111111 bitand ;
|
||||||
|
|
||||||
: char>utf16be ( char -- )
|
: stream-write2 ( stream char1 char2 -- )
|
||||||
|
rot [ stream-write1 ] curry 2apply ;
|
||||||
|
|
||||||
|
: char>utf16be ( stream char -- )
|
||||||
dup HEX: FFFF > [
|
dup HEX: FFFF > [
|
||||||
HEX: 10000 -
|
HEX: 10000 -
|
||||||
dup encode-first swap write1 write1
|
2dup encode-first stream-write2
|
||||||
encode-second swap write1 write1
|
encode-second stream-write2
|
||||||
] [ h>b/b write1 write1 ] if ;
|
] [ h>b/b swap stream-write2 ] if ;
|
||||||
|
|
||||||
: stream-write-utf16be ( string stream -- )
|
M: utf16be encode-char ( char stream encoding -- )
|
||||||
[ [ char>utf16be ] each ] with-stream* ;
|
drop swap char>utf16be ;
|
||||||
|
|
||||||
M: utf16be stream-write-encoded ( string stream encoding -- )
|
: char>utf16le ( char stream -- )
|
||||||
drop stream-write-utf16be ;
|
|
||||||
|
|
||||||
: char>utf16le ( char -- )
|
|
||||||
dup HEX: FFFF > [
|
dup HEX: FFFF > [
|
||||||
HEX: 10000 -
|
HEX: 10000 -
|
||||||
dup encode-first write1 write1
|
2dup encode-first swap stream-write2
|
||||||
encode-second write1 write1
|
encode-second swap stream-write2
|
||||||
] [ h>b/b swap write1 write1 ] if ;
|
] [ h>b/b stream-write2 ] if ;
|
||||||
|
|
||||||
: stream-write-utf16le ( string stream -- )
|
M: utf16le encode-char ( char stream encoding -- )
|
||||||
[ [ char>utf16le ] each ] with-stream* ;
|
drop swap char>utf16le ;
|
||||||
|
|
||||||
M: utf16le stream-write-encoded ( string stream encoding -- )
|
|
||||||
drop stream-write-utf16le ;
|
|
||||||
|
|
||||||
! UTF-16
|
! UTF-16
|
||||||
|
|
||||||
|
@ -139,17 +107,18 @@ M: utf16le stream-write-encoded ( string stream encoding -- )
|
||||||
|
|
||||||
: start-utf16be? ( seq1 -- seq2 ? ) bom-be ?head ;
|
: start-utf16be? ( seq1 -- seq2 ? ) bom-be ?head ;
|
||||||
|
|
||||||
TUPLE: utf16 started? ;
|
TUPLE: missing-bom ;
|
||||||
|
M: missing-bom summary drop "The BOM for a UTF-16 stream was missing" ;
|
||||||
M: utf16 stream-write-encoded
|
|
||||||
dup utf16-started? [ drop ]
|
|
||||||
[ t swap set-utf16-started? bom-le over stream-write ] if
|
|
||||||
stream-write-utf16le ;
|
|
||||||
|
|
||||||
: bom>le/be ( bom -- le/be )
|
: bom>le/be ( bom -- le/be )
|
||||||
dup bom-le sequence= [ drop utf16le ] [
|
dup bom-le sequence= [ drop utf16le ] [
|
||||||
bom-be sequence= [ utf16be ] [ decode-error ] if
|
bom-be sequence= [ utf16be ] [ missing-bom ] if
|
||||||
] if ;
|
] if ;
|
||||||
|
|
||||||
M: utf16 init-decoder ( stream encoding -- newencoding )
|
M: utf16 <decoder> ( stream utf16 -- decoder )
|
||||||
2 rot stream-read bom>le/be construct-empty init-decoder ;
|
drop 2 over stream-read bom>le/be <decoder> ;
|
||||||
|
|
||||||
|
M: utf16 <encoder> ( stream utf16 -- encoder )
|
||||||
|
drop bom-le over stream-write utf16le <encoder> ;
|
||||||
|
|
||||||
|
PRIVATE>
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
IN: io.unix.launcher.tests
|
IN: io.unix.launcher.tests
|
||||||
USING: io.files tools.test io.launcher arrays io namespaces
|
USING: io.files tools.test io.launcher arrays io namespaces
|
||||||
continuations math io.encodings.ascii io.encodings.latin1
|
continuations math io.encodings.binary io.encodings.ascii
|
||||||
accessors kernel sequences ;
|
accessors kernel sequences ;
|
||||||
|
|
||||||
[ ] [
|
[ ] [
|
||||||
|
@ -64,7 +64,7 @@ accessors kernel sequences ;
|
||||||
|
|
||||||
[ ] [
|
[ ] [
|
||||||
2 [
|
2 [
|
||||||
"launcher-test-1" temp-file ascii <file-appender> [
|
"launcher-test-1" temp-file binary <file-appender> [
|
||||||
<process>
|
<process>
|
||||||
swap >>stdout
|
swap >>stdout
|
||||||
"echo Hello" >>command
|
"echo Hello" >>command
|
||||||
|
@ -84,7 +84,7 @@ accessors kernel sequences ;
|
||||||
<process>
|
<process>
|
||||||
"env" >>command
|
"env" >>command
|
||||||
{ { "A" "B" } } >>environment
|
{ { "A" "B" } } >>environment
|
||||||
latin1 <process-stream> lines
|
ascii <process-stream> lines
|
||||||
"A=B" swap member?
|
"A=B" swap member?
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
||||||
|
@ -93,5 +93,5 @@ accessors kernel sequences ;
|
||||||
"env" >>command
|
"env" >>command
|
||||||
{ { "A" "B" } } >>environment
|
{ { "A" "B" } } >>environment
|
||||||
+replace-environment+ >>environment-mode
|
+replace-environment+ >>environment-mode
|
||||||
latin1 <process-stream> lines
|
ascii <process-stream> lines
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
Loading…
Reference in New Issue