2010-04-19 02:13:21 -04:00
|
|
|
! Copyright (C) 2008, 2010 Daniel Ehrenberg, Slava Pestov.
|
2007-09-20 18:09:08 -04:00
|
|
|
! See http://factorcode.org/license.txt for BSD license.
|
2013-11-24 16:43:00 -05:00
|
|
|
USING: accessors byte-arrays combinators destructors io
|
|
|
|
io.streams.plain kernel kernel.private math namespaces sbufs
|
|
|
|
sequences sequences.private splitting strings strings.private ;
|
2007-09-20 18:09:08 -04:00
|
|
|
IN: io.encodings
|
|
|
|
|
2008-03-05 15:51:01 -05:00
|
|
|
! The encoding descriptor protocol
|
|
|
|
|
2011-10-13 00:08:58 -04:00
|
|
|
GENERIC: guess-encoded-length ( string-length encoding -- byte-length )
|
|
|
|
GENERIC: guess-decoded-length ( byte-length encoding -- string-length )
|
|
|
|
|
|
|
|
M: object guess-decoded-length drop ; inline
|
|
|
|
M: object guess-encoded-length drop ; inline
|
|
|
|
|
2008-03-14 04:09:51 -04:00
|
|
|
GENERIC: decode-char ( stream encoding -- char/f )
|
2008-03-05 15:51:01 -05:00
|
|
|
|
2013-03-18 16:35:22 -04:00
|
|
|
GENERIC: decode-until ( seps stream encoding -- string/f sep/f )
|
|
|
|
|
|
|
|
<PRIVATE
|
|
|
|
|
|
|
|
! If the stop? branch is taken convert the sbuf to a string
|
|
|
|
! If sep is present, returns ``string sep'' (string can be "")
|
|
|
|
! If sep is f, returns ``string f'' or ``f f''
|
|
|
|
: read-until-loop ( buf quot: ( -- char stop? ) -- string/f sep/f )
|
|
|
|
dup call
|
|
|
|
[ nip [ "" like ] dip [ f like f ] unless* ]
|
|
|
|
[ pick push read-until-loop ] if ; inline recursive
|
|
|
|
|
|
|
|
PRIVATE>
|
|
|
|
|
|
|
|
: (decode-until) ( seps stream encoding -- string/f sep/f )
|
|
|
|
[ decode-char dup ] 2curry swap [ dupd member? ] curry
|
|
|
|
[ [ drop f t ] if ] curry compose
|
|
|
|
[ 100 <sbuf> ] dip read-until-loop ; inline
|
|
|
|
|
|
|
|
M: object decode-until (decode-until) ;
|
|
|
|
|
2013-11-24 19:08:26 -05:00
|
|
|
CONSTANT: replacement-char 0xfffd
|
|
|
|
|
2013-11-24 16:43:00 -05:00
|
|
|
<PRIVATE
|
|
|
|
|
|
|
|
: string>byte-array-fast ( string -- byte-array )
|
|
|
|
{ string } declare ! aux>> must be f
|
|
|
|
[ length ] keep over <byte-array> [
|
|
|
|
[
|
|
|
|
[ [ string-nth-fast ] 2keep drop ]
|
|
|
|
[ set-nth-unsafe ] bi*
|
|
|
|
] 2curry each-integer
|
|
|
|
] keep ; inline
|
|
|
|
|
2013-11-24 19:08:26 -05:00
|
|
|
: byte-array>string-fast ( byte-array -- string )
|
|
|
|
{ byte-array } declare
|
|
|
|
[ length ] keep over 0 <string> [
|
|
|
|
[
|
|
|
|
[
|
|
|
|
[
|
|
|
|
nth-unsafe dup 127 <=
|
|
|
|
[ drop replacement-char ] unless
|
|
|
|
] 2keep drop
|
|
|
|
]
|
|
|
|
[ set-string-nth ] bi*
|
|
|
|
] 2curry each-integer
|
|
|
|
] keep dup reset-string-hashcode ;
|
|
|
|
|
2013-11-24 16:43:00 -05:00
|
|
|
PRIVATE>
|
|
|
|
|
2008-03-14 04:09:51 -04:00
|
|
|
GENERIC: encode-char ( char stream encoding -- )
|
2008-03-05 15:51:01 -05:00
|
|
|
|
2010-04-19 02:13:21 -04:00
|
|
|
GENERIC: encode-string ( string stream encoding -- )
|
|
|
|
|
|
|
|
M: object encode-string [ encode-char ] 2curry each ; inline
|
|
|
|
|
2008-03-20 21:11:45 -04:00
|
|
|
GENERIC: <decoder> ( stream encoding -- newstream )
|
2008-03-05 15:51:01 -05:00
|
|
|
|
2011-10-13 01:18:06 -04:00
|
|
|
TUPLE: decoder { stream read-only } { code read-only } { cr boolean } ;
|
2011-10-18 19:24:50 -04:00
|
|
|
INSTANCE: decoder input-stream
|
2008-02-13 02:02:37 -05:00
|
|
|
|
2008-03-20 16:00:49 -04:00
|
|
|
ERROR: decode-error ;
|
2008-02-13 18:53:10 -05:00
|
|
|
|
2008-03-19 16:24:49 -04:00
|
|
|
GENERIC: <encoder> ( stream encoding -- newstream )
|
|
|
|
|
2011-10-13 01:18:06 -04:00
|
|
|
TUPLE: encoder { stream read-only } { code read-only } ;
|
2011-10-18 19:24:50 -04:00
|
|
|
INSTANCE: encoder output-stream
|
2008-03-19 16:24:49 -04:00
|
|
|
|
2008-03-20 16:00:49 -04:00
|
|
|
ERROR: encode-error ;
|
2008-03-19 16:24:49 -04:00
|
|
|
|
|
|
|
! Decoding
|
|
|
|
|
2011-10-13 00:08:58 -04:00
|
|
|
M: object <decoder> f decoder boa ; inline
|
2008-02-11 00:14:42 -05:00
|
|
|
|
2008-06-17 06:22:33 -04:00
|
|
|
<PRIVATE
|
2008-02-13 20:53:53 -05:00
|
|
|
|
2008-12-15 20:44:56 -05:00
|
|
|
: cr+ ( stream -- ) t >>cr drop ; inline
|
2008-02-16 16:35:44 -05:00
|
|
|
|
2008-12-15 20:44:56 -05:00
|
|
|
: cr- ( stream -- ) f >>cr drop ; inline
|
2008-02-13 20:53:53 -05:00
|
|
|
|
2008-06-17 06:22:33 -04:00
|
|
|
: >decoder< ( decoder -- stream encoding )
|
|
|
|
[ stream>> ] [ code>> ] bi ; inline
|
2008-02-13 20:53:53 -05:00
|
|
|
|
2009-03-15 18:11:18 -04:00
|
|
|
M: decoder stream-element-type
|
2011-10-13 01:18:06 -04:00
|
|
|
drop +character+ ; inline
|
|
|
|
|
|
|
|
: (read1) ( decoder -- ch )
|
|
|
|
>decoder< decode-char ; inline
|
|
|
|
|
2011-10-13 02:25:10 -04:00
|
|
|
: fix-cr ( decoder c -- c' )
|
|
|
|
over cr>> [
|
|
|
|
over cr-
|
|
|
|
dup CHAR: \n eq? [ drop (read1) ] [ nip ] if
|
|
|
|
] [ nip ] if ; inline
|
2011-10-13 01:18:06 -04:00
|
|
|
|
|
|
|
M: decoder stream-read1 ( decoder -- ch )
|
|
|
|
dup (read1) fix-cr ; inline
|
|
|
|
|
2013-06-04 20:46:25 -04:00
|
|
|
M: decoder stream-tell stream>> stream-tell ;
|
|
|
|
|
2011-10-13 02:25:10 -04:00
|
|
|
: (read-first) ( n buf decoder -- buf stream encoding n c )
|
|
|
|
[ rot [ >decoder< ] dip 2over decode-char ]
|
|
|
|
[ swap fix-cr ] bi ; inline
|
|
|
|
|
|
|
|
: (store-read) ( buf stream encoding n c i -- buf stream encoding n )
|
|
|
|
[ rot [ set-nth-unsafe ] keep ] 2curry 3dip ; inline
|
|
|
|
|
|
|
|
: (finish-read) ( buf stream encoding n i -- i )
|
|
|
|
2nip 2nip ; inline
|
|
|
|
|
|
|
|
: (read-next) ( stream encoding n i -- stream encoding n i c )
|
|
|
|
[ 2dup decode-char ] 2dip rot ; inline
|
|
|
|
|
|
|
|
: (read-rest) ( buf stream encoding n i -- count )
|
|
|
|
2dup = [ (finish-read) ] [
|
|
|
|
(read-next) [
|
|
|
|
swap [ (store-read) ] [ 1 + ] bi (read-rest)
|
|
|
|
] [ (finish-read) ] if*
|
2011-10-13 01:18:06 -04:00
|
|
|
] if ; inline recursive
|
|
|
|
|
2011-10-14 21:08:27 -04:00
|
|
|
M: decoder stream-read-unsafe
|
2011-10-13 02:25:10 -04:00
|
|
|
pick 0 = [ 3drop 0 ] [
|
|
|
|
(read-first) [
|
|
|
|
0 (store-read)
|
|
|
|
1 (read-rest)
|
2012-09-28 12:16:08 -04:00
|
|
|
] [ 4drop 0 ] if*
|
2011-10-13 01:18:06 -04:00
|
|
|
] if ; inline
|
|
|
|
|
2012-08-25 20:03:51 -04:00
|
|
|
M: decoder stream-contents*
|
|
|
|
(stream-contents-by-element) ; inline
|
2011-10-14 21:08:27 -04:00
|
|
|
|
2008-06-17 06:22:33 -04:00
|
|
|
: line-ends/eof ( stream str -- str ) f like swap cr- ; inline
|
|
|
|
|
|
|
|
: line-ends\r ( stream str -- str ) swap cr+ ; inline
|
|
|
|
|
|
|
|
: line-ends\n ( stream str -- str )
|
|
|
|
over cr>> over empty? and
|
|
|
|
[ drop dup cr- stream-readln ] [ swap cr- ] if ; inline
|
|
|
|
|
|
|
|
: handle-readln ( stream str ch -- str )
|
|
|
|
{
|
|
|
|
{ f [ line-ends/eof ] }
|
|
|
|
{ CHAR: \r [ line-ends\r ] }
|
|
|
|
{ CHAR: \n [ line-ends\n ] }
|
|
|
|
} case ; inline
|
|
|
|
|
2013-03-18 16:35:22 -04:00
|
|
|
M: decoder stream-read-until >decoder< decode-until ;
|
2008-02-13 20:53:53 -05:00
|
|
|
|
2013-03-15 13:50:40 -04:00
|
|
|
M: decoder stream-readln
|
2013-03-18 16:35:22 -04:00
|
|
|
"\r\n" over >decoder< decode-until handle-readln ;
|
2008-02-16 16:35:44 -05:00
|
|
|
|
2008-06-17 06:22:33 -04:00
|
|
|
M: decoder dispose stream>> dispose ;
|
2008-03-14 04:09:51 -04:00
|
|
|
|
2008-02-16 16:35:44 -05:00
|
|
|
! Encoding
|
2011-10-13 00:08:58 -04:00
|
|
|
M: object <encoder> encoder boa ; inline
|
2008-03-14 04:09:51 -04:00
|
|
|
|
|
|
|
: >encoder< ( encoder -- stream encoding )
|
2008-06-17 06:22:33 -04:00
|
|
|
[ stream>> ] [ code>> ] bi ; inline
|
2008-02-16 16:35:44 -05:00
|
|
|
|
2009-03-15 18:11:18 -04:00
|
|
|
M: encoder stream-element-type
|
2011-10-13 00:08:58 -04:00
|
|
|
drop +character+ ; inline
|
2009-03-15 18:11:18 -04:00
|
|
|
|
2008-03-05 15:51:01 -05:00
|
|
|
M: encoder stream-write1
|
2011-10-13 00:08:58 -04:00
|
|
|
>encoder< encode-char ; inline
|
2008-02-13 20:53:53 -05:00
|
|
|
|
2008-03-05 15:51:01 -05:00
|
|
|
M: encoder stream-write
|
2011-10-13 00:08:58 -04:00
|
|
|
>encoder< encode-string ; inline
|
2008-02-16 16:35:44 -05:00
|
|
|
|
2011-10-13 00:08:58 -04:00
|
|
|
M: encoder dispose stream>> dispose ; inline
|
2008-02-13 20:53:53 -05:00
|
|
|
|
2011-10-13 00:08:58 -04:00
|
|
|
M: encoder stream-flush stream>> stream-flush ; inline
|
2008-03-18 17:01:14 -04:00
|
|
|
|
2008-03-05 15:51:01 -05:00
|
|
|
INSTANCE: encoder plain-writer
|
2008-05-10 21:17:24 -04:00
|
|
|
PRIVATE>
|
2008-02-15 20:44:35 -05:00
|
|
|
|
2008-06-12 04:49:29 -04:00
|
|
|
GENERIC# re-encode 1 ( stream encoding -- newstream )
|
|
|
|
|
|
|
|
M: object re-encode <encoder> ;
|
|
|
|
|
|
|
|
M: encoder re-encode [ stream>> ] dip re-encode ;
|
2008-02-15 20:44:35 -05:00
|
|
|
|
2008-05-10 21:17:24 -04:00
|
|
|
: encode-output ( encoding -- )
|
|
|
|
output-stream [ swap re-encode ] change ;
|
|
|
|
|
2008-06-12 04:49:29 -04:00
|
|
|
: with-encoded-output ( encoding quot -- )
|
|
|
|
[ [ output-stream get ] dip re-encode ] dip
|
|
|
|
with-output-stream* ; inline
|
|
|
|
|
|
|
|
GENERIC# re-decode 1 ( stream encoding -- newstream )
|
|
|
|
|
|
|
|
M: object re-decode <decoder> ;
|
|
|
|
|
|
|
|
M: decoder re-decode [ stream>> ] dip re-decode ;
|
2008-03-19 16:24:49 -04:00
|
|
|
|
2008-05-10 21:17:24 -04:00
|
|
|
: decode-input ( encoding -- )
|
|
|
|
input-stream [ swap re-decode ] change ;
|
2008-06-12 04:49:29 -04:00
|
|
|
|
|
|
|
: with-decoded-input ( encoding quot -- )
|
|
|
|
[ [ input-stream get ] dip re-decode ] dip
|
|
|
|
with-input-stream* ; inline
|