io.encodings.string: faster for ascii and utf8.

db4
John Benediktsson 2013-11-24 16:08:26 -08:00
parent 3c5ca8195e
commit 13ec450ac8
2 changed files with 37 additions and 14 deletions

View File

@ -1,23 +1,32 @@
! Copyright (C) 2008 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: byte-vectors io io.encodings io.streams.byte-array
io.streams.string kernel locals sbufs sequences io.private
io.encodings.binary ;
USING: accessors byte-arrays byte-vectors io io.encodings
io.streams.byte-array io.streams.string kernel locals
sbufs sequences io.private io.encodings.ascii
io.encodings.binary io.encodings.private io.encodings.utf8 ;
IN: io.encodings.string
! decode ( byte-array encoding -- string )
! Converts byte-array into a decoded sequence using encoding.
! When encoding is the binary encoding word itself (eq?), the input is
! returned unchanged.
! NOTE(review): this span looks like it was taken from a diff view in which
! the previous body (reader loop only) and the new body (ascii fast path
! wrapping the same reader loop) are shown one after the other without
! +/- markers -- confirm against the real file before relying on it.
:: decode ( byte-array encoding -- string )
encoding binary eq? [ byte-array ] [
byte-array encoding <byte-reader> :> reader
byte-array length
encoding guess-decoded-length
reader stream-exemplar-growable new-resizable :> buf
[ reader stream-read1 dup ] [ buf push ] while drop
buf reader stream-exemplar like
! Fast path: taken only when the input really is a byte-array and the
! encoding is the ascii word itself; skips the stream reader entirely.
byte-array byte-array? encoding ascii eq? and [
byte-array byte-array>string-fast
] [
! General path: wrap the bytes in a decoding reader ...
byte-array encoding <byte-reader> :> reader
! ... size a growable buffer from the encoding's length guess ...
byte-array length
encoding guess-decoded-length
reader stream-exemplar-growable new-resizable :> buf
! ... push each element read until stream-read1 yields a false value
! (that final value is what the trailing drop discards) ...
[ reader stream-read1 dup ] [ buf push ] while drop
! ... and convert the buffer to the reader's exemplar sequence type.
buf reader stream-exemplar like
] if
] if ; inline
! encode ( string encoding -- byte-array )
! Converts string into bytes using encoding.
! When encoding is the binary encoding word itself (eq?), the input is
! returned unchanged.
! NOTE(review): this span looks like it was taken from a diff view in which
! the previous body (encoder stream only) and the new body (fast path
! wrapping the same encoder stream) are shown one after the other without
! +/- markers -- confirm against the real file before relying on it.
:: encode ( string encoding -- byte-array )
encoding binary eq? [ string ] [
string length encoding guess-encoded-length <byte-vector> :> vec
string vec encoding <encoder> stream-write
vec B{ } like
! Fast path: taken when the string's aux slot is f and the encoding is
! either the ascii or the utf8 word (identity comparison via member-eq?).
! NOTE(review): presumably a f aux slot means every character fits in
! 7 bits, which is what makes a direct byte copy valid for both
! encodings -- confirm against the string implementation.
string aux>> not encoding { ascii utf8 } member-eq? and [
string string>byte-array-fast
] [
! General path: allocate a byte-vector sized from the encoding's length
! guess, write the string through an encoder stream into it, then
! convert the vector to a byte-array with like.
string length encoding guess-encoded-length <byte-vector> :> vec
string vec encoding <encoder> stream-write
vec B{ } like
] if
] if ; inline

View File

@ -36,6 +36,8 @@ PRIVATE>
M: object decode-until (decode-until) ;
CONSTANT: replacement-char 0xfffd
<PRIVATE
: string>byte-array-fast ( string -- byte-array )
@ -47,6 +49,20 @@ M: object decode-until (decode-until) ;
] 2curry each-integer
] keep ; inline
! byte-array>string-fast ( byte-array -- string )
! Builds a string of the same length as byte-array by copying each byte
! directly; any byte greater than 127 is replaced with replacement-char.
! Uses unchecked element access (nth-unsafe), so the input must really be
! a byte-array as declared.
: byte-array>string-fast ( byte-array -- string )
! Declare the input class so the unchecked accesses below can be compiled
! without dispatch. NOTE(review): declare is not verified at run time --
! callers must guarantee the type.
{ byte-array } declare
! Stack after this line: len byte-array string, where string is a fresh
! string of len elements initialised to 0. The outer keep leaves that
! string on the stack once the copy loop has finished.
[ length ] keep over 0 <string> [
! The quotation below is curried with byte-array and string, so
! each-integer calls it with: i byte-array string
[
! First bi* branch, run on ( i byte-array ) beneath string:
! fetch the byte, substitute replacement-char when it is above 127,
! then restore i so the stack becomes ( ch i ).
[
[
nth-unsafe dup 127 <=
[ drop replacement-char ] unless
] 2keep drop
]
! Second bi* branch, run with string on top: ( ch i string -- )
[ set-string-nth ] bi*
] 2curry each-integer
! NOTE(review): reset-string-hashcode is presumably needed because the
! elements were written directly into the string -- confirm.
] keep dup reset-string-hashcode ;
PRIVATE>
GENERIC: encode-char ( char stream encoding -- )
@ -57,8 +73,6 @@ M: object encode-string [ encode-char ] 2curry each ; inline
GENERIC: <decoder> ( stream encoding -- newstream )
CONSTANT: replacement-char 0xfffd
TUPLE: decoder { stream read-only } { code read-only } { cr boolean } ;
INSTANCE: decoder input-stream