io.encodings: add a fast-path for ascii, utf8 and 8-bit encodings when string only contains ASCII characters

db4
Slava Pestov 2010-04-19 01:13:21 -05:00
parent d143aa64b2
commit 2eda6fc6aa
12 changed files with 87 additions and 66 deletions

View File

@ -1,8 +1,7 @@
! (c)2009, 2010 Slava Pestov, Joe Groff bsd license
USING: accessors alien alien.c-types alien.arrays alien.strings arrays
byte-arrays cpu.architecture fry io io.encodings.binary
io.files io.streams.memory kernel libc math sequences words
byte-vectors ;
USING: accessors alien alien.c-types alien.arrays alien.strings
arrays byte-arrays cpu.architecture fry io io.encodings.binary
io.files io.streams.memory kernel libc math sequences words ;
IN: alien.data
GENERIC: require-c-array ( c-type -- )
@ -63,13 +62,6 @@ M: memory-stream stream-read
swap memory>byte-array
] [ [ + ] change-index drop ] 2bi ;
M: byte-vector stream-write
[ dup byte-length tail-slice ]
[ [ [ byte-length ] bi@ + ] keep lengthen ]
[ drop byte-length ]
2tri
[ >c-ptr swap >c-ptr ] dip memcpy ;
M: value-type c-type-rep drop int-rep ;
M: value-type c-type-getter
@ -83,4 +75,3 @@ M: array c-type-boxer-quot
unclip [ array-length ] dip [ <c-direct-array> ] 2curry ;
M: array c-type-unboxer-quot drop [ >c-ptr ] ;

View File

@ -13,9 +13,9 @@ TUPLE: biassoc from to ;
M: biassoc assoc-size from>> assoc-size ;
M: biassoc at* from>> at* ;
M: biassoc at* from>> at* ; inline
M: biassoc value-at* to>> at* ;
M: biassoc value-at* to>> at* ; inline
: once-at ( value key assoc -- )
2dup key? [ 3drop ] [ set-at ] if ;

View File

@ -35,7 +35,7 @@ gc
[ optimized? not ] filter compile ;
"debug-compiler" get [
nl
"Compiling..." write flush
@ -117,4 +117,6 @@ gc
" done" print flush
"io.streams.byte-array.fast" require
] unless

View File

@ -2,10 +2,10 @@
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays assocs byte-arrays byte-vectors classes
combinators definitions effects fry generic generic.single
generic.standard hashtables io.binary io.streams.string kernel
kernel.private math math.integers.private math.parser
namespaces parser sbufs sequences splitting splitting.private strings
vectors words ;
generic.standard hashtables io.binary io.encodings
io.streams.string kernel kernel.private math
math.integers.private math.parser namespaces parser sbufs
sequences splitting splitting.private strings vectors words ;
IN: hints
GENERIC: specializer-predicate ( spec -- quot )
@ -131,3 +131,5 @@ M\ hashtable at* { { fixnum object } { word object } } "specializer" set-word-pr
M\ hashtable set-at { { object fixnum object } { object word object } } "specializer" set-word-prop
\ bignum/f { { bignum bignum } { bignum fixnum } { fixnum bignum } { fixnum fixnum } } "specializer" set-word-prop
\ encode-string { string object object } "specializer" set-word-prop

View File

@ -1,10 +1,10 @@
! Copyright (C) 2008 Daniel Ehrenberg, Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: math.parser arrays io.encodings sequences kernel assocs
hashtables io.encodings.ascii generic parser classes.tuple words
words.symbol io io.files splitting namespaces math
compiler.units accessors classes.singleton classes.mixin
io.encodings.iana fry simple-flat-file lexer ;
USING: arrays assocs biassocs kernel io.encodings math.parser
sequences hashtables io.encodings.ascii generic parser
classes.tuple words words.symbol io io.files splitting
namespaces math compiler.units accessors classes.singleton
classes.mixin io.encodings.iana fry simple-flat-file lexer ;
IN: io.encodings.8-bit
<PRIVATE
@ -15,20 +15,22 @@ IN: io.encodings.8-bit
SYMBOL: 8-bit-encodings
8-bit-encodings [ H{ } clone ] initialize
TUPLE: 8-bit biassoc ;
TUPLE: 8-bit { biassoc biassoc read-only } ;
: encode-8-bit ( char stream assoc -- )
swapd value-at
[ swap stream-write1 ] [ encode-error ] if* ; inline
: 8-bit-encode ( char 8-bit -- byte )
biassoc>> value-at [ encode-error ] unless* ; inline
M: 8-bit encode-char biassoc>> encode-8-bit ;
M: 8-bit encode-char
swap [ 8-bit-encode ] dip stream-write1 ;
: decode-8-bit ( stream assoc -- char/f )
swap stream-read1
[ swap at [ replacement-char ] unless* ]
[ drop f ] if* ; inline
M: 8-bit encode-string
swap [ '[ _ 8-bit-encode ] B{ } map-as ] dip stream-write ;
M: 8-bit decode-char biassoc>> decode-8-bit ;
M: 8-bit decode-char
swap stream-read1 dup
[ swap biassoc>> at [ replacement-char ] unless* ]
[ 2drop f ]
if ;
MIXIN: 8-bit-encoding

View File

@ -1,22 +1,27 @@
! Copyright (C) 2008 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: io io.encodings kernel math io.encodings.private ;
USING: accessors byte-arrays io io.encodings
io.encodings.private kernel math sequences ;
IN: io.encodings.ascii
<PRIVATE
: encode-if< ( char stream encoding max -- )
nip 1 - pick < [ encode-error ] [ stream-write1 ] if ; inline
: decode-if< ( stream encoding max -- character )
nip swap stream-read1 dup
[ [ nip ] [ > ] 2bi [ >fixnum ] [ drop replacement-char ] if ]
[ 2drop f ] if ; inline
PRIVATE>
SINGLETON: ascii
M: ascii encode-char
128 encode-if< ; inline
drop
over 127 <= [ stream-write1 ] [ encode-error ] if ; inline
M: ascii encode-string
drop
[
dup aux>>
[ [ dup 127 <= [ encode-error ] unless ] B{ } map-as ]
[ >byte-array ]
if
] dip
stream-write ;
M: ascii decode-char
128 decode-if< ; inline
drop
stream-read1 dup [
dup 127 <= [ >fixnum ] [ drop replacement-char ] if
] when ; inline

View File

@ -114,7 +114,7 @@ M: output-port stream-write1
: write-in-groups ( byte-array port -- )
[ binary-object <direct-uchar-array> ] dip
[ buffer>> size>> <groups> ] [ '[ _ stream-write ] ] bi
[ buffer>> size>> <sliced-groups> ] [ '[ _ stream-write ] ] bi
each ;
M: output-port stream-write
@ -198,5 +198,3 @@ io.encodings.private ;
HINTS: decoder-read-until { string input-port utf8 } { string input-port ascii } ;
HINTS: decoder-readln { input-port utf8 } { input-port ascii } ;
HINTS: encoder-write { object output-port utf8 } { object output-port ascii } ;

View File

@ -0,0 +1 @@
Slava Pestov

View File

@ -0,0 +1,15 @@
! Copyright (C) 2010 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: alien byte-vectors io kernel libc math sequences ;
IN: io.streams.byte-array.fast
! This is split off from io.streams.byte-array because it uses
! memcpy, which is a non-core word that only works after the
! optimizing compiler has been loaded.
M: byte-vector stream-write
[ dup byte-length tail-slice ]
[ [ [ byte-length ] bi@ + ] keep lengthen ]
[ drop byte-length ]
2tri
[ >c-ptr swap >c-ptr ] dip memcpy ;

View File

@ -1,4 +1,4 @@
! Copyright (C) 2008 Daniel Ehrenberg.
! Copyright (C) 2008, 2010 Daniel Ehrenberg, Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: math kernel sequences sbufs vectors namespaces growable
strings io classes continuations destructors combinators
@ -12,6 +12,10 @@ GENERIC: decode-char ( stream encoding -- char/f )
GENERIC: encode-char ( char stream encoding -- )
GENERIC: encode-string ( string stream encoding -- )
M: object encode-string [ encode-char ] 2curry each ; inline
GENERIC: <decoder> ( stream encoding -- newstream )
CONSTANT: replacement-char HEX: fffd
@ -134,13 +138,8 @@ M: encoder stream-element-type
M: encoder stream-write1
>encoder< encode-char ;
GENERIC# encoder-write 2 ( string stream encoding -- )
M: string encoder-write
[ encode-char ] 2curry each ;
M: encoder stream-write
>encoder< encoder-write ;
>encoder< encode-string ;
M: encoder dispose stream>> dispose ;

View File

@ -1,7 +1,8 @@
! Copyright (C) 2006, 2008 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: math math.order kernel sequences sbufs vectors growable io
continuations namespaces io.encodings combinators strings ;
USING: accessors byte-arrays math math.order kernel sequences
sbufs vectors growable io continuations namespaces io.encodings
combinators strings ;
IN: io.encodings.utf8
! Decoding UTF-8
@ -45,10 +46,10 @@ M: utf8 decode-char
! Encoding UTF-8
: encoded ( stream char -- )
BIN: 111111 bitand BIN: 10000000 bitor swap stream-write1 ;
BIN: 111111 bitand BIN: 10000000 bitor swap stream-write1 ; inline
: char>utf8 ( stream char -- )
{
: char>utf8 ( char stream -- )
swap {
{ [ dup -7 shift zero? ] [ swap stream-write1 ] }
{ [ dup -11 shift zero? ] [
2dup -6 shift BIN: 11000000 bitor swap stream-write1
@ -65,10 +66,16 @@ M: utf8 decode-char
2dup -6 shift encoded
encoded
]
} cond ;
} cond ; inline
M: utf8 encode-char
drop swap char>utf8 ;
drop char>utf8 ;
M: utf8 encode-string
drop
over aux>>
[ [ char>utf8 ] curry each ]
[ [ >byte-array ] dip stream-write ] if ;
PRIVATE>

View File

@ -1,8 +1,7 @@
! Copyright (C) 2003, 2008 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors kernel math.private sequences kernel.private
math sequences.private slots.private byte-arrays
alien.accessors ;
math sequences.private slots.private alien.accessors ;
IN: strings
<PRIVATE