io.encodings: add a fast-path for ascii, utf8 and 8-bit encodings when string only contains ASCII characters

db4
Slava Pestov 2010-04-19 01:13:21 -05:00
parent d143aa64b2
commit 2eda6fc6aa
12 changed files with 87 additions and 66 deletions

View File

@ -1,8 +1,7 @@
! (c)2009, 2010 Slava Pestov, Joe Groff bsd license ! (c)2009, 2010 Slava Pestov, Joe Groff bsd license
USING: accessors alien alien.c-types alien.arrays alien.strings arrays USING: accessors alien alien.c-types alien.arrays alien.strings
byte-arrays cpu.architecture fry io io.encodings.binary arrays byte-arrays cpu.architecture fry io io.encodings.binary
io.files io.streams.memory kernel libc math sequences words io.files io.streams.memory kernel libc math sequences words ;
byte-vectors ;
IN: alien.data IN: alien.data
GENERIC: require-c-array ( c-type -- ) GENERIC: require-c-array ( c-type -- )
@ -63,13 +62,6 @@ M: memory-stream stream-read
swap memory>byte-array swap memory>byte-array
] [ [ + ] change-index drop ] 2bi ; ] [ [ + ] change-index drop ] 2bi ;
M: byte-vector stream-write
[ dup byte-length tail-slice ]
[ [ [ byte-length ] bi@ + ] keep lengthen ]
[ drop byte-length ]
2tri
[ >c-ptr swap >c-ptr ] dip memcpy ;
M: value-type c-type-rep drop int-rep ; M: value-type c-type-rep drop int-rep ;
M: value-type c-type-getter M: value-type c-type-getter
@ -83,4 +75,3 @@ M: array c-type-boxer-quot
unclip [ array-length ] dip [ <c-direct-array> ] 2curry ; unclip [ array-length ] dip [ <c-direct-array> ] 2curry ;
M: array c-type-unboxer-quot drop [ >c-ptr ] ; M: array c-type-unboxer-quot drop [ >c-ptr ] ;

View File

@ -13,9 +13,9 @@ TUPLE: biassoc from to ;
M: biassoc assoc-size from>> assoc-size ; M: biassoc assoc-size from>> assoc-size ;
M: biassoc at* from>> at* ; M: biassoc at* from>> at* ; inline
M: biassoc value-at* to>> at* ; M: biassoc value-at* to>> at* ; inline
: once-at ( value key assoc -- ) : once-at ( value key assoc -- )
2dup key? [ 3drop ] [ set-at ] if ; 2dup key? [ 3drop ] [ set-at ] if ;

View File

@ -35,7 +35,7 @@ gc
[ optimized? not ] filter compile ; [ optimized? not ] filter compile ;
"debug-compiler" get [ "debug-compiler" get [
nl nl
"Compiling..." write flush "Compiling..." write flush
@ -117,4 +117,6 @@ gc
" done" print flush " done" print flush
"io.streams.byte-array.fast" require
] unless ] unless

View File

@ -2,10 +2,10 @@
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays assocs byte-arrays byte-vectors classes USING: accessors arrays assocs byte-arrays byte-vectors classes
combinators definitions effects fry generic generic.single combinators definitions effects fry generic generic.single
generic.standard hashtables io.binary io.streams.string kernel generic.standard hashtables io.binary io.encodings
kernel.private math math.integers.private math.parser io.streams.string kernel kernel.private math
namespaces parser sbufs sequences splitting splitting.private strings math.integers.private math.parser namespaces parser sbufs
vectors words ; sequences splitting splitting.private strings vectors words ;
IN: hints IN: hints
GENERIC: specializer-predicate ( spec -- quot ) GENERIC: specializer-predicate ( spec -- quot )
@ -131,3 +131,5 @@ M\ hashtable at* { { fixnum object } { word object } } "specializer" set-word-pr
M\ hashtable set-at { { object fixnum object } { object word object } } "specializer" set-word-prop M\ hashtable set-at { { object fixnum object } { object word object } } "specializer" set-word-prop
\ bignum/f { { bignum bignum } { bignum fixnum } { fixnum bignum } { fixnum fixnum } } "specializer" set-word-prop \ bignum/f { { bignum bignum } { bignum fixnum } { fixnum bignum } { fixnum fixnum } } "specializer" set-word-prop
\ encode-string { string object object } "specializer" set-word-prop

View File

@ -1,10 +1,10 @@
! Copyright (C) 2008 Daniel Ehrenberg, Doug Coleman. ! Copyright (C) 2008 Daniel Ehrenberg, Doug Coleman.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: math.parser arrays io.encodings sequences kernel assocs USING: arrays assocs biassocs kernel io.encodings math.parser
hashtables io.encodings.ascii generic parser classes.tuple words sequences hashtables io.encodings.ascii generic parser
words.symbol io io.files splitting namespaces math classes.tuple words words.symbol io io.files splitting
compiler.units accessors classes.singleton classes.mixin namespaces math compiler.units accessors classes.singleton
io.encodings.iana fry simple-flat-file lexer ; classes.mixin io.encodings.iana fry simple-flat-file lexer ;
IN: io.encodings.8-bit IN: io.encodings.8-bit
<PRIVATE <PRIVATE
@ -15,20 +15,22 @@ IN: io.encodings.8-bit
SYMBOL: 8-bit-encodings SYMBOL: 8-bit-encodings
8-bit-encodings [ H{ } clone ] initialize 8-bit-encodings [ H{ } clone ] initialize
TUPLE: 8-bit biassoc ; TUPLE: 8-bit { biassoc biassoc read-only } ;
: encode-8-bit ( char stream assoc -- ) : 8-bit-encode ( char 8-bit -- byte )
swapd value-at biassoc>> value-at [ encode-error ] unless* ; inline
[ swap stream-write1 ] [ encode-error ] if* ; inline
M: 8-bit encode-char biassoc>> encode-8-bit ; M: 8-bit encode-char
swap [ 8-bit-encode ] dip stream-write1 ;
: decode-8-bit ( stream assoc -- char/f ) M: 8-bit encode-string
swap stream-read1 swap [ '[ _ 8-bit-encode ] B{ } map-as ] dip stream-write ;
[ swap at [ replacement-char ] unless* ]
[ drop f ] if* ; inline
M: 8-bit decode-char biassoc>> decode-8-bit ; M: 8-bit decode-char
swap stream-read1 dup
[ swap biassoc>> at [ replacement-char ] unless* ]
[ 2drop f ]
if ;
MIXIN: 8-bit-encoding MIXIN: 8-bit-encoding

View File

@ -1,22 +1,27 @@
! Copyright (C) 2008 Daniel Ehrenberg. ! Copyright (C) 2008 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: io io.encodings kernel math io.encodings.private ; USING: accessors byte-arrays io io.encodings
io.encodings.private kernel math sequences ;
IN: io.encodings.ascii IN: io.encodings.ascii
<PRIVATE
: encode-if< ( char stream encoding max -- )
nip 1 - pick < [ encode-error ] [ stream-write1 ] if ; inline
: decode-if< ( stream encoding max -- character )
nip swap stream-read1 dup
[ [ nip ] [ > ] 2bi [ >fixnum ] [ drop replacement-char ] if ]
[ 2drop f ] if ; inline
PRIVATE>
SINGLETON: ascii SINGLETON: ascii
M: ascii encode-char M: ascii encode-char
128 encode-if< ; inline drop
over 127 <= [ stream-write1 ] [ encode-error ] if ; inline
M: ascii encode-string
drop
[
dup aux>>
[ [ dup 127 <= [ encode-error ] unless ] B{ } map-as ]
[ >byte-array ]
if
] dip
stream-write ;
M: ascii decode-char M: ascii decode-char
128 decode-if< ; inline drop
stream-read1 dup [
dup 127 <= [ >fixnum ] [ drop replacement-char ] if
] when ; inline

View File

@ -114,7 +114,7 @@ M: output-port stream-write1
: write-in-groups ( byte-array port -- ) : write-in-groups ( byte-array port -- )
[ binary-object <direct-uchar-array> ] dip [ binary-object <direct-uchar-array> ] dip
[ buffer>> size>> <groups> ] [ '[ _ stream-write ] ] bi [ buffer>> size>> <sliced-groups> ] [ '[ _ stream-write ] ] bi
each ; each ;
M: output-port stream-write M: output-port stream-write
@ -198,5 +198,3 @@ io.encodings.private ;
HINTS: decoder-read-until { string input-port utf8 } { string input-port ascii } ; HINTS: decoder-read-until { string input-port utf8 } { string input-port ascii } ;
HINTS: decoder-readln { input-port utf8 } { input-port ascii } ; HINTS: decoder-readln { input-port utf8 } { input-port ascii } ;
HINTS: encoder-write { object output-port utf8 } { object output-port ascii } ;

View File

@ -0,0 +1 @@
Slava Pestov

View File

@ -0,0 +1,15 @@
! Copyright (C) 2010 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: alien byte-vectors io kernel libc math sequences ;
IN: io.streams.byte-array.fast
! This is split off from io.streams.byte-array because it uses
! memcpy, which is a non-core word that only works after the
! optimizing compiler has been loaded.
M: byte-vector stream-write
[ dup byte-length tail-slice ]
[ [ [ byte-length ] bi@ + ] keep lengthen ]
[ drop byte-length ]
2tri
[ >c-ptr swap >c-ptr ] dip memcpy ;

View File

@ -1,4 +1,4 @@
! Copyright (C) 2008 Daniel Ehrenberg. ! Copyright (C) 2008, 2010 Daniel Ehrenberg, Slava Pestov.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: math kernel sequences sbufs vectors namespaces growable USING: math kernel sequences sbufs vectors namespaces growable
strings io classes continuations destructors combinators strings io classes continuations destructors combinators
@ -12,6 +12,10 @@ GENERIC: decode-char ( stream encoding -- char/f )
GENERIC: encode-char ( char stream encoding -- ) GENERIC: encode-char ( char stream encoding -- )
GENERIC: encode-string ( string stream encoding -- )
M: object encode-string [ encode-char ] 2curry each ; inline
GENERIC: <decoder> ( stream encoding -- newstream ) GENERIC: <decoder> ( stream encoding -- newstream )
CONSTANT: replacement-char HEX: fffd CONSTANT: replacement-char HEX: fffd
@ -134,13 +138,8 @@ M: encoder stream-element-type
M: encoder stream-write1 M: encoder stream-write1
>encoder< encode-char ; >encoder< encode-char ;
GENERIC# encoder-write 2 ( string stream encoding -- )
M: string encoder-write
[ encode-char ] 2curry each ;
M: encoder stream-write M: encoder stream-write
>encoder< encoder-write ; >encoder< encode-string ;
M: encoder dispose stream>> dispose ; M: encoder dispose stream>> dispose ;

View File

@ -1,7 +1,8 @@
! Copyright (C) 2006, 2008 Daniel Ehrenberg. ! Copyright (C) 2006, 2008 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: math math.order kernel sequences sbufs vectors growable io USING: accessors byte-arrays math math.order kernel sequences
continuations namespaces io.encodings combinators strings ; sbufs vectors growable io continuations namespaces io.encodings
combinators strings ;
IN: io.encodings.utf8 IN: io.encodings.utf8
! Decoding UTF-8 ! Decoding UTF-8
@ -45,10 +46,10 @@ M: utf8 decode-char
! Encoding UTF-8 ! Encoding UTF-8
: encoded ( stream char -- ) : encoded ( stream char -- )
BIN: 111111 bitand BIN: 10000000 bitor swap stream-write1 ; BIN: 111111 bitand BIN: 10000000 bitor swap stream-write1 ; inline
: char>utf8 ( stream char -- ) : char>utf8 ( char stream -- )
{ swap {
{ [ dup -7 shift zero? ] [ swap stream-write1 ] } { [ dup -7 shift zero? ] [ swap stream-write1 ] }
{ [ dup -11 shift zero? ] [ { [ dup -11 shift zero? ] [
2dup -6 shift BIN: 11000000 bitor swap stream-write1 2dup -6 shift BIN: 11000000 bitor swap stream-write1
@ -65,10 +66,16 @@ M: utf8 decode-char
2dup -6 shift encoded 2dup -6 shift encoded
encoded encoded
] ]
} cond ; } cond ; inline
M: utf8 encode-char M: utf8 encode-char
drop swap char>utf8 ; drop char>utf8 ;
M: utf8 encode-string
drop
over aux>>
[ [ char>utf8 ] curry each ]
[ [ >byte-array ] dip stream-write ] if ;
PRIVATE> PRIVATE>

View File

@ -1,8 +1,7 @@
! Copyright (C) 2003, 2008 Slava Pestov. ! Copyright (C) 2003, 2008 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: accessors kernel math.private sequences kernel.private USING: accessors kernel math.private sequences kernel.private
math sequences.private slots.private byte-arrays math sequences.private slots.private alien.accessors ;
alien.accessors ;
IN: strings IN: strings
<PRIVATE <PRIVATE