io.encodings: add a fast-path for ascii, utf8 and 8-bit encodings when string only contains ASCII characters
parent
d143aa64b2
commit
2eda6fc6aa
|
@ -1,8 +1,7 @@
|
||||||
! (c)2009, 2010 Slava Pestov, Joe Groff bsd license
|
! (c)2009, 2010 Slava Pestov, Joe Groff bsd license
|
||||||
USING: accessors alien alien.c-types alien.arrays alien.strings arrays
|
USING: accessors alien alien.c-types alien.arrays alien.strings
|
||||||
byte-arrays cpu.architecture fry io io.encodings.binary
|
arrays byte-arrays cpu.architecture fry io io.encodings.binary
|
||||||
io.files io.streams.memory kernel libc math sequences words
|
io.files io.streams.memory kernel libc math sequences words ;
|
||||||
byte-vectors ;
|
|
||||||
IN: alien.data
|
IN: alien.data
|
||||||
|
|
||||||
GENERIC: require-c-array ( c-type -- )
|
GENERIC: require-c-array ( c-type -- )
|
||||||
|
@ -63,13 +62,6 @@ M: memory-stream stream-read
|
||||||
swap memory>byte-array
|
swap memory>byte-array
|
||||||
] [ [ + ] change-index drop ] 2bi ;
|
] [ [ + ] change-index drop ] 2bi ;
|
||||||
|
|
||||||
M: byte-vector stream-write
|
|
||||||
[ dup byte-length tail-slice ]
|
|
||||||
[ [ [ byte-length ] bi@ + ] keep lengthen ]
|
|
||||||
[ drop byte-length ]
|
|
||||||
2tri
|
|
||||||
[ >c-ptr swap >c-ptr ] dip memcpy ;
|
|
||||||
|
|
||||||
M: value-type c-type-rep drop int-rep ;
|
M: value-type c-type-rep drop int-rep ;
|
||||||
|
|
||||||
M: value-type c-type-getter
|
M: value-type c-type-getter
|
||||||
|
@ -83,4 +75,3 @@ M: array c-type-boxer-quot
|
||||||
unclip [ array-length ] dip [ <c-direct-array> ] 2curry ;
|
unclip [ array-length ] dip [ <c-direct-array> ] 2curry ;
|
||||||
|
|
||||||
M: array c-type-unboxer-quot drop [ >c-ptr ] ;
|
M: array c-type-unboxer-quot drop [ >c-ptr ] ;
|
||||||
|
|
||||||
|
|
|
@ -13,9 +13,9 @@ TUPLE: biassoc from to ;
|
||||||
|
|
||||||
M: biassoc assoc-size from>> assoc-size ;
|
M: biassoc assoc-size from>> assoc-size ;
|
||||||
|
|
||||||
M: biassoc at* from>> at* ;
|
M: biassoc at* from>> at* ; inline
|
||||||
|
|
||||||
M: biassoc value-at* to>> at* ;
|
M: biassoc value-at* to>> at* ; inline
|
||||||
|
|
||||||
: once-at ( value key assoc -- )
|
: once-at ( value key assoc -- )
|
||||||
2dup key? [ 3drop ] [ set-at ] if ;
|
2dup key? [ 3drop ] [ set-at ] if ;
|
||||||
|
|
|
@ -117,4 +117,6 @@ gc
|
||||||
|
|
||||||
" done" print flush
|
" done" print flush
|
||||||
|
|
||||||
|
"io.streams.byte-array.fast" require
|
||||||
|
|
||||||
] unless
|
] unless
|
||||||
|
|
|
@ -2,10 +2,10 @@
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: accessors arrays assocs byte-arrays byte-vectors classes
|
USING: accessors arrays assocs byte-arrays byte-vectors classes
|
||||||
combinators definitions effects fry generic generic.single
|
combinators definitions effects fry generic generic.single
|
||||||
generic.standard hashtables io.binary io.streams.string kernel
|
generic.standard hashtables io.binary io.encodings
|
||||||
kernel.private math math.integers.private math.parser
|
io.streams.string kernel kernel.private math
|
||||||
namespaces parser sbufs sequences splitting splitting.private strings
|
math.integers.private math.parser namespaces parser sbufs
|
||||||
vectors words ;
|
sequences splitting splitting.private strings vectors words ;
|
||||||
IN: hints
|
IN: hints
|
||||||
|
|
||||||
GENERIC: specializer-predicate ( spec -- quot )
|
GENERIC: specializer-predicate ( spec -- quot )
|
||||||
|
@ -131,3 +131,5 @@ M\ hashtable at* { { fixnum object } { word object } } "specializer" set-word-pr
|
||||||
M\ hashtable set-at { { object fixnum object } { object word object } } "specializer" set-word-prop
|
M\ hashtable set-at { { object fixnum object } { object word object } } "specializer" set-word-prop
|
||||||
|
|
||||||
\ bignum/f { { bignum bignum } { bignum fixnum } { fixnum bignum } { fixnum fixnum } } "specializer" set-word-prop
|
\ bignum/f { { bignum bignum } { bignum fixnum } { fixnum bignum } { fixnum fixnum } } "specializer" set-word-prop
|
||||||
|
|
||||||
|
\ encode-string { string object object } "specializer" set-word-prop
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
! Copyright (C) 2008 Daniel Ehrenberg, Doug Coleman.
|
! Copyright (C) 2008 Daniel Ehrenberg, Doug Coleman.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: math.parser arrays io.encodings sequences kernel assocs
|
USING: arrays assocs biassocs kernel io.encodings math.parser
|
||||||
hashtables io.encodings.ascii generic parser classes.tuple words
|
sequences hashtables io.encodings.ascii generic parser
|
||||||
words.symbol io io.files splitting namespaces math
|
classes.tuple words words.symbol io io.files splitting
|
||||||
compiler.units accessors classes.singleton classes.mixin
|
namespaces math compiler.units accessors classes.singleton
|
||||||
io.encodings.iana fry simple-flat-file lexer ;
|
classes.mixin io.encodings.iana fry simple-flat-file lexer ;
|
||||||
IN: io.encodings.8-bit
|
IN: io.encodings.8-bit
|
||||||
|
|
||||||
<PRIVATE
|
<PRIVATE
|
||||||
|
@ -15,20 +15,22 @@ IN: io.encodings.8-bit
|
||||||
SYMBOL: 8-bit-encodings
|
SYMBOL: 8-bit-encodings
|
||||||
8-bit-encodings [ H{ } clone ] initialize
|
8-bit-encodings [ H{ } clone ] initialize
|
||||||
|
|
||||||
TUPLE: 8-bit biassoc ;
|
TUPLE: 8-bit { biassoc biassoc read-only } ;
|
||||||
|
|
||||||
: encode-8-bit ( char stream assoc -- )
|
: 8-bit-encode ( char 8-bit -- byte )
|
||||||
swapd value-at
|
biassoc>> value-at [ encode-error ] unless* ; inline
|
||||||
[ swap stream-write1 ] [ encode-error ] if* ; inline
|
|
||||||
|
|
||||||
M: 8-bit encode-char biassoc>> encode-8-bit ;
|
M: 8-bit encode-char
|
||||||
|
swap [ 8-bit-encode ] dip stream-write1 ;
|
||||||
|
|
||||||
: decode-8-bit ( stream assoc -- char/f )
|
M: 8-bit encode-string
|
||||||
swap stream-read1
|
swap [ '[ _ 8-bit-encode ] B{ } map-as ] dip stream-write ;
|
||||||
[ swap at [ replacement-char ] unless* ]
|
|
||||||
[ drop f ] if* ; inline
|
|
||||||
|
|
||||||
M: 8-bit decode-char biassoc>> decode-8-bit ;
|
M: 8-bit decode-char
|
||||||
|
swap stream-read1 dup
|
||||||
|
[ swap biassoc>> at [ replacement-char ] unless* ]
|
||||||
|
[ 2drop f ]
|
||||||
|
if ;
|
||||||
|
|
||||||
MIXIN: 8-bit-encoding
|
MIXIN: 8-bit-encoding
|
||||||
|
|
||||||
|
|
|
@ -1,22 +1,27 @@
|
||||||
! Copyright (C) 2008 Daniel Ehrenberg.
|
! Copyright (C) 2008 Daniel Ehrenberg.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: io io.encodings kernel math io.encodings.private ;
|
USING: accessors byte-arrays io io.encodings
|
||||||
|
io.encodings.private kernel math sequences ;
|
||||||
IN: io.encodings.ascii
|
IN: io.encodings.ascii
|
||||||
|
|
||||||
<PRIVATE
|
|
||||||
: encode-if< ( char stream encoding max -- )
|
|
||||||
nip 1 - pick < [ encode-error ] [ stream-write1 ] if ; inline
|
|
||||||
|
|
||||||
: decode-if< ( stream encoding max -- character )
|
|
||||||
nip swap stream-read1 dup
|
|
||||||
[ [ nip ] [ > ] 2bi [ >fixnum ] [ drop replacement-char ] if ]
|
|
||||||
[ 2drop f ] if ; inline
|
|
||||||
PRIVATE>
|
|
||||||
|
|
||||||
SINGLETON: ascii
|
SINGLETON: ascii
|
||||||
|
|
||||||
M: ascii encode-char
|
M: ascii encode-char
|
||||||
128 encode-if< ; inline
|
drop
|
||||||
|
over 127 <= [ stream-write1 ] [ encode-error ] if ; inline
|
||||||
|
|
||||||
|
M: ascii encode-string
|
||||||
|
drop
|
||||||
|
[
|
||||||
|
dup aux>>
|
||||||
|
[ [ dup 127 <= [ encode-error ] unless ] B{ } map-as ]
|
||||||
|
[ >byte-array ]
|
||||||
|
if
|
||||||
|
] dip
|
||||||
|
stream-write ;
|
||||||
|
|
||||||
M: ascii decode-char
|
M: ascii decode-char
|
||||||
128 decode-if< ; inline
|
drop
|
||||||
|
stream-read1 dup [
|
||||||
|
dup 127 <= [ >fixnum ] [ drop replacement-char ] if
|
||||||
|
] when ; inline
|
||||||
|
|
|
@ -114,7 +114,7 @@ M: output-port stream-write1
|
||||||
|
|
||||||
: write-in-groups ( byte-array port -- )
|
: write-in-groups ( byte-array port -- )
|
||||||
[ binary-object <direct-uchar-array> ] dip
|
[ binary-object <direct-uchar-array> ] dip
|
||||||
[ buffer>> size>> <groups> ] [ '[ _ stream-write ] ] bi
|
[ buffer>> size>> <sliced-groups> ] [ '[ _ stream-write ] ] bi
|
||||||
each ;
|
each ;
|
||||||
|
|
||||||
M: output-port stream-write
|
M: output-port stream-write
|
||||||
|
@ -198,5 +198,3 @@ io.encodings.private ;
|
||||||
HINTS: decoder-read-until { string input-port utf8 } { string input-port ascii } ;
|
HINTS: decoder-read-until { string input-port utf8 } { string input-port ascii } ;
|
||||||
|
|
||||||
HINTS: decoder-readln { input-port utf8 } { input-port ascii } ;
|
HINTS: decoder-readln { input-port utf8 } { input-port ascii } ;
|
||||||
|
|
||||||
HINTS: encoder-write { object output-port utf8 } { object output-port ascii } ;
|
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
Slava Pestov
|
|
@ -0,0 +1,15 @@
|
||||||
|
! Copyright (C) 2010 Slava Pestov.
|
||||||
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
|
USING: alien byte-vectors io kernel libc math sequences ;
|
||||||
|
IN: io.streams.byte-array.fast
|
||||||
|
|
||||||
|
! This is split off from io.streams.byte-array because it uses
|
||||||
|
! memcpy, which is a non-core word that only works after the
|
||||||
|
! optimizing compiler has been loaded.
|
||||||
|
|
||||||
|
M: byte-vector stream-write
|
||||||
|
[ dup byte-length tail-slice ]
|
||||||
|
[ [ [ byte-length ] bi@ + ] keep lengthen ]
|
||||||
|
[ drop byte-length ]
|
||||||
|
2tri
|
||||||
|
[ >c-ptr swap >c-ptr ] dip memcpy ;
|
|
@ -1,4 +1,4 @@
|
||||||
! Copyright (C) 2008 Daniel Ehrenberg.
|
! Copyright (C) 2008, 2010 Daniel Ehrenberg, Slava Pestov.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: math kernel sequences sbufs vectors namespaces growable
|
USING: math kernel sequences sbufs vectors namespaces growable
|
||||||
strings io classes continuations destructors combinators
|
strings io classes continuations destructors combinators
|
||||||
|
@ -12,6 +12,10 @@ GENERIC: decode-char ( stream encoding -- char/f )
|
||||||
|
|
||||||
GENERIC: encode-char ( char stream encoding -- )
|
GENERIC: encode-char ( char stream encoding -- )
|
||||||
|
|
||||||
|
GENERIC: encode-string ( string stream encoding -- )
|
||||||
|
|
||||||
|
M: object encode-string [ encode-char ] 2curry each ; inline
|
||||||
|
|
||||||
GENERIC: <decoder> ( stream encoding -- newstream )
|
GENERIC: <decoder> ( stream encoding -- newstream )
|
||||||
|
|
||||||
CONSTANT: replacement-char HEX: fffd
|
CONSTANT: replacement-char HEX: fffd
|
||||||
|
@ -134,13 +138,8 @@ M: encoder stream-element-type
|
||||||
M: encoder stream-write1
|
M: encoder stream-write1
|
||||||
>encoder< encode-char ;
|
>encoder< encode-char ;
|
||||||
|
|
||||||
GENERIC# encoder-write 2 ( string stream encoding -- )
|
|
||||||
|
|
||||||
M: string encoder-write
|
|
||||||
[ encode-char ] 2curry each ;
|
|
||||||
|
|
||||||
M: encoder stream-write
|
M: encoder stream-write
|
||||||
>encoder< encoder-write ;
|
>encoder< encode-string ;
|
||||||
|
|
||||||
M: encoder dispose stream>> dispose ;
|
M: encoder dispose stream>> dispose ;
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,8 @@
|
||||||
! Copyright (C) 2006, 2008 Daniel Ehrenberg.
|
! Copyright (C) 2006, 2008 Daniel Ehrenberg.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: math math.order kernel sequences sbufs vectors growable io
|
USING: accessors byte-arrays math math.order kernel sequences
|
||||||
continuations namespaces io.encodings combinators strings ;
|
sbufs vectors growable io continuations namespaces io.encodings
|
||||||
|
combinators strings ;
|
||||||
IN: io.encodings.utf8
|
IN: io.encodings.utf8
|
||||||
|
|
||||||
! Decoding UTF-8
|
! Decoding UTF-8
|
||||||
|
@ -45,10 +46,10 @@ M: utf8 decode-char
|
||||||
! Encoding UTF-8
|
! Encoding UTF-8
|
||||||
|
|
||||||
: encoded ( stream char -- )
|
: encoded ( stream char -- )
|
||||||
BIN: 111111 bitand BIN: 10000000 bitor swap stream-write1 ;
|
BIN: 111111 bitand BIN: 10000000 bitor swap stream-write1 ; inline
|
||||||
|
|
||||||
: char>utf8 ( stream char -- )
|
: char>utf8 ( char stream -- )
|
||||||
{
|
swap {
|
||||||
{ [ dup -7 shift zero? ] [ swap stream-write1 ] }
|
{ [ dup -7 shift zero? ] [ swap stream-write1 ] }
|
||||||
{ [ dup -11 shift zero? ] [
|
{ [ dup -11 shift zero? ] [
|
||||||
2dup -6 shift BIN: 11000000 bitor swap stream-write1
|
2dup -6 shift BIN: 11000000 bitor swap stream-write1
|
||||||
|
@ -65,10 +66,16 @@ M: utf8 decode-char
|
||||||
2dup -6 shift encoded
|
2dup -6 shift encoded
|
||||||
encoded
|
encoded
|
||||||
]
|
]
|
||||||
} cond ;
|
} cond ; inline
|
||||||
|
|
||||||
M: utf8 encode-char
|
M: utf8 encode-char
|
||||||
drop swap char>utf8 ;
|
drop char>utf8 ;
|
||||||
|
|
||||||
|
M: utf8 encode-string
|
||||||
|
drop
|
||||||
|
over aux>>
|
||||||
|
[ [ char>utf8 ] curry each ]
|
||||||
|
[ [ >byte-array ] dip stream-write ] if ;
|
||||||
|
|
||||||
PRIVATE>
|
PRIVATE>
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,7 @@
|
||||||
! Copyright (C) 2003, 2008 Slava Pestov.
|
! Copyright (C) 2003, 2008 Slava Pestov.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: accessors kernel math.private sequences kernel.private
|
USING: accessors kernel math.private sequences kernel.private
|
||||||
math sequences.private slots.private byte-arrays
|
math sequences.private slots.private alien.accessors ;
|
||||||
alien.accessors ;
|
|
||||||
IN: strings
|
IN: strings
|
||||||
|
|
||||||
<PRIVATE
|
<PRIVATE
|
||||||
|
|
Loading…
Reference in New Issue