io.encodings: add a fast-path for ascii, utf8 and 8-bit encodings when string only contains ASCII characters
parent
d143aa64b2
commit
2eda6fc6aa
|
@ -1,8 +1,7 @@
|
|||
! (c)2009, 2010 Slava Pestov, Joe Groff bsd license
|
||||
USING: accessors alien alien.c-types alien.arrays alien.strings arrays
|
||||
byte-arrays cpu.architecture fry io io.encodings.binary
|
||||
io.files io.streams.memory kernel libc math sequences words
|
||||
byte-vectors ;
|
||||
USING: accessors alien alien.c-types alien.arrays alien.strings
|
||||
arrays byte-arrays cpu.architecture fry io io.encodings.binary
|
||||
io.files io.streams.memory kernel libc math sequences words ;
|
||||
IN: alien.data
|
||||
|
||||
GENERIC: require-c-array ( c-type -- )
|
||||
|
@ -63,13 +62,6 @@ M: memory-stream stream-read
|
|||
swap memory>byte-array
|
||||
] [ [ + ] change-index drop ] 2bi ;
|
||||
|
||||
M: byte-vector stream-write
|
||||
[ dup byte-length tail-slice ]
|
||||
[ [ [ byte-length ] bi@ + ] keep lengthen ]
|
||||
[ drop byte-length ]
|
||||
2tri
|
||||
[ >c-ptr swap >c-ptr ] dip memcpy ;
|
||||
|
||||
M: value-type c-type-rep drop int-rep ;
|
||||
|
||||
M: value-type c-type-getter
|
||||
|
@ -83,4 +75,3 @@ M: array c-type-boxer-quot
|
|||
unclip [ array-length ] dip [ <c-direct-array> ] 2curry ;
|
||||
|
||||
M: array c-type-unboxer-quot drop [ >c-ptr ] ;
|
||||
|
||||
|
|
|
@ -13,9 +13,9 @@ TUPLE: biassoc from to ;
|
|||
|
||||
M: biassoc assoc-size from>> assoc-size ;
|
||||
|
||||
M: biassoc at* from>> at* ;
|
||||
M: biassoc at* from>> at* ; inline
|
||||
|
||||
M: biassoc value-at* to>> at* ;
|
||||
M: biassoc value-at* to>> at* ; inline
|
||||
|
||||
: once-at ( value key assoc -- )
|
||||
2dup key? [ 3drop ] [ set-at ] if ;
|
||||
|
|
|
@ -35,7 +35,7 @@ gc
|
|||
[ optimized? not ] filter compile ;
|
||||
|
||||
"debug-compiler" get [
|
||||
|
||||
|
||||
nl
|
||||
"Compiling..." write flush
|
||||
|
||||
|
@ -117,4 +117,6 @@ gc
|
|||
|
||||
" done" print flush
|
||||
|
||||
"io.streams.byte-array.fast" require
|
||||
|
||||
] unless
|
||||
|
|
|
@ -2,10 +2,10 @@
|
|||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors arrays assocs byte-arrays byte-vectors classes
|
||||
combinators definitions effects fry generic generic.single
|
||||
generic.standard hashtables io.binary io.streams.string kernel
|
||||
kernel.private math math.integers.private math.parser
|
||||
namespaces parser sbufs sequences splitting splitting.private strings
|
||||
vectors words ;
|
||||
generic.standard hashtables io.binary io.encodings
|
||||
io.streams.string kernel kernel.private math
|
||||
math.integers.private math.parser namespaces parser sbufs
|
||||
sequences splitting splitting.private strings vectors words ;
|
||||
IN: hints
|
||||
|
||||
GENERIC: specializer-predicate ( spec -- quot )
|
||||
|
@ -131,3 +131,5 @@ M\ hashtable at* { { fixnum object } { word object } } "specializer" set-word-pr
|
|||
M\ hashtable set-at { { object fixnum object } { object word object } } "specializer" set-word-prop
|
||||
|
||||
\ bignum/f { { bignum bignum } { bignum fixnum } { fixnum bignum } { fixnum fixnum } } "specializer" set-word-prop
|
||||
|
||||
\ encode-string { string object object } "specializer" set-word-prop
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
! Copyright (C) 2008 Daniel Ehrenberg, Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: math.parser arrays io.encodings sequences kernel assocs
|
||||
hashtables io.encodings.ascii generic parser classes.tuple words
|
||||
words.symbol io io.files splitting namespaces math
|
||||
compiler.units accessors classes.singleton classes.mixin
|
||||
io.encodings.iana fry simple-flat-file lexer ;
|
||||
USING: arrays assocs biassocs kernel io.encodings math.parser
|
||||
sequences hashtables io.encodings.ascii generic parser
|
||||
classes.tuple words words.symbol io io.files splitting
|
||||
namespaces math compiler.units accessors classes.singleton
|
||||
classes.mixin io.encodings.iana fry simple-flat-file lexer ;
|
||||
IN: io.encodings.8-bit
|
||||
|
||||
<PRIVATE
|
||||
|
@ -15,20 +15,22 @@ IN: io.encodings.8-bit
|
|||
SYMBOL: 8-bit-encodings
|
||||
8-bit-encodings [ H{ } clone ] initialize
|
||||
|
||||
TUPLE: 8-bit biassoc ;
|
||||
TUPLE: 8-bit { biassoc biassoc read-only } ;
|
||||
|
||||
: encode-8-bit ( char stream assoc -- )
|
||||
swapd value-at
|
||||
[ swap stream-write1 ] [ encode-error ] if* ; inline
|
||||
: 8-bit-encode ( char 8-bit -- byte )
|
||||
biassoc>> value-at [ encode-error ] unless* ; inline
|
||||
|
||||
M: 8-bit encode-char biassoc>> encode-8-bit ;
|
||||
M: 8-bit encode-char
|
||||
swap [ 8-bit-encode ] dip stream-write1 ;
|
||||
|
||||
: decode-8-bit ( stream assoc -- char/f )
|
||||
swap stream-read1
|
||||
[ swap at [ replacement-char ] unless* ]
|
||||
[ drop f ] if* ; inline
|
||||
M: 8-bit encode-string
|
||||
swap [ '[ _ 8-bit-encode ] B{ } map-as ] dip stream-write ;
|
||||
|
||||
M: 8-bit decode-char biassoc>> decode-8-bit ;
|
||||
M: 8-bit decode-char
|
||||
swap stream-read1 dup
|
||||
[ swap biassoc>> at [ replacement-char ] unless* ]
|
||||
[ 2drop f ]
|
||||
if ;
|
||||
|
||||
MIXIN: 8-bit-encoding
|
||||
|
||||
|
|
|
@ -1,22 +1,27 @@
|
|||
! Copyright (C) 2008 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: io io.encodings kernel math io.encodings.private ;
|
||||
USING: accessors byte-arrays io io.encodings
|
||||
io.encodings.private kernel math sequences ;
|
||||
IN: io.encodings.ascii
|
||||
|
||||
<PRIVATE
|
||||
: encode-if< ( char stream encoding max -- )
|
||||
nip 1 - pick < [ encode-error ] [ stream-write1 ] if ; inline
|
||||
|
||||
: decode-if< ( stream encoding max -- character )
|
||||
nip swap stream-read1 dup
|
||||
[ [ nip ] [ > ] 2bi [ >fixnum ] [ drop replacement-char ] if ]
|
||||
[ 2drop f ] if ; inline
|
||||
PRIVATE>
|
||||
|
||||
SINGLETON: ascii
|
||||
|
||||
M: ascii encode-char
|
||||
128 encode-if< ; inline
|
||||
drop
|
||||
over 127 <= [ stream-write1 ] [ encode-error ] if ; inline
|
||||
|
||||
M: ascii encode-string
|
||||
drop
|
||||
[
|
||||
dup aux>>
|
||||
[ [ dup 127 <= [ encode-error ] unless ] B{ } map-as ]
|
||||
[ >byte-array ]
|
||||
if
|
||||
] dip
|
||||
stream-write ;
|
||||
|
||||
M: ascii decode-char
|
||||
128 decode-if< ; inline
|
||||
drop
|
||||
stream-read1 dup [
|
||||
dup 127 <= [ >fixnum ] [ drop replacement-char ] if
|
||||
] when ; inline
|
||||
|
|
|
@ -114,7 +114,7 @@ M: output-port stream-write1
|
|||
|
||||
: write-in-groups ( byte-array port -- )
|
||||
[ binary-object <direct-uchar-array> ] dip
|
||||
[ buffer>> size>> <groups> ] [ '[ _ stream-write ] ] bi
|
||||
[ buffer>> size>> <sliced-groups> ] [ '[ _ stream-write ] ] bi
|
||||
each ;
|
||||
|
||||
M: output-port stream-write
|
||||
|
@ -198,5 +198,3 @@ io.encodings.private ;
|
|||
HINTS: decoder-read-until { string input-port utf8 } { string input-port ascii } ;
|
||||
|
||||
HINTS: decoder-readln { input-port utf8 } { input-port ascii } ;
|
||||
|
||||
HINTS: encoder-write { object output-port utf8 } { object output-port ascii } ;
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Slava Pestov
|
|
@ -0,0 +1,15 @@
|
|||
! Copyright (C) 2010 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: alien byte-vectors io kernel libc math sequences ;
|
||||
IN: io.streams.byte-array.fast
|
||||
|
||||
! This is split off from io.streams.byte-array because it uses
|
||||
! memcpy, which is a non-core word that only works after the
|
||||
! optimizing compiler has been loaded.
|
||||
|
||||
M: byte-vector stream-write
|
||||
[ dup byte-length tail-slice ]
|
||||
[ [ [ byte-length ] bi@ + ] keep lengthen ]
|
||||
[ drop byte-length ]
|
||||
2tri
|
||||
[ >c-ptr swap >c-ptr ] dip memcpy ;
|
|
@ -1,4 +1,4 @@
|
|||
! Copyright (C) 2008 Daniel Ehrenberg.
|
||||
! Copyright (C) 2008, 2010 Daniel Ehrenberg, Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: math kernel sequences sbufs vectors namespaces growable
|
||||
strings io classes continuations destructors combinators
|
||||
|
@ -12,6 +12,10 @@ GENERIC: decode-char ( stream encoding -- char/f )
|
|||
|
||||
GENERIC: encode-char ( char stream encoding -- )
|
||||
|
||||
GENERIC: encode-string ( string stream encoding -- )
|
||||
|
||||
M: object encode-string [ encode-char ] 2curry each ; inline
|
||||
|
||||
GENERIC: <decoder> ( stream encoding -- newstream )
|
||||
|
||||
CONSTANT: replacement-char HEX: fffd
|
||||
|
@ -134,13 +138,8 @@ M: encoder stream-element-type
|
|||
M: encoder stream-write1
|
||||
>encoder< encode-char ;
|
||||
|
||||
GENERIC# encoder-write 2 ( string stream encoding -- )
|
||||
|
||||
M: string encoder-write
|
||||
[ encode-char ] 2curry each ;
|
||||
|
||||
M: encoder stream-write
|
||||
>encoder< encoder-write ;
|
||||
>encoder< encode-string ;
|
||||
|
||||
M: encoder dispose stream>> dispose ;
|
||||
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
! Copyright (C) 2006, 2008 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: math math.order kernel sequences sbufs vectors growable io
|
||||
continuations namespaces io.encodings combinators strings ;
|
||||
USING: accessors byte-arrays math math.order kernel sequences
|
||||
sbufs vectors growable io continuations namespaces io.encodings
|
||||
combinators strings ;
|
||||
IN: io.encodings.utf8
|
||||
|
||||
! Decoding UTF-8
|
||||
|
@ -45,10 +46,10 @@ M: utf8 decode-char
|
|||
! Encoding UTF-8
|
||||
|
||||
: encoded ( stream char -- )
|
||||
BIN: 111111 bitand BIN: 10000000 bitor swap stream-write1 ;
|
||||
BIN: 111111 bitand BIN: 10000000 bitor swap stream-write1 ; inline
|
||||
|
||||
: char>utf8 ( stream char -- )
|
||||
{
|
||||
: char>utf8 ( char stream -- )
|
||||
swap {
|
||||
{ [ dup -7 shift zero? ] [ swap stream-write1 ] }
|
||||
{ [ dup -11 shift zero? ] [
|
||||
2dup -6 shift BIN: 11000000 bitor swap stream-write1
|
||||
|
@ -65,10 +66,16 @@ M: utf8 decode-char
|
|||
2dup -6 shift encoded
|
||||
encoded
|
||||
]
|
||||
} cond ;
|
||||
} cond ; inline
|
||||
|
||||
M: utf8 encode-char
|
||||
drop swap char>utf8 ;
|
||||
drop char>utf8 ;
|
||||
|
||||
M: utf8 encode-string
|
||||
drop
|
||||
over aux>>
|
||||
[ [ char>utf8 ] curry each ]
|
||||
[ [ >byte-array ] dip stream-write ] if ;
|
||||
|
||||
PRIVATE>
|
||||
|
||||
|
|
|
@ -1,8 +1,7 @@
|
|||
! Copyright (C) 2003, 2008 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors kernel math.private sequences kernel.private
|
||||
math sequences.private slots.private byte-arrays
|
||||
alien.accessors ;
|
||||
math sequences.private slots.private alien.accessors ;
|
||||
IN: strings
|
||||
|
||||
<PRIVATE
|
||||
|
|
Loading…
Reference in New Issue