io.encodings.utf8: assume streams are largely ASCII.
Results in a 30% faster file-contents on the test file and a 7% faster benchmark.xml.db4.
parent
acbe85f0f4
commit
fec4cf9109
|
@ -78,10 +78,10 @@ M: decoder stream-read1
|
||||||
] keep ; inline
|
] keep ; inline
|
||||||
|
|
||||||
: finish-read ( n/f string -- string/f )
|
: finish-read ( n/f string -- string/f )
|
||||||
{
|
swap {
|
||||||
{ [ over 0 = ] [ 2drop f ] }
|
{ [ dup zero? ] [ 2drop f ] }
|
||||||
{ [ over not ] [ nip ] }
|
{ [ dup not ] [ drop ] }
|
||||||
[ swap head ]
|
[ head ]
|
||||||
} cond ; inline
|
} cond ; inline
|
||||||
|
|
||||||
M: decoder stream-read
|
M: decoder stream-read
|
||||||
|
|
|
@ -39,13 +39,14 @@ SINGLETON: utf8
|
||||||
HEX: 10FFFF maximum-code-point ; inline
|
HEX: 10FFFF maximum-code-point ; inline
|
||||||
|
|
||||||
: begin-utf8 ( stream byte -- stream char )
|
: begin-utf8 ( stream byte -- stream char )
|
||||||
{
|
dup 127 > [
|
||||||
{ [ dup -7 shift zero? ] [ ] }
|
{
|
||||||
{ [ dup -5 shift BIN: 110 = ] [ double ] }
|
{ [ dup -5 shift BIN: 110 = ] [ double ] }
|
||||||
{ [ dup -4 shift BIN: 1110 = ] [ triple ] }
|
{ [ dup -4 shift BIN: 1110 = ] [ triple ] }
|
||||||
{ [ dup -3 shift BIN: 11110 = ] [ quadruple ] }
|
{ [ dup -3 shift BIN: 11110 = ] [ quadruple ] }
|
||||||
[ drop replacement-char ]
|
[ drop replacement-char ]
|
||||||
} cond ; inline
|
} cond
|
||||||
|
] when ; inline
|
||||||
|
|
||||||
: decode-utf8 ( stream -- char/f )
|
: decode-utf8 ( stream -- char/f )
|
||||||
dup stream-read1 dup [ begin-utf8 ] when nip ; inline
|
dup stream-read1 dup [ begin-utf8 ] when nip ; inline
|
||||||
|
@ -59,24 +60,25 @@ M: utf8 decode-char
|
||||||
BIN: 111111 bitand BIN: 10000000 bitor swap stream-write1 ; inline
|
BIN: 111111 bitand BIN: 10000000 bitor swap stream-write1 ; inline
|
||||||
|
|
||||||
: char>utf8 ( char stream -- )
|
: char>utf8 ( char stream -- )
|
||||||
swap {
|
over 127 <= [ stream-write1 ] [
|
||||||
{ [ dup -7 shift zero? ] [ swap stream-write1 ] }
|
swap {
|
||||||
{ [ dup -11 shift zero? ] [
|
{ [ dup -11 shift zero? ] [
|
||||||
2dup -6 shift BIN: 11000000 bitor swap stream-write1
|
2dup -6 shift BIN: 11000000 bitor swap stream-write1
|
||||||
encoded
|
encoded
|
||||||
] }
|
] }
|
||||||
{ [ dup -16 shift zero? ] [
|
{ [ dup -16 shift zero? ] [
|
||||||
2dup -12 shift BIN: 11100000 bitor swap stream-write1
|
2dup -12 shift BIN: 11100000 bitor swap stream-write1
|
||||||
2dup -6 shift encoded
|
2dup -6 shift encoded
|
||||||
encoded
|
encoded
|
||||||
] }
|
] }
|
||||||
[
|
[
|
||||||
2dup -18 shift BIN: 11110000 bitor swap stream-write1
|
2dup -18 shift BIN: 11110000 bitor swap stream-write1
|
||||||
2dup -12 shift encoded
|
2dup -12 shift encoded
|
||||||
2dup -6 shift encoded
|
2dup -6 shift encoded
|
||||||
encoded
|
encoded
|
||||||
]
|
]
|
||||||
} cond ; inline
|
} cond
|
||||||
|
] if ; inline
|
||||||
|
|
||||||
M: utf8 encode-char
|
M: utf8 encode-char
|
||||||
drop char>utf8 ;
|
drop char>utf8 ;
|
||||||
|
|
Loading…
Reference in New Issue