io.encodings.utf8: assume streams are largely ASCII.

This makes file-contents 30% faster on a test file and benchmark.xml 7% faster.
db4
John Benediktsson 2011-10-03 12:31:46 -07:00
parent acbe85f0f4
commit fec4cf9109
2 changed files with 31 additions and 29 deletions

View File

@ -78,10 +78,10 @@ M: decoder stream-read1
] keep ; inline ] keep ; inline
: finish-read ( n/f string -- string/f ) : finish-read ( n/f string -- string/f )
{ swap {
{ [ over 0 = ] [ 2drop f ] } { [ dup zero? ] [ 2drop f ] }
{ [ over not ] [ nip ] } { [ dup not ] [ drop ] }
[ swap head ] [ head ]
} cond ; inline } cond ; inline
M: decoder stream-read M: decoder stream-read

View File

@ -39,13 +39,14 @@ SINGLETON: utf8
HEX: 10FFFF maximum-code-point ; inline HEX: 10FFFF maximum-code-point ; inline
: begin-utf8 ( stream byte -- stream char ) : begin-utf8 ( stream byte -- stream char )
{ dup 127 > [
{ [ dup -7 shift zero? ] [ ] } {
{ [ dup -5 shift BIN: 110 = ] [ double ] } { [ dup -5 shift BIN: 110 = ] [ double ] }
{ [ dup -4 shift BIN: 1110 = ] [ triple ] } { [ dup -4 shift BIN: 1110 = ] [ triple ] }
{ [ dup -3 shift BIN: 11110 = ] [ quadruple ] } { [ dup -3 shift BIN: 11110 = ] [ quadruple ] }
[ drop replacement-char ] [ drop replacement-char ]
} cond ; inline } cond
] when ; inline
: decode-utf8 ( stream -- char/f ) : decode-utf8 ( stream -- char/f )
dup stream-read1 dup [ begin-utf8 ] when nip ; inline dup stream-read1 dup [ begin-utf8 ] when nip ; inline
@ -59,24 +60,25 @@ M: utf8 decode-char
BIN: 111111 bitand BIN: 10000000 bitor swap stream-write1 ; inline BIN: 111111 bitand BIN: 10000000 bitor swap stream-write1 ; inline
: char>utf8 ( char stream -- ) : char>utf8 ( char stream -- )
swap { over 127 <= [ stream-write1 ] [
{ [ dup -7 shift zero? ] [ swap stream-write1 ] } swap {
{ [ dup -11 shift zero? ] [ { [ dup -11 shift zero? ] [
2dup -6 shift BIN: 11000000 bitor swap stream-write1 2dup -6 shift BIN: 11000000 bitor swap stream-write1
encoded encoded
] } ] }
{ [ dup -16 shift zero? ] [ { [ dup -16 shift zero? ] [
2dup -12 shift BIN: 11100000 bitor swap stream-write1 2dup -12 shift BIN: 11100000 bitor swap stream-write1
2dup -6 shift encoded 2dup -6 shift encoded
encoded encoded
] } ] }
[ [
2dup -18 shift BIN: 11110000 bitor swap stream-write1 2dup -18 shift BIN: 11110000 bitor swap stream-write1
2dup -12 shift encoded 2dup -12 shift encoded
2dup -6 shift encoded 2dup -6 shift encoded
encoded encoded
] ]
} cond ; inline } cond
] if ; inline
M: utf8 encode-char M: utf8 encode-char
drop char>utf8 ; drop char>utf8 ;