From fec4cf9109f4dab2f512fb2be7db1bcab8f3d23f Mon Sep 17 00:00:00 2001
From: John Benediktsson
Date: Mon, 3 Oct 2011 12:31:46 -0700
Subject: [PATCH] io.encodings.utf8: assume streams are largely ascii.

Results in 30% faster file-contents for test file and 7% faster benchmark.xml.
---
Review notes (commentary placed after the "---" line; it is not part of the
commit message and is skipped when the patch is applied):

* finish-read: the two inputs are swapped once up front, so every cond
  clause inspects the top of the stack with dup instead of reaching under
  it with over. The three outcomes are the same as in the removed code:
  f, the string unchanged, or "head" of the string.
  NOTE(review): the first clause now runs "dup zero?" before the "dup not"
  clause, and the declared stack effect ( n/f string -- string/f ) allows
  the tested value to be f. The removed code used "0 =". Confirm that
  zero? accepts f before relying on this ordering.

* begin-utf8: a single "dup 127 >" test now guards the cond. Bytes up to
  and including 127 are left on the stack untouched; only larger bytes go
  through the lead-byte prefix dispatch (double / triple / quadruple /
  replacement-char), whose clauses are otherwise unchanged.

* char>utf8: values up to and including 127 are written with one
  stream-write1 call and never enter the cond. Larger values are swapped
  under the stream and take the same three multi-byte branches as in the
  removed code.

 core/io/encodings/encodings.factor |  8 ++---
 core/io/encodings/utf8/utf8.factor | 52 ++++++++++++++++--------------
 2 files changed, 31 insertions(+), 29 deletions(-)

diff --git a/core/io/encodings/encodings.factor b/core/io/encodings/encodings.factor
index 1b34b6aa8b..81037ca2df 100644
--- a/core/io/encodings/encodings.factor
+++ b/core/io/encodings/encodings.factor
@@ -78,10 +78,10 @@ M: decoder stream-read1
     ] keep ; inline
 
 : finish-read ( n/f string -- string/f )
-    {
-        { [ over 0 = ] [ 2drop f ] }
-        { [ over not ] [ nip ] }
-        [ swap head ]
+    swap {
+        { [ dup zero? ] [ 2drop f ] }
+        { [ dup not ] [ drop ] }
+        [ head ]
     } cond ; inline
 
 M: decoder stream-read
diff --git a/core/io/encodings/utf8/utf8.factor b/core/io/encodings/utf8/utf8.factor
index 09e3dd5f4b..8fbc71e016 100644
--- a/core/io/encodings/utf8/utf8.factor
+++ b/core/io/encodings/utf8/utf8.factor
@@ -39,13 +39,14 @@ SINGLETON: utf8
     HEX: 10FFFF maximum-code-point ; inline
 
 : begin-utf8 ( stream byte -- stream char )
-    {
-        { [ dup -7 shift zero? ] [ ] }
-        { [ dup -5 shift BIN: 110 = ] [ double ] }
-        { [ dup -4 shift BIN: 1110 = ] [ triple ] }
-        { [ dup -3 shift BIN: 11110 = ] [ quadruple ] }
-        [ drop replacement-char ]
-    } cond ; inline
+    dup 127 > [
+        {
+            { [ dup -5 shift BIN: 110 = ] [ double ] }
+            { [ dup -4 shift BIN: 1110 = ] [ triple ] }
+            { [ dup -3 shift BIN: 11110 = ] [ quadruple ] }
+            [ drop replacement-char ]
+        } cond
+    ] when ; inline
 
 : decode-utf8 ( stream -- char/f )
     dup stream-read1 dup [ begin-utf8 ] when nip ; inline
@@ -59,24 +60,25 @@ M: utf8 decode-char
     BIN: 111111 bitand BIN: 10000000 bitor swap stream-write1 ; inline
 
 : char>utf8 ( char stream -- )
-    swap {
-        { [ dup -7 shift zero? ] [ swap stream-write1 ] }
-        { [ dup -11 shift zero? ] [
-            2dup -6 shift BIN: 11000000 bitor swap stream-write1
-            encoded
-        ] }
-        { [ dup -16 shift zero? ] [
-            2dup -12 shift BIN: 11100000 bitor swap stream-write1
-            2dup -6 shift encoded
-            encoded
-        ] }
-        [
-            2dup -18 shift BIN: 11110000 bitor swap stream-write1
-            2dup -12 shift encoded
-            2dup -6 shift encoded
-            encoded
-        ]
-    } cond ; inline
+    over 127 <= [ stream-write1 ] [
+        swap {
+            { [ dup -11 shift zero? ] [
+                2dup -6 shift BIN: 11000000 bitor swap stream-write1
+                encoded
+            ] }
+            { [ dup -16 shift zero? ] [
+                2dup -12 shift BIN: 11100000 bitor swap stream-write1
+                2dup -6 shift encoded
+                encoded
+            ] }
+            [
+                2dup -18 shift BIN: 11110000 bitor swap stream-write1
+                2dup -12 shift encoded
+                2dup -6 shift encoded
+                encoded
+            ]
+        } cond
+    ] if ; inline
 
 M: utf8 encode-char
     drop char>utf8 ;