From c32760cc1df1391523d3240dfca203b2f611749e Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Wed, 25 Aug 2010 09:28:39 -0700 Subject: [PATCH] io.encodings.utf8: also guard against decoding code points > 0x10FFFF --- core/io/encodings/utf8/utf8-tests.factor | 4 ++-- core/io/encodings/utf8/utf8.factor | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/core/io/encodings/utf8/utf8-tests.factor b/core/io/encodings/utf8/utf8-tests.factor index 6d785ce9c3..7f6c7e9876 100644 --- a/core/io/encodings/utf8/utf8-tests.factor +++ b/core/io/encodings/utf8/utf8-tests.factor @@ -10,8 +10,6 @@ IN: io.encodings.utf8.tests [ { CHAR: replacement-character } ] [ { BIN: 11110,101 BIN: 10,111111 BIN: 10,000000 BIN: 11111111 } decode-utf8-w/stream ] unit-test -[ { BIN: 101111111000000111111 } ] [ { BIN: 11110,101 BIN: 10,111111 BIN: 10,000000 BIN: 10,111111 } decode-utf8-w/stream ] unit-test - [ "x" ] [ "x" decode-utf8-w/stream >string ] unit-test [ { BIN: 11111000000 } ] [ { BIN: 110,11111 BIN: 10,000000 } decode-utf8-w/stream >array ] unit-test @@ -40,4 +38,6 @@ IN: io.encodings.utf8.tests [ { CHAR: replacement-character } ] [ { BIN: 11110,000 BIN: 10,000000 BIN: 10,000000 BIN: 10,000000 } decode-utf8-w/stream ] unit-test [ { CHAR: replacement-character } ] [ { BIN: 11110,000 BIN: 10,001111 BIN: 10,111111 BIN: 10,111111 } decode-utf8-w/stream ] unit-test +[ { CHAR: replacement-character } ] [ { BIN: 11110,100 BIN: 10,010000 BIN: 10,000000 BIN: 10,000000 } decode-utf8-w/stream ] unit-test [ { HEX: 10000 } ] [ { BIN: 11110,000 BIN: 10,010000 BIN: 10,000000 BIN: 10,000000 } decode-utf8-w/stream ] unit-test +[ { HEX: 10FFFF } ] [ { BIN: 11110,100 BIN: 10,001111 BIN: 10,111111 BIN: 10,111111 } decode-utf8-w/stream ] unit-test diff --git a/core/io/encodings/utf8/utf8.factor b/core/io/encodings/utf8/utf8.factor index b6c89a6983..3bc88078b6 100644 --- a/core/io/encodings/utf8/utf8.factor +++ b/core/io/encodings/utf8/utf8.factor @@ -20,7 +20,9 @@ SINGLETON: utf8 [ 2drop replacement-char ] if ; inline : minimum-code-point ( char minimum -- char ) - over > [ drop replacement-char ] when ; + over > [ drop replacement-char ] when ; inline +: maximum-code-point ( char maximum -- char ) + over < [ drop replacement-char ] when ; inline : double ( stream byte -- stream char ) BIN: 11111 bitand append-nums @@ -32,7 +34,8 @@ SINGLETON: utf8 : quadruple ( stream byte -- stream char ) BIN: 111 bitand append-nums append-nums append-nums - HEX: 10000 minimum-code-point ; inline + HEX: 10000 minimum-code-point + HEX: 10FFFF maximum-code-point ; inline : begin-utf8 ( stream byte -- stream char ) {