From 64650d8500e99b88fcbb19570537a2232fab77da Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Fri, 1 Feb 2008 22:50:30 -0600 Subject: [PATCH] Fixing UTF-8 to put the replacement character for malformed stuff --- core/io/utf8/utf8-tests.factor | 12 ++++++------ core/io/utf8/utf8.factor | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/core/io/utf8/utf8-tests.factor b/core/io/utf8/utf8-tests.factor index d120b6243d..3576471586 100644 --- a/core/io/utf8/utf8-tests.factor +++ b/core/io/utf8/utf8-tests.factor @@ -1,16 +1,16 @@ -USING: io.utf8 tools.test strings ; +USING: io.utf8 tools.test strings arrays unicode.syntax ; -[ { BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 11111111 } decode-utf8 ] unit-test-fails +[ { UNICHAR: replacement-character } ] [ { BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 11111111 } decode-utf8 >array ] unit-test -[ { BIN: 101111111000000111111 } ] [ { BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 10111111 } decode-utf8 ] unit-test +[ { BIN: 101111111000000111111 } ] [ { BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 10111111 } decode-utf8 >array ] unit-test [ "x" ] [ "x" decode-utf8 >string ] unit-test -[ { BIN: 11111000000 } ] [ { BIN: 11011111 BIN: 10000000 } decode-utf8 ] unit-test +[ { BIN: 11111000000 } ] [ { BIN: 11011111 BIN: 10000000 } decode-utf8 >array ] unit-test -[ { BIN: 10000000 } decode-utf8 ] unit-test-fails +[ { UNICHAR: replacement-character } ] [ { BIN: 10000000 } decode-utf8 >array ] unit-test -[ { BIN: 1111000000111111 } ] [ { BIN: 11101111 BIN: 10000000 BIN: 10111111 } decode-utf8 ] unit-test +[ { BIN: 1111000000111111 } ] [ { BIN: 11101111 BIN: 10000000 BIN: 10111111 } decode-utf8 >array ] unit-test [ B{ BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 10111111 BIN: 11101111 BIN: 10000000 BIN: 10111111 BIN: 11011111 BIN: 10000000 CHAR: x } ] [ { BIN: 101111111000000111111 BIN: 1111000000111111 BIN: 11111000000 CHAR: x } encode-utf8 ] unit-test diff --git a/core/io/utf8/utf8.factor b/core/io/utf8/utf8.factor index 321469378d..213afb6eae 100644 --- a/core/io/utf8/utf8.factor +++ b/core/io/utf8/utf8.factor @@ -29,7 +29,7 @@ SYMBOL: quad3 } cond ; : end-multibyte ( buf byte ch -- buf ch state ) - begin append-nums decoded ; + f append-nums [ decoded ] unless* ; : (decode-utf8) ( buf byte ch state -- buf ch state ) {