Fixing UTF-8 to put the replacement character for malformed stuff

db4
Daniel Ehrenberg 2008-02-01 22:50:30 -06:00
parent 8be367f52f
commit 64650d8500
2 changed files with 7 additions and 7 deletions

View File

@ -1,16 +1,16 @@
USING: io.utf8 tools.test strings ;
USING: io.utf8 tools.test strings arrays unicode.syntax ;
[ { BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 11111111 } decode-utf8 ] unit-test-fails
[ { UNICHAR: replacement-character } ] [ { BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 11111111 } decode-utf8 >array ] unit-test
[ { BIN: 101111111000000111111 } ] [ { BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 10111111 } decode-utf8 ] unit-test
[ { BIN: 101111111000000111111 } ] [ { BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 10111111 } decode-utf8 >array ] unit-test
[ "x" ] [ "x" decode-utf8 >string ] unit-test
[ { BIN: 11111000000 } ] [ { BIN: 11011111 BIN: 10000000 } decode-utf8 ] unit-test
[ { BIN: 11111000000 } ] [ { BIN: 11011111 BIN: 10000000 } decode-utf8 >array ] unit-test
[ { BIN: 10000000 } decode-utf8 ] unit-test-fails
[ { UNICHAR: replacement-character } ] [ { BIN: 10000000 } decode-utf8 >array ] unit-test
[ { BIN: 1111000000111111 } ] [ { BIN: 11101111 BIN: 10000000 BIN: 10111111 } decode-utf8 ] unit-test
[ { BIN: 1111000000111111 } ] [ { BIN: 11101111 BIN: 10000000 BIN: 10111111 } decode-utf8 >array ] unit-test
[ B{ BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 10111111 BIN: 11101111 BIN: 10000000 BIN: 10111111 BIN: 11011111 BIN: 10000000 CHAR: x } ]
[ { BIN: 101111111000000111111 BIN: 1111000000111111 BIN: 11111000000 CHAR: x } encode-utf8 ] unit-test

View File

@ -29,7 +29,7 @@ SYMBOL: quad3
} cond ;
: end-multibyte ( buf byte ch -- buf ch state )
begin append-nums decoded ;
f append-nums [ decoded ] unless* ;
: (decode-utf8) ( buf byte ch state -- buf ch state )
{