Fix UTF-8 decoding to emit the replacement character for malformed input
parent
8be367f52f
commit
64650d8500
|
@ -1,16 +1,16 @@
|
|||
USING: io.utf8 tools.test strings ;
|
||||
USING: io.utf8 tools.test strings arrays unicode.syntax ;
|
||||
|
||||
[ { BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 11111111 } decode-utf8 ] unit-test-fails
|
||||
[ { UNICHAR: replacement-character } ] [ { BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 11111111 } decode-utf8 >array ] unit-test
|
||||
|
||||
[ { BIN: 101111111000000111111 } ] [ { BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 10111111 } decode-utf8 ] unit-test
|
||||
[ { BIN: 101111111000000111111 } ] [ { BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 10111111 } decode-utf8 >array ] unit-test
|
||||
|
||||
[ "x" ] [ "x" decode-utf8 >string ] unit-test
|
||||
|
||||
[ { BIN: 11111000000 } ] [ { BIN: 11011111 BIN: 10000000 } decode-utf8 ] unit-test
|
||||
[ { BIN: 11111000000 } ] [ { BIN: 11011111 BIN: 10000000 } decode-utf8 >array ] unit-test
|
||||
|
||||
[ { BIN: 10000000 } decode-utf8 ] unit-test-fails
|
||||
[ { UNICHAR: replacement-character } ] [ { BIN: 10000000 } decode-utf8 >array ] unit-test
|
||||
|
||||
[ { BIN: 1111000000111111 } ] [ { BIN: 11101111 BIN: 10000000 BIN: 10111111 } decode-utf8 ] unit-test
|
||||
[ { BIN: 1111000000111111 } ] [ { BIN: 11101111 BIN: 10000000 BIN: 10111111 } decode-utf8 >array ] unit-test
|
||||
|
||||
[ B{ BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 10111111 BIN: 11101111 BIN: 10000000 BIN: 10111111 BIN: 11011111 BIN: 10000000 CHAR: x } ]
|
||||
[ { BIN: 101111111000000111111 BIN: 1111000000111111 BIN: 11111000000 CHAR: x } encode-utf8 ] unit-test
|
||||
|
|
|
@ -29,7 +29,7 @@ SYMBOL: quad3
|
|||
} cond ;
|
||||
|
||||
: end-multibyte ( buf byte ch -- buf ch state )
|
||||
begin append-nums decoded ;
|
||||
f append-nums [ decoded ] unless* ;
|
||||
|
||||
: (decode-utf8) ( buf byte ch state -- buf ch state )
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue