io.encodings.utf8: also guard against decoding code points > 0x10FFFF

db4
Joe Groff 2010-08-25 09:28:39 -07:00
parent fc6e308d7a
commit c32760cc1d
2 changed files with 7 additions and 4 deletions

View File

@ -10,8 +10,6 @@ IN: io.encodings.utf8.tests
[ { CHAR: replacement-character } ] [ { BIN: 11110,101 BIN: 10,111111 BIN: 10,000000 BIN: 11111111 } decode-utf8-w/stream ] unit-test [ { CHAR: replacement-character } ] [ { BIN: 11110,101 BIN: 10,111111 BIN: 10,000000 BIN: 11111111 } decode-utf8-w/stream ] unit-test
[ { BIN: 101111111000000111111 } ] [ { BIN: 11110,101 BIN: 10,111111 BIN: 10,000000 BIN: 10,111111 } decode-utf8-w/stream ] unit-test
[ "x" ] [ "x" decode-utf8-w/stream >string ] unit-test [ "x" ] [ "x" decode-utf8-w/stream >string ] unit-test
[ { BIN: 11111000000 } ] [ { BIN: 110,11111 BIN: 10,000000 } decode-utf8-w/stream >array ] unit-test [ { BIN: 11111000000 } ] [ { BIN: 110,11111 BIN: 10,000000 } decode-utf8-w/stream >array ] unit-test
@ -40,4 +38,6 @@ IN: io.encodings.utf8.tests
[ { CHAR: replacement-character } ] [ { BIN: 11110,000 BIN: 10,000000 BIN: 10,000000 BIN: 10,000000 } decode-utf8-w/stream ] unit-test [ { CHAR: replacement-character } ] [ { BIN: 11110,000 BIN: 10,000000 BIN: 10,000000 BIN: 10,000000 } decode-utf8-w/stream ] unit-test
[ { CHAR: replacement-character } ] [ { BIN: 11110,000 BIN: 10,001111 BIN: 10,111111 BIN: 10,111111 } decode-utf8-w/stream ] unit-test [ { CHAR: replacement-character } ] [ { BIN: 11110,000 BIN: 10,001111 BIN: 10,111111 BIN: 10,111111 } decode-utf8-w/stream ] unit-test
[ { CHAR: replacement-character } ] [ { BIN: 11110,100 BIN: 10,010000 BIN: 10,000000 BIN: 10,000000 } decode-utf8-w/stream ] unit-test
[ { HEX: 10000 } ] [ { BIN: 11110,000 BIN: 10,010000 BIN: 10,000000 BIN: 10,000000 } decode-utf8-w/stream ] unit-test [ { HEX: 10000 } ] [ { BIN: 11110,000 BIN: 10,010000 BIN: 10,000000 BIN: 10,000000 } decode-utf8-w/stream ] unit-test
[ { HEX: 10FFFF } ] [ { BIN: 11110,100 BIN: 10,001111 BIN: 10,111111 BIN: 10,111111 } decode-utf8-w/stream ] unit-test

View File

@ -20,7 +20,9 @@ SINGLETON: utf8
[ 2drop replacement-char ] if ; inline [ 2drop replacement-char ] if ; inline
: minimum-code-point ( char minimum -- char ) : minimum-code-point ( char minimum -- char )
over > [ drop replacement-char ] when ; over > [ drop replacement-char ] when ; inline
! Upper-bound guard for a decoded code point.
! Leaves char unchanged unless maximum < char; in that case char is
! dropped and replacement-char is pushed in its place.
! quadruple applies it with HEX: 10FFFF as the maximum.
: maximum-code-point ( char maximum -- char )
over < [ drop replacement-char ] when ; inline
: double ( stream byte -- stream char ) : double ( stream byte -- stream char )
BIN: 11111 bitand append-nums BIN: 11111 bitand append-nums
@ -32,7 +34,8 @@ SINGLETON: utf8
: quadruple ( stream byte -- stream char ) : quadruple ( stream byte -- stream char )
BIN: 111 bitand append-nums append-nums append-nums BIN: 111 bitand append-nums append-nums append-nums
HEX: 10000 minimum-code-point ; inline HEX: 10000 minimum-code-point
HEX: 10FFFF maximum-code-point ; inline
: begin-utf8 ( stream byte -- stream char ) : begin-utf8 ( stream byte -- stream char )
{ {