io.encodings.utf8: guard against decoding overlong encodings
							parent
							
								
									b88b2c9b81
								
							
						
					
					
						commit
						fc6e308d7a
					
				| 
						 | 
				
			
			@ -8,19 +8,19 @@ IN: io.encodings.utf8.tests
 | 
			
		|||
! Test helper: turns the input array into a string, encodes that string
! with the utf8 encoding, and returns the encoded result as an array.
: encode-utf8-w/stream ( array -- newarray )
 | 
			
		||||
    >string utf8 encode >array ;
 | 
			
		||||
 | 
			
		||||
[ { CHAR: replacement-character } ] [ { BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 11111111 } decode-utf8-w/stream ] unit-test
 | 
			
		||||
[ { CHAR: replacement-character } ] [ { BIN: 11110,101 BIN: 10,111111 BIN: 10,000000 BIN: 11111111 } decode-utf8-w/stream ] unit-test
 | 
			
		||||
 | 
			
		||||
[ { BIN: 101111111000000111111 } ] [ { BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 10111111 } decode-utf8-w/stream ] unit-test
 | 
			
		||||
[ { BIN: 101111111000000111111 } ] [ { BIN: 11110,101 BIN: 10,111111 BIN: 10,000000 BIN: 10,111111 } decode-utf8-w/stream ] unit-test
 | 
			
		||||
 | 
			
		||||
[ "x" ] [ "x" decode-utf8-w/stream >string ] unit-test
 | 
			
		||||
 | 
			
		||||
[ { BIN: 11111000000 } ] [ { BIN: 11011111 BIN: 10000000 } decode-utf8-w/stream >array ] unit-test
 | 
			
		||||
[ { BIN: 11111000000 } ] [ { BIN: 110,11111 BIN: 10,000000 } decode-utf8-w/stream >array ] unit-test
 | 
			
		||||
 | 
			
		||||
[ { CHAR: replacement-character } ] [ { BIN: 10000000 } decode-utf8-w/stream ] unit-test
 | 
			
		||||
 | 
			
		||||
[ { BIN: 1111000000111111 } ] [ { BIN: 11101111 BIN: 10000000 BIN: 10111111 } decode-utf8-w/stream >array ] unit-test
 | 
			
		||||
[ { BIN: 1111000000111111 } ] [ { BIN: 1110,1111 BIN: 10,000000 BIN: 10,111111 } decode-utf8-w/stream >array ] unit-test
 | 
			
		||||
 | 
			
		||||
[ { BIN: 11110101 BIN: 10111111 BIN: 10000000 BIN: 10111111 BIN: 11101111 BIN: 10000000 BIN: 10111111 BIN: 11011111 BIN: 10000000 CHAR: x } ]
 | 
			
		||||
[ { BIN: 11110,101 BIN: 10,111111 BIN: 10,000000 BIN: 10,111111 BIN: 1110,1111 BIN: 10,000000 BIN: 10,111111 BIN: 110,11111 BIN: 10,000000 CHAR: x } ]
 | 
			
		||||
[ { BIN: 101111111000000111111 BIN: 1111000000111111 BIN: 11111000000 CHAR: x } encode-utf8-w/stream ] unit-test
 | 
			
		||||
 | 
			
		||||
[ 3 ] [ 1 "日本語" >utf8-index ] unit-test
 | 
			
		||||
| 
						 | 
				
			
			@ -29,3 +29,15 @@ IN: io.encodings.utf8.tests
 | 
			
		|||
[ 3 ] [ 2 "lápis" >utf8-index ] unit-test
 | 
			
		||||
 | 
			
		||||
[ V{ } ] [ 100000 iota [ [ code-point-length ] [ 1string utf8 encode length ] bi = not ] filter ] unit-test
 | 
			
		||||
 | 
			
		||||
[ { CHAR: replacement-character } ] [ { BIN: 110,00000 BIN: 10,000000 } decode-utf8-w/stream ] unit-test
 | 
			
		||||
[ { CHAR: replacement-character } ] [ { BIN: 110,00001 BIN: 10,111111 } decode-utf8-w/stream ] unit-test
 | 
			
		||||
[ { HEX: 80 } ] [ { BIN: 110,00010 BIN: 10,000000 } decode-utf8-w/stream ] unit-test
 | 
			
		||||
 | 
			
		||||
[ { CHAR: replacement-character } ] [ { BIN: 1110,0000 BIN: 10,000000 BIN: 10,000000 } decode-utf8-w/stream ] unit-test
 | 
			
		||||
[ { CHAR: replacement-character } ] [ { BIN: 1110,0000 BIN: 10,011111 BIN: 10,111111 } decode-utf8-w/stream ] unit-test
 | 
			
		||||
[ { HEX: 800 } ] [ { BIN: 1110,0000 BIN: 10,100000 BIN: 10,000000 } decode-utf8-w/stream ] unit-test
 | 
			
		||||
 | 
			
		||||
[ { CHAR: replacement-character } ] [ { BIN: 11110,000 BIN: 10,000000 BIN: 10,000000 BIN: 10,000000 } decode-utf8-w/stream ] unit-test
 | 
			
		||||
[ { CHAR: replacement-character } ] [ { BIN: 11110,000 BIN: 10,001111 BIN: 10,111111 BIN: 10,111111 } decode-utf8-w/stream ] unit-test
 | 
			
		||||
[ { HEX: 10000 } ] [ { BIN: 11110,000 BIN: 10,010000 BIN: 10,000000 BIN: 10,000000 } decode-utf8-w/stream ] unit-test
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -19,14 +19,20 @@ SINGLETON: utf8
 | 
			
		|||
    [ swap 6 shift swap BIN: 111111 bitand bitor ]
 | 
			
		||||
    [ 2drop replacement-char ] if ; inline
 | 
			
		||||
 | 
			
		||||
! Rejects code points that were encoded with more bytes than necessary.
! char    -- the decoded code point
! minimum -- the smallest code point allowed for the sequence length that
!            produced char (the callers in this file pass HEX: 80,
!            HEX: 800 and HEX: 10000)
! Leaves char unchanged when it is >= minimum; otherwise drops it and
! leaves replacement-char in its place.
: minimum-code-point ( char minimum -- char )
 | 
			
		||||
    over > [ drop replacement-char ] when ; 
 | 
			
		||||
 | 
			
		||||
: double ( stream byte -- stream char )
 | 
			
		||||
    BIN: 11111 bitand append-nums ; inline
 | 
			
		||||
    BIN: 11111 bitand append-nums
 | 
			
		||||
    HEX: 80 minimum-code-point ; inline
 | 
			
		||||
 | 
			
		||||
: triple ( stream byte -- stream char )
 | 
			
		||||
    BIN: 1111 bitand append-nums append-nums ; inline
 | 
			
		||||
    BIN: 1111 bitand append-nums append-nums
 | 
			
		||||
    HEX: 800 minimum-code-point ; inline
 | 
			
		||||
 | 
			
		||||
: quadruple ( stream byte -- stream char )
 | 
			
		||||
    BIN: 111 bitand append-nums append-nums append-nums ; inline
 | 
			
		||||
    BIN: 111 bitand append-nums append-nums append-nums
 | 
			
		||||
    HEX: 10000 minimum-code-point ; inline
 | 
			
		||||
 | 
			
		||||
: begin-utf8 ( stream byte -- stream char )
 | 
			
		||||
    {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue