Finishing updating UTF
parent
64650d8500
commit
c584e50c04
|
@ -10,7 +10,7 @@ TUPLE: encode-error ;
|
||||||
|
|
||||||
TUPLE: decode-error ;
|
TUPLE: decode-error ;
|
||||||
|
|
||||||
: decode-error ( -- * ) \ encode-error construct-empty throw ;
|
: decode-error ( -- * ) \ decode-error construct-empty throw ;
|
||||||
|
|
||||||
SYMBOL: begin
|
SYMBOL: begin
|
||||||
|
|
||||||
|
|
|
@ -1,15 +1,15 @@
|
||||||
USING: tools.test io.utf16 ;
|
USING: tools.test io.utf16 arrays unicode.syntax ;
|
||||||
|
|
||||||
[ { CHAR: x } ] [ { 0 CHAR: x } decode-utf16be >array ] unit-test
|
[ { CHAR: x } ] [ { 0 CHAR: x } decode-utf16be >array ] unit-test
|
||||||
[ { HEX: 1D11E } ] [ { HEX: D8 HEX: 34 HEX: DD HEX: 1E } decode-utf16be >array ] unit-test
|
[ { HEX: 1D11E } ] [ { HEX: D8 HEX: 34 HEX: DD HEX: 1E } decode-utf16be >array ] unit-test
|
||||||
[ { BIN: 11011111 CHAR: q } decode-utf16be >array ] unit-test-fails
|
[ { UNICHAR: replacement-character } ] [ { BIN: 11011111 CHAR: q } decode-utf16be >array ] unit-test
|
||||||
[ { BIN: 11011011 CHAR: x BIN: 11011011 CHAR: x } decode-utf16be >array ] unit-test-fails
|
[ { UNICHAR: replacement-character } ] [ { BIN: 11011011 CHAR: x BIN: 11011011 CHAR: x } decode-utf16be >array ] unit-test
|
||||||
|
|
||||||
[ B{ 0 120 216 52 221 30 } ] [ { CHAR: x HEX: 1d11e } encode-utf16be >array ] unit-test
|
[ B{ 0 120 216 52 221 30 } ] [ { CHAR: x HEX: 1d11e } encode-utf16be ] unit-test
|
||||||
|
|
||||||
[ { CHAR: x } ] [ { CHAR: x 0 } decode-utf16le >array ] unit-test
|
[ { CHAR: x } ] [ { CHAR: x 0 } decode-utf16le >array ] unit-test
|
||||||
[ { 119070 } ] [ { HEX: 34 HEX: D8 HEX: 1E HEX: DD } decode-utf16le >array ] unit-test
|
[ { 119070 } ] [ { HEX: 34 HEX: D8 HEX: 1E HEX: DD } decode-utf16le >array ] unit-test
|
||||||
[ { 0 BIN: 11011111 } decode-utf16le >array ] unit-test-fails
|
[ { UNICHAR: replacement-character } ] [ { 0 BIN: 11011111 } decode-utf16le >array ] unit-test
|
||||||
[ { 0 BIN: 11011011 0 0 } decode-utf16le >array ] unit-test-fails
|
[ { UNICHAR: replacement-character } ] [ { 0 BIN: 11011011 0 0 } decode-utf16le >array ] unit-test
|
||||||
|
|
||||||
[ B{ 120 0 52 216 30 221 } ] [ { CHAR: x HEX: 1d11e } encode-utf16le >array ] unit-test
|
[ B{ 120 0 52 216 30 221 } ] [ { CHAR: x HEX: 1d11e } encode-utf16le ] unit-test
|
||||||
|
|
|
@ -8,6 +8,9 @@ SYMBOL: double
|
||||||
SYMBOL: quad1
|
SYMBOL: quad1
|
||||||
SYMBOL: quad2
|
SYMBOL: quad2
|
||||||
SYMBOL: quad3
|
SYMBOL: quad3
|
||||||
|
SYMBOL: ignore
|
||||||
|
|
||||||
|
: do-ignore ( -- ch state ) 0 ignore ;
|
||||||
|
|
||||||
: append-nums ( byte ch -- ch )
|
: append-nums ( byte ch -- ch )
|
||||||
8 shift bitor ;
|
8 shift bitor ;
|
||||||
|
@ -19,21 +22,22 @@ SYMBOL: quad3
|
||||||
dup -3 shift BIN: 11011 number= [
|
dup -3 shift BIN: 11011 number= [
|
||||||
dup BIN: 00000100 bitand zero?
|
dup BIN: 00000100 bitand zero?
|
||||||
[ BIN: 11 bitand quad1 ]
|
[ BIN: 11 bitand quad1 ]
|
||||||
[ decode-error ] if
|
[ drop do-ignore ] if
|
||||||
] [ double ] if ;
|
] [ double ] if ;
|
||||||
|
|
||||||
: handle-quad2be ( byte ch -- ch )
|
: handle-quad2be ( byte ch -- ch state )
|
||||||
swap dup -2 shift BIN: 110111 number= [
|
swap dup -2 shift BIN: 110111 number= [
|
||||||
>r 2 shift r> BIN: 11 bitand bitor
|
>r 2 shift r> BIN: 11 bitand bitor quad3
|
||||||
] [ decode-error ] if ;
|
] [ 2drop do-ignore ] if ;
|
||||||
|
|
||||||
: (decode-utf16be) ( buf byte ch state -- buf ch state )
|
: (decode-utf16be) ( buf byte ch state -- buf ch state )
|
||||||
{
|
{
|
||||||
{ begin [ drop begin-utf16be ] }
|
{ begin [ drop begin-utf16be ] }
|
||||||
{ double [ end-multibyte ] }
|
{ double [ end-multibyte ] }
|
||||||
{ quad1 [ append-nums quad2 ] }
|
{ quad1 [ append-nums quad2 ] }
|
||||||
{ quad2 [ handle-quad2be quad3 ] }
|
{ quad2 [ handle-quad2be ] }
|
||||||
{ quad3 [ append-nums HEX: 10000 + decoded ] }
|
{ quad3 [ append-nums HEX: 10000 + decoded ] }
|
||||||
|
{ ignore [ 2drop push-replacement ] }
|
||||||
} case ;
|
} case ;
|
||||||
|
|
||||||
: decode-utf16be ( seq -- str )
|
: decode-utf16be ( seq -- str )
|
||||||
|
@ -43,13 +47,13 @@ SYMBOL: quad3
|
||||||
swap dup -3 shift BIN: 11011 = [
|
swap dup -3 shift BIN: 11011 = [
|
||||||
dup BIN: 100 bitand 0 number=
|
dup BIN: 100 bitand 0 number=
|
||||||
[ BIN: 11 bitand 8 shift bitor quad2 ]
|
[ BIN: 11 bitand 8 shift bitor quad2 ]
|
||||||
[ decode-error ] if
|
[ 2drop push-replacement ] if
|
||||||
] [ end-multibyte ] if ;
|
] [ end-multibyte ] if ;
|
||||||
|
|
||||||
: handle-quad3le ( buf byte ch -- buf ch state )
|
: handle-quad3le ( buf byte ch -- buf ch state )
|
||||||
swap dup -2 shift BIN: 110111 = [
|
swap dup -2 shift BIN: 110111 = [
|
||||||
BIN: 11 bitand append-nums HEX: 10000 + decoded
|
BIN: 11 bitand append-nums HEX: 10000 + decoded
|
||||||
] [ decode-error ] if ;
|
] [ 2drop push-replacement ] if ;
|
||||||
|
|
||||||
: (decode-utf16le) ( buf byte ch state -- buf ch state )
|
: (decode-utf16le) ( buf byte ch state -- buf ch state )
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue