Incomplete update of UTF decoder
parent
7cd7af7bd1
commit
c66b264af5
|
@ -1,7 +1,7 @@
|
||||||
! Copyright (C) 2006, 2007 Daniel Ehrenberg.
|
! Copyright (C) 2006, 2007 Daniel Ehrenberg.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: math kernel sequences sbufs vectors
|
USING: math kernel sequences sbufs vectors
|
||||||
namespaces ;
|
namespaces unicode.syntax ;
|
||||||
IN: io.encodings
|
IN: io.encodings
|
||||||
|
|
||||||
TUPLE: encode-error ;
|
TUPLE: encode-error ;
|
||||||
|
@ -10,13 +10,16 @@ TUPLE: encode-error ;
|
||||||
|
|
||||||
TUPLE: decode-error ;
|
TUPLE: decode-error ;
|
||||||
|
|
||||||
: decode-error ( -- * ) \ decode-error construct-empty throw ;
|
! Throws a fresh decode-error tuple; never returns (stack effect -- *).
! Must push the decode-error class, not encode-error, so that handlers
! can tell decoding failures apart from encoding failures.
: decode-error ( -- * ) \ decode-error construct-empty throw ;
|
||||||
|
|
||||||
SYMBOL: begin
|
SYMBOL: begin
|
||||||
|
|
||||||
: decoded ( buf ch -- buf ch state )
|
: decoded ( buf ch -- buf ch state )
|
||||||
over push 0 begin ;
|
over push 0 begin ;
|
||||||
|
|
||||||
|
: push-replacement ( buf -- buf ch state )
|
||||||
|
UNICHAR: replacement-character decoded ;
|
||||||
|
|
||||||
: finish-decoding ( buf ch state -- str )
|
: finish-decoding ( buf ch state -- str )
|
||||||
begin eq? [ decode-error ] unless drop "" like ;
|
begin eq? [ decode-error ] unless drop "" like ;
|
||||||
|
|
||||||
|
|
|
@ -14,10 +14,10 @@ SYMBOL: quad3
|
||||||
: starts-2? ( char -- ? )
|
: starts-2? ( char -- ? )
|
||||||
-6 shift BIN: 10 number= ;
|
-6 shift BIN: 10 number= ;
|
||||||
|
|
||||||
: append-nums ( bottom top -- num )
|
: append-nums ( buf bottom top state-out -- buf num state )
|
||||||
over starts-2?
|
>r over starts-2?
|
||||||
[ 6 shift swap BIN: 111111 bitand bitor ]
|
[ 6 shift swap BIN: 111111 bitand bitor r> ]
|
||||||
[ decode-error ] if ;
|
[ r> 3drop push-replacement ] if ;
|
||||||
|
|
||||||
: begin-utf8 ( buf byte -- buf ch state )
|
: begin-utf8 ( buf byte -- buf ch state )
|
||||||
{
|
{
|
||||||
|
@ -25,20 +25,20 @@ SYMBOL: quad3
|
||||||
{ [ dup -5 shift BIN: 110 number= ] [ BIN: 11111 bitand double ] }
|
{ [ dup -5 shift BIN: 110 number= ] [ BIN: 11111 bitand double ] }
|
||||||
{ [ dup -4 shift BIN: 1110 number= ] [ BIN: 1111 bitand triple ] }
|
{ [ dup -4 shift BIN: 1110 number= ] [ BIN: 1111 bitand triple ] }
|
||||||
{ [ dup -3 shift BIN: 11110 number= ] [ BIN: 111 bitand quad ] }
|
{ [ dup -3 shift BIN: 11110 number= ] [ BIN: 111 bitand quad ] }
|
||||||
{ [ t ] [ decode-error ] }
|
{ [ t ] [ drop push-replacement ] }
|
||||||
} cond ;
|
} cond ;
|
||||||
|
|
||||||
: end-multibyte ( buf byte ch -- buf ch state )
|
: end-multibyte ( buf byte ch -- buf ch state )
|
||||||
append-nums decoded ;
|
begin append-nums decoded ;
|
||||||
|
|
||||||
: (decode-utf8) ( buf byte ch state -- buf ch state )
|
: (decode-utf8) ( buf byte ch state -- buf ch state )
|
||||||
{
|
{
|
||||||
{ begin [ drop begin-utf8 ] }
|
{ begin [ drop begin-utf8 ] }
|
||||||
{ double [ end-multibyte ] }
|
{ double [ end-multibyte ] }
|
||||||
{ triple [ append-nums triple2 ] }
|
{ triple [ triple2 append-nums ] }
|
||||||
{ triple2 [ end-multibyte ] }
|
{ triple2 [ end-multibyte ] }
|
||||||
{ quad [ append-nums quad2 ] }
|
{ quad [ quad2 append-nums ] }
|
||||||
{ quad2 [ append-nums quad3 ] }
|
{ quad2 [ quad3 append-nums ] }
|
||||||
{ quad3 [ end-multibyte ] }
|
{ quad3 [ end-multibyte ] }
|
||||||
} case ;
|
} case ;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue