73 lines
1.9 KiB
Factor
73 lines
1.9 KiB
Factor
! Copyright (C) 2006, 2007 Daniel Ehrenberg.
|
|
! See http://factorcode.org/license.txt for BSD license.
|
|
USING: math kernel sequences sbufs vectors
|
|
namespaces io.encodings combinators ;
|
|
IN: io.utf8
|
|
|
|
SYMBOL: double
|
|
SYMBOL: triple
|
|
SYMBOL: triple2
|
|
SYMBOL: quad
|
|
SYMBOL: quad2
|
|
SYMBOL: quad3
|
|
|
|
: starts-2? ( char -- ? )
|
|
-6 shift BIN: 10 number= ;
|
|
|
|
: append-nums ( bottom top -- num )
|
|
over starts-2?
|
|
[ 6 shift swap BIN: 111111 bitand bitor ]
|
|
[ decode-error ] if ;
|
|
|
|
: begin-utf8 ( buf byte -- buf ch state )
|
|
{
|
|
{ [ dup -7 shift zero? ] [ decoded ] }
|
|
{ [ dup -5 shift BIN: 110 number= ] [ BIN: 11111 bitand double ] }
|
|
{ [ dup -4 shift BIN: 1110 number= ] [ BIN: 1111 bitand triple ] }
|
|
{ [ dup -3 shift BIN: 11110 number= ] [ BIN: 111 bitand quad ] }
|
|
{ [ t ] [ decode-error ] }
|
|
} cond ;
|
|
|
|
: end-multibyte ( buf byte ch -- buf ch state )
|
|
append-nums decoded ;
|
|
|
|
: (decode-utf8) ( buf byte ch state -- buf ch state )
|
|
{
|
|
{ begin [ drop begin-utf8 ] }
|
|
{ double [ end-multibyte ] }
|
|
{ triple [ append-nums triple2 ] }
|
|
{ triple2 [ end-multibyte ] }
|
|
{ quad [ append-nums quad2 ] }
|
|
{ quad2 [ append-nums quad3 ] }
|
|
{ quad3 [ end-multibyte ] }
|
|
} case ;
|
|
|
|
: decode-utf8 ( seq -- str )
|
|
[ -rot (decode-utf8) ] decode ;
|
|
|
|
: encoded ( char -- )
|
|
BIN: 111111 bitand BIN: 10000000 bitor , ;
|
|
|
|
: char>utf8 ( char -- )
|
|
{
|
|
{ [ dup -7 shift zero? ] [ , ] }
|
|
{ [ dup -11 shift zero? ] [
|
|
dup -6 shift BIN: 11000000 bitor ,
|
|
encoded
|
|
] }
|
|
{ [ dup -16 shift zero? ] [
|
|
dup -12 shift BIN: 11100000 bitor ,
|
|
dup -6 shift encoded
|
|
encoded
|
|
] }
|
|
{ [ t ] [
|
|
dup -18 shift BIN: 11110000 bitor ,
|
|
dup -12 shift encoded
|
|
dup -6 shift encoded
|
|
encoded
|
|
] }
|
|
} cond ;
|
|
|
|
: encode-utf8 ( str -- seq )
|
|
[ [ char>utf8 ] each ] B{ } make ;
|