73 lines
		
	
	
		
			1.9 KiB
		
	
	
	
		
			Factor
		
	
	
			
		
		
	
	
			73 lines
		
	
	
		
			1.9 KiB
		
	
	
	
		
			Factor
		
	
	
! Copyright (C) 2006, 2007 Daniel Ehrenberg.
 | 
						|
! See http://factorcode.org/license.txt for BSD license.
 | 
						|
USING: math kernel sequences sbufs vectors
 | 
						|
namespaces io.encodings combinators ;
 | 
						|
IN: io.utf8
 | 
						|
 | 
						|
SYMBOL: double
 | 
						|
SYMBOL: triple
 | 
						|
SYMBOL: triple2
 | 
						|
SYMBOL: quad
 | 
						|
SYMBOL: quad2
 | 
						|
SYMBOL: quad3
 | 
						|
 | 
						|
: starts-2? ( char -- ? )
 | 
						|
    -6 shift BIN: 10 number= ;
 | 
						|
 | 
						|
: append-nums ( bottom top -- num )
 | 
						|
    over starts-2?
 | 
						|
    [ 6 shift swap BIN: 111111 bitand bitor ]
 | 
						|
    [ decode-error ] if ;
 | 
						|
 | 
						|
: begin-utf8 ( buf byte -- buf ch state )
 | 
						|
    {
 | 
						|
        { [ dup -7 shift zero? ] [ decoded ] }
 | 
						|
        { [ dup -5 shift BIN: 110 number= ] [ BIN: 11111 bitand double ] }
 | 
						|
        { [ dup -4 shift BIN: 1110 number= ] [ BIN: 1111 bitand triple ] }
 | 
						|
        { [ dup -3 shift BIN: 11110 number= ] [ BIN: 111 bitand quad ] }
 | 
						|
        { [ t ] [ decode-error ] }
 | 
						|
    } cond ;
 | 
						|
 | 
						|
: end-multibyte ( buf byte ch -- buf ch state )
 | 
						|
    append-nums decoded ;
 | 
						|
 | 
						|
: (decode-utf8) ( buf byte ch state -- buf ch state )
 | 
						|
    {
 | 
						|
        { begin [ drop begin-utf8 ] }
 | 
						|
        { double [ end-multibyte ] }
 | 
						|
        { triple [ append-nums triple2 ] }
 | 
						|
        { triple2 [ end-multibyte ] }
 | 
						|
        { quad [ append-nums quad2 ] }
 | 
						|
        { quad2 [ append-nums quad3 ] }
 | 
						|
        { quad3 [ end-multibyte ] }
 | 
						|
    } case ;
 | 
						|
 | 
						|
: decode-utf8 ( seq -- str )
 | 
						|
    [ -rot (decode-utf8) ] decode ;
 | 
						|
 | 
						|
: encoded ( char -- )
 | 
						|
    BIN: 111111 bitand BIN: 10000000 bitor , ;
 | 
						|
 | 
						|
: char>utf8 ( char -- )
 | 
						|
    {
 | 
						|
        { [ dup -7 shift zero? ] [ , ] }
 | 
						|
        { [ dup -11 shift zero? ] [
 | 
						|
            dup -6 shift BIN: 11000000 bitor ,
 | 
						|
            encoded
 | 
						|
        ] }
 | 
						|
        { [ dup -16 shift zero? ] [
 | 
						|
            dup -12 shift BIN: 11100000 bitor ,
 | 
						|
            dup -6 shift encoded
 | 
						|
            encoded
 | 
						|
        ] }
 | 
						|
        { [ t ] [
 | 
						|
            dup -18 shift BIN: 11110000 bitor ,
 | 
						|
            dup -12 shift encoded
 | 
						|
            dup -6 shift encoded
 | 
						|
            encoded
 | 
						|
        ] }
 | 
						|
    } cond ;
 | 
						|
 | 
						|
: encode-utf8 ( str -- seq )
 | 
						|
    [ [ char>utf8 ] each ] B{ } make ;
 |