io.encodings.korean johab encodings initial
parent
e36967d3b8
commit
b02332df40
|
@ -5,7 +5,8 @@ io.encodings.korean.private io.encodings.string io.streams.string
|
|||
kernel locals multiline namespaces sequences strings tools.test ;
|
||||
IN: io.encodings.korean.tests
|
||||
|
||||
! convert cp949 <> unicode
|
||||
|
||||
! cp949 encodings
|
||||
|
||||
[ f ] [ HEX: 80 cp949>unicode ] unit-test
|
||||
[ f ] [ HEX: ff cp949>unicode ] unit-test
|
||||
|
@ -44,3 +45,25 @@ IN: io.encodings.korean.tests
|
|||
[ t ] [ phrase-cp949 3 head* cp949 decode phrase-unicode 2 head* = ] unit-test
|
||||
|
||||
[ t ] [ phrase-cp949 2 head* cp949 decode phrase-unicode 2 head* CHAR: replacement-character suffix = ] unit-test
|
||||
|
||||
|
||||
! johab encodings
|
||||
[ HEX: 20 ] [ HEX: 20 johab>unicode ] unit-test
|
||||
[ HEX: 3133 ] [ HEX: 8444 johab>unicode ] unit-test
|
||||
[ HEX: 8A5D ] [ HEX: AD4F unicode>johab ] unit-test
|
||||
|
||||
|
||||
: phrase-johab ( -- s )
|
||||
B{
|
||||
149 183 208 129 162 137 137 193 32 164 130 150 129 172 101
|
||||
183 161 33
|
||||
} ;
|
||||
|
||||
: phrase-johab>unicode ( -- s )
|
||||
phrase-johab johab decode ;
|
||||
|
||||
: phrase-unicode>johab ( -- s )
|
||||
phrase-unicode johab encode ;
|
||||
|
||||
[ t ] [ phrase-johab>unicode phrase-unicode = ] unit-test
|
||||
[ t ] [ phrase-unicode>johab phrase-johab = ] unit-test
|
|
@ -11,8 +11,15 @@ SINGLETON: cp949
|
|||
|
||||
cp949 "EUC-KR" register-encoding
|
||||
|
||||
SINGLETON: johab
|
||||
|
||||
! johab "JOHAB" register-encoding
|
||||
|
||||
|
||||
<PRIVATE
|
||||
|
||||
! cp949 encodings
|
||||
|
||||
VALUE: cp949-table
|
||||
|
||||
"vocab:io/encodings/korean/data/cp949.txt" <code-table>*
|
||||
|
@ -38,7 +45,7 @@ M:: cp949 encode-char ( char stream encoding -- )
|
|||
stream stream-write
|
||||
] if ;
|
||||
|
||||
: decode-char-step2 ( c stream -- char )
|
||||
: cp949-decode-char-step2 ( c stream -- char )
|
||||
stream-read1
|
||||
[ 2byte-array be> cp949>unicode ]
|
||||
[ drop replacement-char ] if* ;
|
||||
|
@ -47,6 +54,50 @@ M:: cp949 decode-char ( stream encoding -- char/f )
|
|||
stream stream-read1
|
||||
{
|
||||
{ [ dup not ] [ drop f ] }
|
||||
{ [ dup cp949-1st? ] [ stream decode-char-step2 ] }
|
||||
{ [ dup cp949-1st? ] [ stream cp949-decode-char-step2 ] }
|
||||
[ ]
|
||||
} cond ;
|
||||
|
||||
|
||||
|
||||
! johab encodings
|
||||
|
||||
VALUE: johab-table
|
||||
|
||||
"vocab:io/encodings/korean/data/johab.txt" <code-table>*
|
||||
to: johab-table
|
||||
|
||||
: johab>unicode ( n -- u ) johab-table n>u ;
|
||||
|
||||
: unicode>johab ( u -- n ) johab-table u>n ;
|
||||
|
||||
: johab-1st? ( n -- ? )
|
||||
[ HEX: 84 HEX: D3 between? ]
|
||||
[ HEX: D8 HEX: DE between? ]
|
||||
[ HEX: E0 HEX: F9 between? ]
|
||||
tri { } 3sequence [ t? ] any? ;
|
||||
|
||||
M:: johab encode-char ( char stream encoding -- )
|
||||
char unicode>johab byte?
|
||||
[ char 1byte-array stream stream-write ] [
|
||||
char unicode>johab
|
||||
h>b/b swap 2byte-array
|
||||
stream stream-write
|
||||
] if ;
|
||||
|
||||
: johab-decode-char-step2 ( c stream -- char )
|
||||
stream-read1
|
||||
[ 2byte-array be> johab>unicode ]
|
||||
[ drop replacement-char ] if* ;
|
||||
|
||||
M:: johab decode-char ( stream encoding -- char/f )
|
||||
stream stream-read1
|
||||
{
|
||||
{ [ dup not ] [ drop f ] }
|
||||
{ [ dup johab-1st? ] [ stream johab-decode-char-step2 ] }
|
||||
[ ]
|
||||
} cond ;
|
||||
|
||||
PRIVATE>
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue