io.encodings.korean johab encodings initial

db4
Yun, Jonghyouk 2009-02-28 15:10:15 +09:00
parent e36967d3b8
commit b02332df40
2 changed files with 77 additions and 3 deletions

View File

@ -5,7 +5,8 @@ io.encodings.korean.private io.encodings.string io.streams.string
kernel locals multiline namespaces sequences strings tools.test ;
IN: io.encodings.korean.tests
! convert cp949 <> unicode
! cp949 encodings
[ f ] [ HEX: 80 cp949>unicode ] unit-test
[ f ] [ HEX: ff cp949>unicode ] unit-test
@ -44,3 +45,25 @@ IN: io.encodings.korean.tests
[ t ] [ phrase-cp949 3 head* cp949 decode phrase-unicode 2 head* = ] unit-test
[ t ] [ phrase-cp949 2 head* cp949 decode phrase-unicode 2 head* CHAR: replacement-character suffix = ] unit-test
! johab encodings
[ HEX: 20 ] [ HEX: 20 johab>unicode ] unit-test
[ HEX: 3133 ] [ HEX: 8444 johab>unicode ] unit-test
[ HEX: 8A5D ] [ HEX: AD4F unicode>johab ] unit-test
: phrase-johab ( -- s )
B{
149 183 208 129 162 137 137 193 32 164 130 150 129 172 101
183 161 33
} ;
: phrase-johab>unicode ( -- s )
phrase-johab johab decode ;
: phrase-unicode>johab ( -- s )
phrase-unicode johab encode ;
[ t ] [ phrase-johab>unicode phrase-unicode = ] unit-test
[ t ] [ phrase-unicode>johab phrase-johab = ] unit-test

View File

@ -11,8 +11,15 @@ SINGLETON: cp949
cp949 "EUC-KR" register-encoding
SINGLETON: johab
! johab "JOHAB" register-encoding
<PRIVATE
! cp949 encodings
VALUE: cp949-table
"vocab:io/encodings/korean/data/cp949.txt" <code-table>*
@ -38,7 +45,7 @@ M:: cp949 encode-char ( char stream encoding -- )
stream stream-write
] if ;
: decode-char-step2 ( c stream -- char )
: cp949-decode-char-step2 ( c stream -- char )
stream-read1
[ 2byte-array be> cp949>unicode ]
[ drop replacement-char ] if* ;
@ -47,6 +54,50 @@ M:: cp949 decode-char ( stream encoding -- char/f )
stream stream-read1
{
{ [ dup not ] [ drop f ] }
{ [ dup cp949-1st? ] [ stream decode-char-step2 ] }
{ [ dup cp949-1st? ] [ stream cp949-decode-char-step2 ] }
[ ]
} cond ;
! johab encodings
VALUE: johab-table
"vocab:io/encodings/korean/data/johab.txt" <code-table>*
to: johab-table
: johab>unicode ( n -- u ) johab-table n>u ;
: unicode>johab ( u -- n ) johab-table u>n ;
: johab-1st? ( n -- ? )
[ HEX: 84 HEX: D3 between? ]
[ HEX: D8 HEX: DE between? ]
[ HEX: E0 HEX: F9 between? ]
tri { } 3sequence [ t? ] any? ;
M:: johab encode-char ( char stream encoding -- )
char unicode>johab byte?
[ char 1byte-array stream stream-write ] [
char unicode>johab
h>b/b swap 2byte-array
stream stream-write
] if ;
: johab-decode-char-step2 ( c stream -- char )
stream-read1
[ 2byte-array be> johab>unicode ]
[ drop replacement-char ] if* ;
M:: johab decode-char ( stream encoding -- char/f )
stream stream-read1
{
{ [ dup not ] [ drop f ] }
{ [ dup johab-1st? ] [ stream johab-decode-char-step2 ] }
[ ]
} cond ;
PRIVATE>