From b02332df4057df346d2e6b6fc78a46c6210d96cd Mon Sep 17 00:00:00 2001 From: "Yun, Jonghyouk" Date: Sat, 28 Feb 2009 15:10:15 +0900 Subject: [PATCH] io.encodings.korean johab encodings initial --- basis/io/encodings/korean/korean-tests.factor | 25 ++++++++- basis/io/encodings/korean/korean.factor | 55 ++++++++++++++++++- 2 files changed, 77 insertions(+), 3 deletions(-) diff --git a/basis/io/encodings/korean/korean-tests.factor b/basis/io/encodings/korean/korean-tests.factor index b39aa866d1..d0d86d5ae1 100644 --- a/basis/io/encodings/korean/korean-tests.factor +++ b/basis/io/encodings/korean/korean-tests.factor @@ -5,7 +5,8 @@ io.encodings.korean.private io.encodings.string io.streams.string kernel locals multiline namespaces sequences strings tools.test ; IN: io.encodings.korean.tests -! convert cp949 <> unicode + +! cp949 encodings [ f ] [ HEX: 80 cp949>unicode ] unit-test [ f ] [ HEX: ff cp949>unicode ] unit-test @@ -44,3 +45,25 @@ IN: io.encodings.korean.tests [ t ] [ phrase-cp949 3 head* cp949 decode phrase-unicode 2 head* = ] unit-test [ t ] [ phrase-cp949 2 head* cp949 decode phrase-unicode 2 head* CHAR: replacement-character suffix = ] unit-test + + +! johab encodings +[ HEX: 20 ] [ HEX: 20 johab>unicode ] unit-test +[ HEX: 3133 ] [ HEX: 8444 johab>unicode ] unit-test +[ HEX: 8A5D ] [ HEX: AD4F unicode>johab ] unit-test + + +: phrase-johab ( -- s ) + B{ + 149 183 208 129 162 137 137 193 32 164 130 150 129 172 101 + 183 161 33 + } ; + +: phrase-johab>unicode ( -- s ) + phrase-johab johab decode ; + +: phrase-unicode>johab ( -- s ) + phrase-unicode johab encode ; + +[ t ] [ phrase-johab>unicode phrase-unicode = ] unit-test +[ t ] [ phrase-unicode>johab phrase-johab = ] unit-test \ No newline at end of file diff --git a/basis/io/encodings/korean/korean.factor b/basis/io/encodings/korean/korean.factor index 07ceb956b3..95bc1c976b 100644 --- a/basis/io/encodings/korean/korean.factor +++ b/basis/io/encodings/korean/korean.factor @@ -11,8 +11,15 @@ SINGLETON: cp949 cp949 "EUC-KR" register-encoding +SINGLETON: johab + +! johab "JOHAB" register-encoding + + * @@ -38,7 +45,7 @@ M:: cp949 encode-char ( char stream encoding -- ) stream stream-write ] if ; -: decode-char-step2 ( c stream -- char ) +: cp949-decode-char-step2 ( c stream -- char ) stream-read1 [ 2byte-array be> cp949>unicode ] [ drop replacement-char ] if* ; @@ -47,6 +54,50 @@ M:: cp949 decode-char ( stream encoding -- char/f ) stream stream-read1 { { [ dup not ] [ drop f ] } - { [ dup cp949-1st? ] [ stream decode-char-step2 ] } + { [ dup cp949-1st? ] [ stream cp949-decode-char-step2 ] } [ ] } cond ; + + + +! johab encodings + +VALUE: johab-table + +"vocab:io/encodings/korean/data/johab.txt" * + to: johab-table + +: johab>unicode ( n -- u ) johab-table n>u ; + +: unicode>johab ( u -- n ) johab-table u>n ; + +: johab-1st? ( n -- ? ) + [ HEX: 84 HEX: D3 between? ] + [ HEX: D8 HEX: DE between? ] + [ HEX: E0 HEX: F9 between? ] + tri { } 3sequence [ t? ] any? ; + +M:: johab encode-char ( char stream encoding -- ) + char unicode>johab byte? + [ char 1byte-array stream stream-write ] [ + char unicode>johab + h>b/b swap 2byte-array + stream stream-write + ] if ; + +: johab-decode-char-step2 ( c stream -- char ) + stream-read1 + [ 2byte-array be> johab>unicode ] + [ drop replacement-char ] if* ; + +M:: johab decode-char ( stream encoding -- char/f ) + stream stream-read1 + { + { [ dup not ] [ drop f ] } + { [ dup johab-1st? ] [ stream johab-decode-char-step2 ] } + [ ] + } cond ; + +PRIVATE> + +