Cleanup of various encodings implementations

db4
Daniel Ehrenberg 2009-02-02 00:16:33 -06:00
parent 7512ec1110
commit ce83be67a4
3 changed files with 13 additions and 17 deletions

View File

@ -2,28 +2,24 @@
! See http://factorcode.org/license.txt for BSD license.
USING: sequences kernel io io.files combinators.short-circuit
math.order values assocs io.encodings io.binary fry strings
math io.encodings.utf8 arrays accessors splitting math.parser ;
math io.encodings.ascii arrays accessors splitting math.parser
biassocs ;
IN: io.encodings.japanese
! The code page used is Microsoft Code Page 932,
! which is a set of extensions to JIS X 0208:1997
VALUE: shift-jis
VALUE: windows-31j
<PRIVATE
TUPLE: jis jis>ch ch>jis ;
TUPLE: jis assoc ;
: <jis> ( assoc -- jis )
[ nip ] assoc-filter
[ H{ } assoc-like ]
[ [ swap ] H{ } assoc-map-as ] bi
jis boa ;
[ nip ] assoc-filter H{ } assoc-like
>biassoc jis boa ;
: ch>jis ( ch tuple -- jis ) ch>jis>> at [ encode-error ] unless* ;
: jis>ch ( jis tuple -- string ) jis>ch>> at replacement-char or ;
: ch>jis ( ch tuple -- jis ) assoc>> value-at [ encode-error ] unless* ;
: jis>ch ( jis tuple -- string ) assoc>> at replacement-char or ;
: process-jis ( lines -- assoc )
[ "#" split1 drop ] map harvest [
@ -32,7 +28,7 @@ TUPLE: jis jis>ch ch>jis ;
] map ;
: make-jis ( filename -- jis )
utf8 file-lines process-jis <jis> ;
ascii file-lines process-jis <jis> ;
"resource:basis/io/encodings/japanese/CP932.txt"
make-jis to: windows-31j
@ -45,7 +41,7 @@ make-jis to: shift-jis
{ [ 0 HEX: 7F between? ] [ HEX: A1 HEX: DF between? ] } 1|| ;
: write-halfword ( stream halfword -- )
h>b/b swap 2array >string swap stream-write ;
h>b/b swap B{ } 2sequence swap stream-write ;
M: jis encode-char
swapd ch>jis

View File

@ -101,9 +101,9 @@ M: utf16le encode-char ( char stream encoding -- )
! UTF-16
: bom-le B{ HEX: ff HEX: fe } ; inline
CONSTANT: bom-le B{ HEX: ff HEX: fe }
: bom-be B{ HEX: fe HEX: ff } ; inline
CONSTANT: bom-be B{ HEX: fe HEX: ff }
: bom>le/be ( bom -- le/be )
dup bom-le sequence= [ drop utf16le ] [

View File

@ -40,9 +40,9 @@ M: utf32le encode-char
! UTF-32
: bom-le B{ HEX: ff HEX: fe 0 0 } ; inline
CONSTANT: bom-le B{ HEX: ff HEX: fe 0 0 }
: bom-be B{ 0 0 HEX: fe HEX: ff } ; inline
CONSTANT: bom-be B{ 0 0 HEX: fe HEX: ff }
: bom>le/be ( bom -- le/be )
dup bom-le sequence= [ drop utf32le ] [