Cleanup of various encodings implementations

db4
Daniel Ehrenberg 2009-02-02 00:16:33 -06:00
parent 7512ec1110
commit ce83be67a4
3 changed files with 13 additions and 17 deletions

View File

@ -2,28 +2,24 @@
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: sequences kernel io io.files combinators.short-circuit USING: sequences kernel io io.files combinators.short-circuit
math.order values assocs io.encodings io.binary fry strings math.order values assocs io.encodings io.binary fry strings
math io.encodings.utf8 arrays accessors splitting math.parser ; math io.encodings.ascii arrays accessors splitting math.parser
biassocs ;
IN: io.encodings.japanese IN: io.encodings.japanese
! The code page used is Microsoft Code Page 932,
! which is a set of extensions to JIS X 0208:1997
VALUE: shift-jis VALUE: shift-jis
VALUE: windows-31j VALUE: windows-31j
<PRIVATE <PRIVATE
TUPLE: jis jis>ch ch>jis ; TUPLE: jis assoc ;
: <jis> ( assoc -- jis ) : <jis> ( assoc -- jis )
[ nip ] assoc-filter [ nip ] assoc-filter H{ } assoc-like
[ H{ } assoc-like ] >biassoc jis boa ;
[ [ swap ] H{ } assoc-map-as ] bi
jis boa ;
: ch>jis ( ch tuple -- jis ) ch>jis>> at [ encode-error ] unless* ; : ch>jis ( ch tuple -- jis ) assoc>> value-at [ encode-error ] unless* ;
: jis>ch ( jis tuple -- string ) jis>ch>> at replacement-char or ; : jis>ch ( jis tuple -- string ) assoc>> at replacement-char or ;
: process-jis ( lines -- assoc ) : process-jis ( lines -- assoc )
[ "#" split1 drop ] map harvest [ [ "#" split1 drop ] map harvest [
@ -32,7 +28,7 @@ TUPLE: jis jis>ch ch>jis ;
] map ; ] map ;
: make-jis ( filename -- jis ) : make-jis ( filename -- jis )
utf8 file-lines process-jis <jis> ; ascii file-lines process-jis <jis> ;
"resource:basis/io/encodings/japanese/CP932.txt" "resource:basis/io/encodings/japanese/CP932.txt"
make-jis to: windows-31j make-jis to: windows-31j
@ -45,7 +41,7 @@ make-jis to: shift-jis
{ [ 0 HEX: 7F between? ] [ HEX: A1 HEX: DF between? ] } 1|| ; { [ 0 HEX: 7F between? ] [ HEX: A1 HEX: DF between? ] } 1|| ;
: write-halfword ( stream halfword -- ) : write-halfword ( stream halfword -- )
h>b/b swap 2array >string swap stream-write ; h>b/b swap B{ } 2sequence swap stream-write ;
M: jis encode-char M: jis encode-char
swapd ch>jis swapd ch>jis

View File

@ -101,9 +101,9 @@ M: utf16le encode-char ( char stream encoding -- )
! UTF-16 ! UTF-16
: bom-le B{ HEX: ff HEX: fe } ; inline CONSTANT: bom-le B{ HEX: ff HEX: fe }
: bom-be B{ HEX: fe HEX: ff } ; inline CONSTANT: bom-be B{ HEX: fe HEX: ff }
: bom>le/be ( bom -- le/be ) : bom>le/be ( bom -- le/be )
dup bom-le sequence= [ drop utf16le ] [ dup bom-le sequence= [ drop utf16le ] [

View File

@ -40,9 +40,9 @@ M: utf32le encode-char
! UTF-32 ! UTF-32
: bom-le B{ HEX: ff HEX: fe 0 0 } ; inline CONSTANT: bom-le B{ HEX: ff HEX: fe 0 0 }
: bom-be B{ 0 0 HEX: fe HEX: ff } ; inline CONSTANT: bom-be B{ 0 0 HEX: fe HEX: ff }
: bom>le/be ( bom -- le/be ) : bom>le/be ( bom -- le/be )
dup bom-le sequence= [ drop utf32le ] [ dup bom-le sequence= [ drop utf32le ] [