io.encodings.8-bit: save 37% memory space by "compressing" codetable.
Instead of a biassoc with from/to hashtables, we now store a `from` array of length 256 together with the `to` hashtable. Decoding indexes into the array instead of hashing, since we know 8-bit characters lie in [0,255].
windows-high-dpi
parent
c8d1b756d4
commit
1b3a07ca44
|
@ -1,15 +1,13 @@
|
|||
USING: arrays io.encodings.string io.encodings.8-bit
|
||||
io.encodings.8-bit.private strings tools.test ;
|
||||
USING: arrays io.encodings.8-bit io.encodings.string strings
|
||||
tools.test ;
|
||||
|
||||
! Encoding with latin1/latin2: ASCII text encodes to the same byte values,
! code point 255 encodes to byte 255 (latin1), and code point 256 must fail.
{ B{ CHAR: f CHAR: o CHAR: o } } [ "foo" latin1 encode ] unit-test
|
||||
[ { 256 } >string latin1 encode ] must-fail
|
||||
{ B{ 255 } } [ { 255 } >string latin1 encode ] unit-test
|
||||
{ B{ CHAR: f CHAR: o CHAR: o } } [ "foo" latin2 encode ] unit-test
|
||||
[ { 256 } >string latin2 encode ] must-fail
|
||||
! Decoding with latin2/latin1: ASCII is unchanged and byte 233 decodes to 233.
{ "bar" } [ "bar" latin2 decode ] unit-test
|
||||
{ { CHAR: b 233 CHAR: r } } [ B{ CHAR: b 233 CHAR: r } latin2 decode >array ] unit-test
|
||||
|
||||
{ "bar" } [ "bar" latin1 decode ] unit-test
|
||||
{ { CHAR: b 233 CHAR: r } } [ B{ CHAR: b 233 CHAR: r } latin1 decode >array ] unit-test
|
||||
! windows-1252: byte 0x81 is expected to decode to 0xfffd, byte 0x80 to 0x20AC.
{ { 0xfffd 0x20AC } } [ B{ 0x81 0x80 } windows-1252 decode >array ] unit-test
|
||||
|
||||
! The encoding can also be referred to as a literal word ( \ latin1 ).
{ t } [ \ latin1 8-bit-encoding? ] unit-test
|
||||
{ "bar" } [ "bar" \ latin1 decode ] unit-test
|
||||
{ B{ 255 } } [ { 255 } >string windows-1254 encode ] unit-test
|
||||
|
||||
! cp437: bytes 0xfb 0xf1 0x40 are expected to decode to 0x221a 0x00b1 0x0040.
{ { 0x221a 0x00b1 0x0040 } } [ B{ 0xfb 0xf1 0x40 } cp437 decode >array ] unit-test
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
! Copyright (C) 2008 Daniel Ehrenberg, Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors assocs biassocs classes.singleton generic io
|
||||
io.encodings io.encodings.iana kernel lexer parser sequences
|
||||
simple-flat-file words ;
|
||||
USING: accessors arrays assocs classes.singleton generic
|
||||
hashtables io io.encodings io.encodings.iana kernel lexer parser
|
||||
sequences simple-flat-file words ;
|
||||
IN: io.encodings.8-bit
|
||||
|
||||
<<
|
||||
|
@ -11,17 +11,20 @@ IN: io.encodings.8-bit
|
|||
! Wraps file-name with the prefix "vocab:io/encodings/8-bit/" and the
! suffix ".TXT".
! NOTE(review): the stack effect names the output `stream`, but the body only
! builds a string (presumably a path to the code table file) -- confirm.
: encoding-file ( file-name -- stream )
|
||||
"vocab:io/encodings/8-bit/" ".TXT" surround ;
|
||||
|
||||
! Tuple holding the code table of an 8-bit encoding. Two declarations are
! shown in this diff view: one with a single read-only biassoc slot `table`,
! and one with a read-only array slot `from` plus a read-only hashtable slot
! `to`.
TUPLE: 8-bit { table biassoc read-only } ;
|
||||
TUPLE: 8-bit { from array read-only } { to hashtable read-only } ;
|
||||
|
||||
! Builds an 8-bit tuple from a biassoc. Each index 0..255 is looked up in the
! biassoc's from>> table with `of`, giving a 256-element sequence (an index
! with no entry yields f); that sequence and the biassoc's to>> table become
! the two slots passed to `8-bit boa`.
: <8-bit> ( biassoc -- 8-bit )
|
||||
[ from>> 256 <iota> [ of ] with map ] [ to>> ] bi 8-bit boa ;
|
||||
|
||||
! Maps a character to its byte; calls encode-error when the lookup yields f.
! Two body lines are shown in this diff view: one looks the char up by value
! in table>> (value-at), the other looks it up by key in to>> (at).
: 8-bit-encode ( char 8-bit -- byte )
|
||||
table>> value-at [ encode-error ] unless* ; inline
|
||||
to>> at [ encode-error ] unless* ; inline
|
||||
|
||||
! Converts the character with 8-bit-encode and writes the resulting byte to
! the stream with stream-write1. The swap moves the stream to the top so that
! `dip` can run 8-bit-encode on the char and the encoding beneath it.
M: 8-bit encode-char
|
||||
swap [ 8-bit-encode ] dip stream-write1 ;
|
||||
|
||||
! Reads one element from the stream with stream-read1. If that returns f, the
! encoding is dropped and f is the result. Otherwise the byte is looked up in
! the code table, and replacement-char is substituted when the lookup yields f.
! Two lookup lines are shown in this diff view: `table>> at` (assoc lookup)
! and `from>> ?nth` (sequence indexing).
M: 8-bit decode-char
|
||||
swap stream-read1 [
|
||||
swap table>> at [ replacement-char ] unless*
|
||||
swap from>> ?nth [ replacement-char ] unless*
|
||||
] [ drop f ] if* ;
|
||||
|
||||
: create-encoding ( name -- word )
|
||||
|
@ -30,7 +33,7 @@ M: 8-bit decode-char
|
|||
! Defines one 8-bit encoding. `tri*` applies one quotation to each input:
!   name      -> create-encoding, duplicated
!   iana-name -> register-encoding
!   file-name -> encoding-file, load-codetable-file, then the table object
!                constructor (shown here as both `8-bit boa` and `<8-bit>`)
! The final `2bi` creates <encoder> and <decoder> methods on the new word.
! Each method body is the table object curried onto a quotation that `nip`s
! away the value beneath it and calls <encoder> / <decoder>.
! NOTE(review): the stack effects of create-encoding and register-encoding are
! not visible here; presumably register-encoding consumes the duplicated word
! together with iana-name -- confirm.
: load-encoding ( name iana-name file-name -- )
|
||||
[ create-encoding dup ]
|
||||
[ register-encoding ]
|
||||
[ encoding-file load-codetable-file 8-bit boa ] tri*
|
||||
[ encoding-file load-codetable-file <8-bit> ] tri*
|
||||
[ [ \ <encoder> create-method ] dip [ nip <encoder> ] curry define ]
|
||||
[ [ \ <decoder> create-method ] dip [ nip <decoder> ] curry define ] 2bi ;
|
||||
|
||||
|
|
Loading…
Reference in New Issue