factor/basis/io/encodings/8-bit/8-bit.factor

89 lines
2.3 KiB
Factor
Raw Normal View History

2008-03-21 12:30:13 -04:00
! Copyright (C) 2008 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
2008-03-29 00:00:20 -04:00
USING: math.parser arrays io.encodings sequences kernel assocs
hashtables io.encodings.ascii generic parser classes.tuple words
2008-12-17 19:22:48 -05:00
words.symbol io io.files splitting namespaces math
compiler.units accessors ;
2008-03-21 12:30:13 -04:00
IN: io.encodings.8-bit
<PRIVATE
: mappings {
{ "latin1" "8859-1" }
{ "latin2" "8859-2" }
{ "latin3" "8859-3" }
{ "latin4" "8859-4" }
{ "latin/cyrillic" "8859-5" }
{ "latin/arabic" "8859-6" }
{ "latin/greek" "8859-7" }
{ "latin/hebrew" "8859-8" }
{ "latin5" "8859-9" }
{ "latin6" "8859-10" }
{ "latin/thai" "8859-11" }
{ "latin7" "8859-13" }
{ "latin8" "8859-14" }
{ "latin9" "8859-15" }
{ "latin10" "8859-16" }
2008-03-21 12:30:13 -04:00
{ "koi8-r" "KOI8-R" }
2008-03-21 14:01:50 -04:00
{ "windows-1252" "CP1252" }
{ "ebcdic" "CP037" }
2008-03-21 12:30:13 -04:00
{ "mac-roman" "ROMAN" }
} ;
2008-04-06 20:03:00 -04:00
: encoding-file ( file-name -- stream )
2008-07-28 23:03:13 -04:00
"resource:basis/io/encodings/8-bit/" swap ".TXT"
2008-06-19 00:29:56 -04:00
3append ascii <file-reader> ;
2008-03-21 14:01:50 -04:00
2008-03-21 12:30:13 -04:00
: process-contents ( lines -- assoc )
2008-05-14 00:36:45 -04:00
[ "#" split1 drop ] map harvest
2008-06-19 00:29:56 -04:00
[ "\t" split 2 head [ 2 short tail hex> ] map ] map ;
2008-03-21 12:30:13 -04:00
: byte>ch ( assoc -- array )
256 replacement-char <array>
[ [ swapd set-nth ] curry assoc-each ] keep ;
: ch>byte ( assoc -- newassoc )
[ swap ] assoc-map >hashtable ;
: parse-file ( stream -- byte>ch ch>byte )
2008-04-06 20:03:00 -04:00
lines process-contents
2008-03-21 12:30:13 -04:00
[ byte>ch ] [ ch>byte ] bi ;
2008-06-19 00:29:56 -04:00
SYMBOL: 8-bit-encodings
TUPLE: 8-bit decode encode ;
2008-03-21 12:30:13 -04:00
: encode-8-bit ( char stream assoc -- )
2008-08-22 23:07:59 -04:00
swapd at*
[ swap stream-write1 ] [ nip encode-error ] if ; inline
2008-03-21 12:30:13 -04:00
2008-06-19 00:29:56 -04:00
M: 8-bit encode-char encode>> encode-8-bit ;
2008-03-21 12:30:13 -04:00
: decode-8-bit ( stream array -- char/f )
2008-08-22 23:07:59 -04:00
swap stream-read1 dup
[ swap nth [ replacement-char ] unless* ] [ 2drop f ] if ; inline
2008-03-21 12:30:13 -04:00
2008-06-19 00:29:56 -04:00
M: 8-bit decode-char decode>> decode-8-bit ;
2008-03-21 12:30:13 -04:00
2008-06-12 04:49:29 -04:00
PREDICATE: 8-bit-encoding < word
2008-06-19 00:29:56 -04:00
8-bit-encodings get-global key? ;
2008-06-12 04:49:29 -04:00
2008-06-19 00:29:56 -04:00
M: 8-bit-encoding <encoder>
8-bit-encodings get-global at <encoder> ;
2008-06-12 04:49:29 -04:00
2008-06-19 00:29:56 -04:00
M: 8-bit-encoding <decoder>
8-bit-encodings get-global at <decoder> ;
2008-06-12 04:49:29 -04:00
2008-03-21 12:30:13 -04:00
PRIVATE>
2008-03-27 00:32:41 -04:00
[
2008-06-19 00:29:56 -04:00
mappings [
[ "io.encodings.8-bit" create ]
[ encoding-file parse-file 8-bit boa ]
bi*
] assoc-map
2008-06-25 16:35:14 -04:00
[ keys [ define-symbol ] each ]
2008-06-19 00:29:56 -04:00
[ 8-bit-encodings set-global ]
bi
2008-03-27 00:32:41 -04:00
] with-compilation-unit