2008-04-30 23:06:13 -04:00
|
|
|
! Copyright (C) 2008 Daniel Ehrenberg
|
|
|
|
! See http://factorcode.org/license.txt for BSD license.
|
2008-05-06 21:59:37 -04:00
|
|
|
USING: kernel strings values io.files assocs
|
2009-03-31 09:03:27 -04:00
|
|
|
splitting sequences io namespaces sets
|
2009-05-02 14:45:38 -04:00
|
|
|
io.encodings.ascii io.encodings.utf8 io.encodings.utf16 ;
|
2008-04-30 22:04:57 -04:00
|
|
|
IN: io.encodings.iana
|
2008-04-30 20:45:12 -04:00
|
|
|
|
2008-04-30 23:06:13 -04:00
|
|
|
<PRIVATE
|
2009-02-03 18:32:05 -05:00
|
|
|
! Global lookup tables used by this vocabulary:
!   n>e-table  name -> encoding, consulted by name>encoding
!   e>n-table  encoding -> name, consulted by encoding>name
!   aliases    result of make-aliases: the first name of every synonym
!              set mapped to the whole set
SYMBOLS: n>e-table e>n-table aliases ;
|
2008-04-30 23:06:13 -04:00
|
|
|
PRIVATE>
|
2008-04-30 20:45:12 -04:00
|
|
|
|
2009-03-20 03:20:31 -04:00
|
|
|
! Look up the encoding registered under `name` in the global
! name -> encoding table. Yields f when the name is not registered.
! The lookup is an exact string match.
: name>encoding ( name -- encoding )
    n>e-table get-global at* drop ;
|
2009-02-03 18:32:05 -05:00
|
|
|
|
2009-03-20 03:20:31 -04:00
|
|
|
! Look up the name recorded for `encoding` in the global
! encoding -> name table. Yields f when the encoding is not registered.
: encoding>name ( encoding -- name )
    e>n-table get-global at* drop ;
|
2008-04-30 20:45:12 -04:00
|
|
|
|
2008-04-30 23:06:13 -04:00
|
|
|
<PRIVATE
|
2009-02-15 21:45:06 -05:00
|
|
|
! True when the first space-separated field of a registry line is one
! of the two tags that introduce a character-set name.
: name-field? ( fields -- ? )
    first { "Name:" "Alias:" } member? ;

! Turn the lines of a single registry entry into the names it declares:
! the second field of every "Name:" / "Alias:" line, with the
! placeholder "None" removed.
: entry>names ( lines -- names )
    [ " " split ] map
    [ name-field? ] filter
    [ second ] map
    { "None" } diff ;

! Read `file` as UTF-8, cut it into entries at empty lines and return
! one sequence of names per entry. Entries that declare no names are
! dropped from the result.
: parse-iana ( file -- synonym-set )
    utf8 file-lines { "" } split
    [ entry>names ] map harvest ;
|
|
|
|
|
2009-02-15 21:45:06 -05:00
|
|
|
! Parse the registry at `file` and build a hashtable keyed by the first
! name of each synonym set; the value stored under that key is the
! complete set (the key itself included).
: make-aliases ( file -- n>e )
    parse-iana
    [ [ first ] keep ] H{ } map>assoc ;
|
2008-04-30 22:04:57 -04:00
|
|
|
|
2009-02-03 18:32:05 -05:00
|
|
|
! Fresh name -> encoding table holding only the four spellings of
! UTF-8 listed here, all mapped to utf8. A new hashtable is built on
! every call, so the caller may mutate the result freely.
: initial-n>e ( -- assoc )
    { "UTF8" "utf8" "utf-8" "UTF-8" }
    [ utf8 ] H{ } map>assoc ;
|
|
|
|
|
|
|
|
! Fresh encoding -> name table whose single entry maps utf8 to
! "UTF-8". A new hashtable is built on every call.
: initial-e>n ( -- assoc )
    "UTF-8" utf8 H{ } clone
    [ set-at ] keep ;
|
2008-06-12 04:50:20 -04:00
|
|
|
|
2008-04-30 23:06:13 -04:00
|
|
|
PRIVATE>
|
2008-04-30 20:45:12 -04:00
|
|
|
|
! Seed the two lookup tables through `initialize`, each from its
! initial-* constructor.
n>e-table [ initial-n>e ] initialize
e>n-table [ initial-e>n ] initialize

! Build the alias sets from the registry file shipped with this
! vocabulary and store them globally; this assignment is unconditional.
"vocab:io/encodings/iana/character-sets" make-aliases
aliases set-global
|
2009-02-03 18:32:05 -05:00
|
|
|
|
|
|
|
! Register `descriptor` as the encoding for `name` and for every member
! of name's synonym set, then record `name` as the name of `descriptor`.
!
! `name` must be a key of the global alias table, i.e. the first name
! of a synonym set as produced by make-aliases. If it is not, the
! string "Bad encoding registration" is thrown and neither table is
! modified, because the name -> encoding half runs first.
!
! The alias table is read with get-global, matching how it is stored
! (set-global) and how the other two tables are read, so a dynamic
! binding of `aliases` in the calling scope cannot shadow it.
: register-encoding ( descriptor name -- )
    [
        ! descriptor name -> point every synonym at descriptor
        aliases get-global at [
            [ n>e-table get-global set-at ] with each
        ] [ "Bad encoding registration" throw ] if*
    ] [
        ! descriptor name -> store name under descriptor
        swap e>n-table get-global set-at
    ] 2bi ;
|
2009-03-31 09:03:27 -04:00
|
|
|
|
|
|
|
! Encodings registered when the vocabulary loads, as
! { descriptor name } pairs handed to register-encoding in order.
{
    { ascii "ANSI_X3.4-1968" }
    { utf16be "UTF-16BE" }
    { utf16le "UTF-16LE" }
    { utf16 "UTF-16" }
} [ register-encoding ] assoc-each
|