From 594f335dfebd4cf483d12a652f666d75d9fc1a44 Mon Sep 17 00:00:00 2001
From: Daniel Ehrenberg <ehrenbed@carleton.edu>
Date: Wed, 30 Apr 2008 21:04:57 -0500
Subject: [PATCH] Adding IANA encodings table

---
 extra/io/encodings/iana/iana.factor         | 41 ++++++++++++++++++++-
 extra/unicode/syntax/backend/backend.factor |  2 +-
 2 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/extra/io/encodings/iana/iana.factor b/extra/io/encodings/iana/iana.factor
index 08b40f802c..1bbb80482d 100644
--- a/extra/io/encodings/iana/iana.factor
+++ b/extra/io/encodings/iana/iana.factor
@@ -1,7 +1,27 @@
-USING: kernel strings unicode.syntax.backend ; 
+USING: kernel strings unicode.syntax.backend io.files assocs
+splitting sequences io namespaces sets
+io.encodings.ascii io.encodings.utf8 io.encodings.utf16 io.encodings.8-bit ;
+IN: io.encodings.iana
 
 VALUE: n>e-table
-VALUE: e>n-table
+
+: e>n-table H{ ! encoding word => name string; looked up with `at` by encoding>name and with `value-at` by make-n>e
+    { ascii "US-ASCII" } ! NOTE(review): the key words presumably come from the io.encodings.* vocabs in USING: -- confirm
+    { utf8 "UTF-8" }
+    { utf16 "UTF-16" }
+    { utf16be "UTF-16BE" }
+    { utf16le "UTF-16LE" }
+    { latin1 "ISO-8859-1" }
+    { latin2 "ISO-8859-2" }
+    { latin3 "ISO-8859-3" }
+    { latin4 "ISO-8859-4" }
+    { latin/cyrillic "ISO-8859-5" }
+    { latin/arabic "ISO-8859-6" }
+    { latin/greek "ISO-8859-7" }
+    { latin/hebrew "ISO-8859-8" }
+    { latin5 "ISO-8859-9" }
+    { latin6 "ISO-8859-10" }
+} ;
 
 : name>encoding ( string -- encoding )
     n>e-table at ;
@@ -9,4 +29,21 @@ VALUE: e>n-table
 : encoding>name ( encoding -- string )
     e>n-table at ;
 
+: parse-iana ( stream -- synonym-set ) ! result: one sequence of name strings per blank-line-separated group of input lines
+    lines { "" } split [ ! split the stream's lines into groups at every empty line
+        [ " " split ] map ! break each line of the group into space-separated tokens
+        [ first { "Name:" "Alias:" } member? ] filter ! keep only lines whose first token is "Name:" or "Alias:"
+        [ second ] map { "None" } diff ! take each kept line's second token, then remove any "None" entries
+    ] map ;
 
+: make-n>e ( stream -- n>e ) ! n>e maps each name string => encoding word (a key of e>n-table)
+    parse-iana [ [ ! outer `each` below visits one synonym set at a time
+        dup [ ! visit every name in the set; `with` keeps a copy of the whole set underneath
+            e>n-table value-at ! the encoding whose e>n-table string equals this name, or f
+            [ swap [ set ] with each ] ! found: store name => encoding for every name in the set
+            [ drop ] if* ! not found: discard the copy of the set
+        ] with each
+    ] each ] H{ } make-assoc ; ! the `set` calls above populate the assoc that make-assoc returns
+
+"resource:extra/io/encodings/iana/character-sets" ! path of the file handed to <file-reader> on the next line
+ascii <file-reader> make-n>e \ n>e-table set-value ! top-level form: open the file with the ascii encoding, build the name table, store it in the n>e-table value
diff --git a/extra/unicode/syntax/backend/backend.factor b/extra/unicode/syntax/backend/backend.factor
index d1065da5c8..5c463e8fc4 100644
--- a/extra/unicode/syntax/backend/backend.factor
+++ b/extra/unicode/syntax/backend/backend.factor
@@ -1,4 +1,4 @@
-USING: kernel parser sequences definitions ;
+USING: kernel parser sequences words ;
 IN: unicode.syntax.backend
 
 : VALUE: