From 9a7a2648fd6f3bfbd0b4f2325c9bf3e67694c71e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Lindqvist?= <bjourne@gmail.com>
Date: Tue, 17 Dec 2013 23:43:48 +0100
Subject: [PATCH] io.encodings.utf7: decoder implementation, doesn't support
 utf7imap4 yet

---
 basis/io/encodings/utf7/utf7-tests.factor | 13 ++++++++++
 basis/io/encodings/utf7/utf7.factor       | 29 ++++++++++++++++++++---
 2 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/basis/io/encodings/utf7/utf7-tests.factor b/basis/io/encodings/utf7/utf7-tests.factor
index 81caaea4ff..aabfb4c41d 100644
--- a/basis/io/encodings/utf7/utf7-tests.factor
+++ b/basis/io/encodings/utf7/utf7-tests.factor
@@ -29,3 +29,16 @@ IN: io.encodings.utf7.tests
         "~peter/mail/日本語/台北"
     } [ utf7imap4 encode >string ] map
 ] unit-test
+
+[ t ] [
+    {
+        "~/bågø"
+        "båx"
+        "bøx"
+        "test"
+        "Skräppost"
+        "Ting & Såger"
+        "~/Følder/mailbåx & stuff + more"
+        "~peter/mail/日本語/台北"
+    } dup [ utf7 encode utf7 decode ] map =
+] unit-test
diff --git a/basis/io/encodings/utf7/utf7.factor b/basis/io/encodings/utf7/utf7.factor
index d54a40e6e2..66d47b023d 100644
--- a/basis/io/encodings/utf7/utf7.factor
+++ b/basis/io/encodings/utf7/utf7.factor
@@ -8,6 +8,8 @@ USING:
     io
     io.encodings io.encodings.string io.encodings.utf16
     kernel
+    math math.functions
+    namespaces
     sequences
     splitting
     strings ;
@@ -27,6 +29,9 @@ CONSTANT: dialect-data {
 : >raw-base64 ( byte-array -- str )
     >string utf16be encode >base64 [ CHAR: = = ] trim-tail ;
 
+: raw-base64> ( str -- str' )
+    dup length 4 / ceiling 4 * CHAR: = pad-tail base64> utf16be decode ;
+
 : (group-by-loop) ( elt key groups -- groups' )
     2dup [ nip empty? ] [ ?last ?first = not ] 2bi or [
         -rot swap 1array
@@ -42,12 +47,30 @@ CONSTANT: dialect-data {
     [ >raw-base64 -rot [ first2 replace ] [ first2 surround ] bi* ] if ;
 
 : encode-utf7-string ( str dialect -- byte-array )
-    dialect-data at first2 '[ _ _ rot first2 swap encode-chunk ]
-    [ [ printable? ] group-by ] dip map concat ;
+    [ [ printable? ] group-by ] dip
+    dialect-data at first2 '[ _ _ rot first2 swap encode-chunk ] map concat ;
 
 : stream-write-utf7 ( string stream encoding -- )
     swapd encode-utf7-string >byte-array swap stream-write ;
 
 M: utf7 encode-string stream-write-utf7 ;
-
 M: utf7imap4 encode-string stream-write-utf7 ;
+
+! UTF-7 decoding is stateful, hence this ugly workaround is needed.
+SYMBOL: decoding-buffer
+
+: emit-next-char ( buffer -- ch buffer' )
+    [
+        read1 dup CHAR: + = [
+            drop { CHAR: - } read-until drop
+            [ CHAR: + { } ] [ raw-base64> emit-next-char ] if-empty
+        ] [ { } ] if
+    ] [ unclip swap ] if-empty ;
+
+: decode-utf7 ( stream encoding -- char/f )
+    drop [
+        decoding-buffer [ [ { } ] unless* emit-next-char ] change-global
+    ] with-input-stream ;
+
+M: utf7 decode-char decode-utf7 ;
+M: utf7imap4 decode-char decode-utf7 ;