io.encodings.utf7: decoder implementation, doesn't support utf7imap4 yet

2013-12-17 23:43:48 +01:00 · 2013-12-17 23:43:48 +01:00 · 9a7a2648fd
parent 9c90bc0c67
commit 9a7a2648fd
2 changed files with 39 additions and 3 deletions
--- a/basis/io/encodings/utf7/utf7-tests.factor
+++ b/basis/io/encodings/utf7/utf7-tests.factor
@ -29,3 +29,16 @@ IN: io.encodings.utf7.tests
        "~peter/mail/日本語/台北"
    } [ utf7imap4 encode >string ] map
 ] unit-test
 [ t ] [
    {
        "~/bågø"
        "båx"
        "bøx"
        "test"
        "Skräppost"
        "Ting & Såger"
        "~/Følder/mailbåx & stuff + more"
        "~peter/mail/日本語/台北"
    } dup [ utf7 encode utf7 decode ] map =
 ] unit-test
--- a/basis/io/encodings/utf7/utf7.factor
+++ b/basis/io/encodings/utf7/utf7.factor
@ -8,6 +8,8 @@ USING:
    io
    io.encodings io.encodings.string io.encodings.utf16
    kernel
    math math.functions
    namespaces
    sequences
    splitting
    strings ;
@ -27,6 +29,9 @@ CONSTANT: dialect-data {
 : >raw-base64 ( byte-array -- str )
    >string utf16be encode >base64 [ CHAR: = = ] trim-tail ;
 : raw-base64> ( str -- str' )
    dup length 4 / ceiling 4 * CHAR: = pad-tail base64> utf16be decode ;
 : (group-by-loop) ( elt key groups -- groups' )
    2dup [ nip empty? ] [ ?last ?first = not ] 2bi or [
        -rot swap 1array
@ -42,12 +47,30 @@ CONSTANT: dialect-data {
    [ >raw-base64 -rot [ first2 replace ] [ first2 surround ] bi* ] if ;
 : encode-utf7-string ( str dialect -- byte-array )
-    dialect-data at first2 '[ _ _ rot first2 swap encode-chunk ]
+    [ [ printable? ] group-by ] dip
-    [ [ printable? ] group-by ] dip map concat ;
+    dialect-data at first2 '[ _ _ rot first2 swap encode-chunk ] map concat ;
 : stream-write-utf7 ( string stream encoding -- )
    swapd encode-utf7-string >byte-array swap stream-write ;
 M: utf7 encode-string stream-write-utf7 ;
 M: utf7imap4 encode-string stream-write-utf7 ;
 ! UTF-7 decoding is stateful, hence this ugly workaround is needed.
 SYMBOL: decoding-buffer
 : emit-next-char ( buffer -- ch buffer' )
    [
        read1 dup CHAR: + = [
            drop { CHAR: - } read-until drop
            [ CHAR: + { } ] [ raw-base64> emit-next-char ] if-empty
        ] [ { } ] if
    ] [ unclip swap ] if-empty ;
 : decode-utf7 ( stream encoding -- char/f )
    drop [
        decoding-buffer [ [ { } ] unless* emit-next-char ] change-global
    ] with-input-stream ;
 M: utf7 decode-char decode-utf7 ;
 M: utf7imap4 decode-char decode-utf7 ;