io.encodings.utf7: cleaned up vocab per review

1. Tuple is used to hold the decoding buffer instead of a global 2. Fixed problematic sequence type conversions.
2014-01-11 12:53:51 +01:00 · 2014-01-11 12:53:51 +01:00 · a47c41e45c
parent 797d73bb8c
commit a47c41e45c
3 changed files with 37 additions and 51 deletions
--- a/basis/io/encodings/utf7/utf7-docs.factor
+++ b/basis/io/encodings/utf7/utf7-docs.factor
@ -2,7 +2,7 @@ USING: help.markup help.syntax ;
 IN: io.encodings.utf7

 HELP: utf7
-{ $class-description "Encoding descriptor for UTF-7 encoding." } ;
+{ $description "Encoding descriptor for UTF-7 encoding." } ;

 HELP: utf7imap4
-{ $class-description "Encoding descriptor for the encoding UTF-7 modified for IMAP (see RFC 3501 5.1.3)." } ;
+{ $description "Encoding descriptor for the encoding UTF-7 modified for IMAP (see RFC 3501 5.1.3)." } ;
--- a/basis/io/encodings/utf7/utf7-tests.factor
+++ b/basis/io/encodings/utf7/utf7-tests.factor
@ -1,9 +1,5 @@
-USING:
-    io.encodings.string io.encodings.utf7
-    kernel
-    sequences
-    strings
-    tools.test ;
+USING: io.encodings.string io.encodings.utf7 kernel sequences strings
+tools.test ;
 IN: io.encodings.utf7.tests

 [
--- a/basis/io/encodings/utf7/utf7.factor
+++ b/basis/io/encodings/utf7/utf7.factor
@ -1,71 +1,61 @@
-USING:
-    arrays
-    ascii
-    assocs
-    base64
-    byte-arrays
-    fry
-    grouping.extras
-    io io.encodings io.encodings.string io.encodings.utf16
-    kernel
-    math math.functions
-    namespaces
-    sequences
-    splitting
-    strings ;
+USING: accessors ascii base64 fry grouping.extras io io.encodings
+io.encodings.string io.encodings.utf16 kernel math math.functions
+sequences splitting strings ;
 IN: io.encodings.utf7

-SINGLETON: utf7
-SINGLETON: utf7imap4
+TUPLE: utf7codec dialect buffer ;

-! This map encodes the difference between standard utf7 and the
-! dialect used by IMAP which wants slashes repladed with commas when
+! These words encodes the difference between standard utf7 and the
+! dialect used by IMAP which wants slashes replaced with commas when
 ! encoding and uses '&' instead of '+' as the escaping character.
-CONSTANT: dialect-data {
-    { utf7 { { "" "" } { "+" "-" } } }
-    { utf7imap4 { { "/" "," } { "&" "-" } } }
-}
+: utf7 ( -- t )
+    {
+        { { } { } }
+        { { CHAR: + } { CHAR: - } }
+    } V{ } utf7codec boa ;

-: >raw-base64 ( byte-array -- str )
+: utf7imap4 ( -- t )
+    {
+        { { CHAR: / } { CHAR: , } }
+        { { CHAR: & } { CHAR: - } }
+    } V{ } utf7codec boa ;
+
+: >raw-base64 ( bytes -- bytes' )
    >string utf16be encode >base64 [ CHAR: = = ] trim-tail ;

 : raw-base64> ( str -- str' )
    dup length 4 / ceiling 4 * CHAR: = pad-tail base64> utf16be decode ;

-: encode-chunk ( repl-pair surround-pair chunk ascii? -- byte-array )
+: encode-chunk ( repl-pair surround-pair chunk ascii? -- bytes )
    [ swap [ first ] [ concat ] bi replace nip ]
    [ >raw-base64 -rot [ first2 replace ] [ first2 surround ] bi* ] if ;

-: encode-utf7-string ( str dialect -- byte-array )
+: encode-utf7-string ( str codec -- bytes )
    [ [ printable? ] group-by ] dip
-    dialect-data at first2 '[ _ _ rot first2 swap encode-chunk ] map concat ;
+    dialect>> first2 '[ _ _ rot first2 swap encode-chunk ] map
+    B{ } concat-as ;

-: stream-write-utf7 ( string stream encoding -- )
-    swapd encode-utf7-string >byte-array swap stream-write ;
+M: utf7codec encode-string ( str stream codec -- )
+    swapd encode-utf7-string swap stream-write ;

-M: utf7 encode-string stream-write-utf7 ;
-M: utf7imap4 encode-string stream-write-utf7 ;
-
-! UTF-7 decoding is stateful, hence this ugly workaround is needed.
-SYMBOL: decoding-buffer
 DEFER: emit-char

-: decode-chunk ( dialect-info -- ch buffer )
+: decode-chunk ( dialect -- ch buffer )
    dup first2 swap [ second read-until drop ] [ first2 swap replace ] bi*
    [ second first first { } ] [ raw-base64> emit-char ] if-empty ;

-: fill-buffer ( dialect-info -- ch buffer )
+: fill-buffer ( dialect -- ch buffer )
    dup second first first read1 dup swapd = [
        drop decode-chunk
    ] [ nip { } ] if ;

-: emit-char ( dialect-info buffer -- ch buffer' )
+: emit-char ( dialect buffer -- ch buffer' )
    [ fill-buffer ] [ nip unclip swap ] if-empty ;

-: decode-utf7 ( stream encoding -- char/f )
-    dialect-data at swap [
-        decoding-buffer [ dup { } ? emit-char ] change-global
-    ] with-input-stream ;
+: replace-all! ( src dst -- )
+    [ delete-all ] keep push-all ;

-M: utf7 decode-char decode-utf7 ;
-M: utf7imap4 decode-char decode-utf7 ;
+M: utf7codec decode-char ( stream codec -- char/f )
+    swap [
+        [ dialect>> ] [ buffer>> ] bi [ emit-char ] keep replace-all!
+    ] with-input-stream ;