From 900c6708944303aa59b83f0e7e416d4c3d0a5903 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Lindqvist?= Date: Fri, 13 Dec 2013 22:16:16 +0100 Subject: [PATCH] io.encodings.utf7: implementation of decoding for new codec utf7 --- basis/io/encodings/utf7/utf7-tests.factor | 31 ++++++++++++ basis/io/encodings/utf7/utf7.factor | 58 +++++++++++++++++++++++ 2 files changed, 89 insertions(+) create mode 100644 basis/io/encodings/utf7/utf7-tests.factor create mode 100644 basis/io/encodings/utf7/utf7.factor diff --git a/basis/io/encodings/utf7/utf7-tests.factor b/basis/io/encodings/utf7/utf7-tests.factor new file mode 100644 index 0000000000..81caaea4ff --- /dev/null +++ b/basis/io/encodings/utf7/utf7-tests.factor @@ -0,0 +1,31 @@ +USING: + io.encodings.string io.encodings.utf7 + kernel + sequences + strings + tools.test ; +IN: io.encodings.utf7.tests + +[ + { + "~/b&AOU-g&APg-" + "b&AOU-x" + "b&APg-x" + "test" + "Skr&AOQ-ppost" + "Ting &- S&AOU-ger" + "~/F&APg-lder/mailb&AOU-x &- stuff + more" + "~peter/mail/&ZeVnLIqe-/&U,BTFw-" + } +] [ + { + "~/bågø" + "båx" + "bøx" + "test" + "Skräppost" + "Ting & Såger" + "~/Følder/mailbåx & stuff + more" + "~peter/mail/日本語/台北" + } [ utf7imap4 encode >string ] map +] unit-test diff --git a/basis/io/encodings/utf7/utf7.factor b/basis/io/encodings/utf7/utf7.factor new file mode 100644 index 0000000000..15ea690c6b --- /dev/null +++ b/basis/io/encodings/utf7/utf7.factor @@ -0,0 +1,58 @@ +USING: + accessors + arrays + ascii + assocs + base64 + byte-arrays + fry + io + io.encodings io.encodings.string io.encodings.utf16 + kernel + sequences + splitting + strings ; +IN: io.encodings.utf7 + +SINGLETON: utf7 +SINGLETON: utf7imap4 + +! This map encodes the difference between standard utf7 and the +! dialect used by IMAP which wants slashes repladed with commas when +! encoding and uses '&' instead of '+' as the escaping character. +CONSTANT: dialect-data { + { utf7 { { "" "" } { "+" "-" } } } + { utf7imap4 { { "/" "," } { "&" "-" } } } +} + +: >raw-base64 ( byte-array -- str ) + >string utf16be encode >base64 [ CHAR: = = ] trim-tail ; + +: flush-buffer ( buffer repl-pair surround-pair -- result ) + rot [ 2drop "" ] [ + >raw-base64 -rot [ first2 replace ] [ first2 surround ] bi* + ] if-empty ; + +: escaped-char ( str1 begin end -- str ) + -rot dupd = [ swap append ] [ nip ] if ; + +: encode-utf7-char ( result buffer dialect-info ch -- result buffer ) + dup printable? [ + 1string -rot first2 + [ flush-buffer swapd append swap ] + [ nip first2 escaped-char append ] 2bi "" + ] [ nip suffix ] if ; + +: encode-utf7-string ( str dialect -- str' ) + { "" "" } swap dialect-data at [ + '[ [ first2 ] dip _ swap encode-utf7-char 2array ] reduce + ] [ + [ first2 ] dip first2 flush-buffer append + ] bi ; + +: stream-write-utf7 ( string stream encoding -- ) + swapd encode-utf7-string >byte-array swap stream-write ; + +M: utf7 encode-string stream-write-utf7 ; + +M: utf7imap4 encode-string stream-write-utf7 ;