diff --git a/basis/io/encodings/utf7/utf7-docs.factor b/basis/io/encodings/utf7/utf7-docs.factor index df2237050c..aff22d5b02 100644 --- a/basis/io/encodings/utf7/utf7-docs.factor +++ b/basis/io/encodings/utf7/utf7-docs.factor @@ -2,7 +2,7 @@ USING: help.markup help.syntax ; IN: io.encodings.utf7 HELP: utf7 -{ $class-description "Encoding descriptor for UTF-7 encoding." } ; +{ $description "Encoding descriptor for UTF-7 encoding." } ; HELP: utf7imap4 -{ $class-description "Encoding descriptor for the encoding UTF-7 modified for IMAP (see RFC 3501 5.1.3)." } ; +{ $description "Encoding descriptor for the encoding UTF-7 modified for IMAP (see RFC 3501 5.1.3)." } ; diff --git a/basis/io/encodings/utf7/utf7-tests.factor b/basis/io/encodings/utf7/utf7-tests.factor index 4b53d7b67e..9e6ed54282 100644 --- a/basis/io/encodings/utf7/utf7-tests.factor +++ b/basis/io/encodings/utf7/utf7-tests.factor @@ -1,9 +1,5 @@ -USING: - io.encodings.string io.encodings.utf7 - kernel - sequences - strings - tools.test ; +USING: io.encodings.string io.encodings.utf7 kernel sequences strings +tools.test ; IN: io.encodings.utf7.tests [ diff --git a/basis/io/encodings/utf7/utf7.factor b/basis/io/encodings/utf7/utf7.factor index 9fcb0d1155..9fe53878ee 100644 --- a/basis/io/encodings/utf7/utf7.factor +++ b/basis/io/encodings/utf7/utf7.factor @@ -1,71 +1,61 @@ -USING: - arrays - ascii - assocs - base64 - byte-arrays - fry - grouping.extras - io io.encodings io.encodings.string io.encodings.utf16 - kernel - math math.functions - namespaces - sequences - splitting - strings ; +USING: accessors ascii base64 fry grouping.extras io io.encodings +io.encodings.string io.encodings.utf16 kernel math math.functions +sequences splitting strings ; IN: io.encodings.utf7 -SINGLETON: utf7 -SINGLETON: utf7imap4 +TUPLE: utf7codec dialect buffer ; -! This map encodes the difference between standard utf7 and the -! dialect used by IMAP which wants slashes repladed with commas when +! These words encodes the difference between standard utf7 and the +! dialect used by IMAP which wants slashes replaced with commas when ! encoding and uses '&' instead of '+' as the escaping character. -CONSTANT: dialect-data { - { utf7 { { "" "" } { "+" "-" } } } - { utf7imap4 { { "/" "," } { "&" "-" } } } -} +: utf7 ( -- t ) + { + { { } { } } + { { CHAR: + } { CHAR: - } } + } V{ } utf7codec boa ; -: >raw-base64 ( byte-array -- str ) +: utf7imap4 ( -- t ) + { + { { CHAR: / } { CHAR: , } } + { { CHAR: & } { CHAR: - } } + } V{ } utf7codec boa ; + +: >raw-base64 ( bytes -- bytes' ) >string utf16be encode >base64 [ CHAR: = = ] trim-tail ; : raw-base64> ( str -- str' ) dup length 4 / ceiling 4 * CHAR: = pad-tail base64> utf16be decode ; -: encode-chunk ( repl-pair surround-pair chunk ascii? -- byte-array ) +: encode-chunk ( repl-pair surround-pair chunk ascii? -- bytes ) [ swap [ first ] [ concat ] bi replace nip ] [ >raw-base64 -rot [ first2 replace ] [ first2 surround ] bi* ] if ; -: encode-utf7-string ( str dialect -- byte-array ) +: encode-utf7-string ( str codec -- bytes ) [ [ printable? ] group-by ] dip - dialect-data at first2 '[ _ _ rot first2 swap encode-chunk ] map concat ; + dialect>> first2 '[ _ _ rot first2 swap encode-chunk ] map + B{ } concat-as ; -: stream-write-utf7 ( string stream encoding -- ) - swapd encode-utf7-string >byte-array swap stream-write ; +M: utf7codec encode-string ( str stream codec -- ) + swapd encode-utf7-string swap stream-write ; -M: utf7 encode-string stream-write-utf7 ; -M: utf7imap4 encode-string stream-write-utf7 ; - -! UTF-7 decoding is stateful, hence this ugly workaround is needed. -SYMBOL: decoding-buffer DEFER: emit-char -: decode-chunk ( dialect-info -- ch buffer ) +: decode-chunk ( dialect -- ch buffer ) dup first2 swap [ second read-until drop ] [ first2 swap replace ] bi* [ second first first { } ] [ raw-base64> emit-char ] if-empty ; -: fill-buffer ( dialect-info -- ch buffer ) +: fill-buffer ( dialect -- ch buffer ) dup second first first read1 dup swapd = [ drop decode-chunk ] [ nip { } ] if ; -: emit-char ( dialect-info buffer -- ch buffer' ) +: emit-char ( dialect buffer -- ch buffer' ) [ fill-buffer ] [ nip unclip swap ] if-empty ; -: decode-utf7 ( stream encoding -- char/f ) - dialect-data at swap [ - decoding-buffer [ dup { } ? emit-char ] change-global - ] with-input-stream ; +: replace-all! ( src dst -- ) + [ delete-all ] keep push-all ; -M: utf7 decode-char decode-utf7 ; -M: utf7imap4 decode-char decode-utf7 ; +M: utf7codec decode-char ( stream codec -- char/f ) + swap [ + [ dialect>> ] [ buffer>> ] bi [ emit-char ] keep replace-all! + ] with-input-stream ;