io.encodings.utf7: implementation of decoding for new codec utf7

db4
Björn Lindqvist 2013-12-13 22:16:16 +01:00 committed by John Benediktsson
parent b5eae48068
commit 900c670894
2 changed files with 89 additions and 0 deletions
basis/io/encodings/utf7

View File

@ -0,0 +1,31 @@
USING:
io.encodings.string io.encodings.utf7
kernel
sequences
strings
tools.test ;
IN: io.encodings.utf7.tests
[
{
"~/b&AOU-g&APg-"
"b&AOU-x"
"b&APg-x"
"test"
"Skr&AOQ-ppost"
"Ting &- S&AOU-ger"
"~/F&APg-lder/mailb&AOU-x &- stuff + more"
"~peter/mail/&ZeVnLIqe-/&U,BTFw-"
}
] [
{
"~/bågø"
"båx"
"bøx"
"test"
"Skräppost"
"Ting & Såger"
"~/Følder/mailbåx & stuff + more"
"~peter/mail/日本語/台北"
} [ utf7imap4 encode >string ] map
] unit-test

View File

@ -0,0 +1,58 @@
USING:
accessors
arrays
ascii
assocs
base64
byte-arrays
fry
io
io.encodings io.encodings.string io.encodings.utf16
kernel
sequences
splitting
strings ;
IN: io.encodings.utf7
SINGLETON: utf7
SINGLETON: utf7imap4
! This map encodes the difference between standard utf7 and the
! dialect used by IMAP which wants slashes repladed with commas when
! encoding and uses '&' instead of '+' as the escaping character.
CONSTANT: dialect-data {
{ utf7 { { "" "" } { "+" "-" } } }
{ utf7imap4 { { "/" "," } { "&" "-" } } }
}
: >raw-base64 ( byte-array -- str )
>string utf16be encode >base64 [ CHAR: = = ] trim-tail ;
: flush-buffer ( buffer repl-pair surround-pair -- result )
rot [ 2drop "" ] [
>raw-base64 -rot [ first2 replace ] [ first2 surround ] bi*
] if-empty ;
: escaped-char ( str1 begin end -- str )
-rot dupd = [ swap append ] [ nip ] if ;
: encode-utf7-char ( result buffer dialect-info ch -- result buffer )
dup printable? [
1string -rot first2
[ flush-buffer swapd append swap ]
[ nip first2 escaped-char append ] 2bi ""
] [ nip suffix ] if ;
: encode-utf7-string ( str dialect -- str' )
{ "" "" } swap dialect-data at [
'[ [ first2 ] dip _ swap encode-utf7-char 2array ] reduce
] [
[ first2 ] dip first2 flush-buffer append
] bi ;
: stream-write-utf7 ( string stream encoding -- )
swapd encode-utf7-string >byte-array swap stream-write ;
M: utf7 encode-string stream-write-utf7 ;
M: utf7imap4 encode-string stream-write-utf7 ;