io.encodings.utf7: working utf7 decoding with tests

db4
Björn Lindqvist 2013-12-22 16:03:34 +01:00 committed by John Benediktsson
parent 9a7a2648fd
commit c286d91289
2 changed files with 29 additions and 21 deletions

View File

@ -42,3 +42,16 @@ IN: io.encodings.utf7.tests
"~peter/mail/日本語/台北" "~peter/mail/日本語/台北"
} dup [ utf7 encode utf7 decode ] map = } dup [ utf7 encode utf7 decode ] map =
] unit-test ] unit-test
[ t ] [
{
"~/bågø"
"båx"
"bøx"
"test"
"Skräppost"
"Ting & Såger"
"~/Følder/mailbåx & stuff + more"
"~peter/mail/日本語/台北"
} dup [ utf7imap4 encode utf7imap4 decode ] map =
] unit-test

View File

@ -5,8 +5,8 @@ USING:
base64 base64
byte-arrays byte-arrays
fry fry
io grouping.extras
io.encodings io.encodings.string io.encodings.utf16 io io.encodings io.encodings.string io.encodings.utf16
kernel kernel
math math.functions math math.functions
namespaces namespaces
@ -32,16 +32,6 @@ CONSTANT: dialect-data {
: raw-base64> ( str -- str' ) : raw-base64> ( str -- str' )
dup length 4 / ceiling 4 * CHAR: = pad-tail base64> utf16be decode ; dup length 4 / ceiling 4 * CHAR: = pad-tail base64> utf16be decode ;
: (group-by-loop) ( elt key groups -- groups' )
2dup [ nip empty? ] [ ?last ?first = not ] 2bi or [
-rot swap 1array
] [
nip unclip-last rot [ first2 ] dip suffix
] if 2array suffix ;
: group-by ( seq quot: ( elt -- key ) -- groups )
'[ dup _ call( x -- y ) rot (group-by-loop) ] { } swap reduce ;
: encode-chunk ( repl-pair surround-pair chunk ascii? -- byte-array ) : encode-chunk ( repl-pair surround-pair chunk ascii? -- byte-array )
[ swap [ first ] [ concat ] bi replace nip ] [ swap [ first ] [ concat ] bi replace nip ]
[ >raw-base64 -rot [ first2 replace ] [ first2 surround ] bi* ] if ; [ >raw-base64 -rot [ first2 replace ] [ first2 surround ] bi* ] if ;
@ -58,18 +48,23 @@ M: utf7imap4 encode-string stream-write-utf7 ;
! UTF-7 decoding is stateful, hence this ugly workaround is needed. ! UTF-7 decoding is stateful, hence this ugly workaround is needed.
SYMBOL: decoding-buffer SYMBOL: decoding-buffer
DEFER: emit-char
: emit-next-char ( buffer -- ch buffer' ) : decode-chunk ( dialect-info -- ch buffer )
[ dup first2 swap [ second read-until drop ] [ first2 swap replace ] bi*
read1 dup CHAR: + = [ [ second first first { } ] [ raw-base64> emit-char ] if-empty ;
drop { CHAR: - } read-until drop
[ CHAR: + { } ] [ raw-base64> emit-next-char ] if-empty : fill-buffer ( dialect-info -- ch buffer )
] [ { } ] if dup second first first read1 dup swapd = [
] [ unclip swap ] if-empty ; drop decode-chunk
] [ nip { } ] if ;
: emit-char ( dialect-info buffer -- ch buffer' )
[ fill-buffer ] [ nip unclip swap ] if-empty ;
: decode-utf7 ( stream encoding -- char/f ) : decode-utf7 ( stream encoding -- char/f )
drop [ dialect-data at swap [
decoding-buffer [ [ { } ] unless* emit-next-char ] change-global decoding-buffer [ dup { } ? emit-char ] change-global
] with-input-stream ; ] with-input-stream ;
M: utf7 decode-char decode-utf7 ; M: utf7 decode-char decode-utf7 ;