io.encodings.utf7: working utf7 decoding with tests
parent
9a7a2648fd
commit
c286d91289
|
@ -42,3 +42,16 @@ IN: io.encodings.utf7.tests
|
||||||
"~peter/mail/日本語/台北"
|
"~peter/mail/日本語/台北"
|
||||||
} dup [ utf7 encode utf7 decode ] map =
|
} dup [ utf7 encode utf7 decode ] map =
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
||||||
|
[ t ] [
|
||||||
|
{
|
||||||
|
"~/bågø"
|
||||||
|
"båx"
|
||||||
|
"bøx"
|
||||||
|
"test"
|
||||||
|
"Skräppost"
|
||||||
|
"Ting & Såger"
|
||||||
|
"~/Følder/mailbåx & stuff + more"
|
||||||
|
"~peter/mail/日本語/台北"
|
||||||
|
} dup [ utf7imap4 encode utf7imap4 decode ] map =
|
||||||
|
] unit-test
|
||||||
|
|
|
@ -5,8 +5,8 @@ USING:
|
||||||
base64
|
base64
|
||||||
byte-arrays
|
byte-arrays
|
||||||
fry
|
fry
|
||||||
io
|
grouping.extras
|
||||||
io.encodings io.encodings.string io.encodings.utf16
|
io io.encodings io.encodings.string io.encodings.utf16
|
||||||
kernel
|
kernel
|
||||||
math math.functions
|
math math.functions
|
||||||
namespaces
|
namespaces
|
||||||
|
@ -32,16 +32,6 @@ CONSTANT: dialect-data {
|
||||||
: raw-base64> ( str -- str' )
|
: raw-base64> ( str -- str' )
|
||||||
dup length 4 / ceiling 4 * CHAR: = pad-tail base64> utf16be decode ;
|
dup length 4 / ceiling 4 * CHAR: = pad-tail base64> utf16be decode ;
|
||||||
|
|
||||||
: (group-by-loop) ( elt key groups -- groups' )
|
|
||||||
2dup [ nip empty? ] [ ?last ?first = not ] 2bi or [
|
|
||||||
-rot swap 1array
|
|
||||||
] [
|
|
||||||
nip unclip-last rot [ first2 ] dip suffix
|
|
||||||
] if 2array suffix ;
|
|
||||||
|
|
||||||
: group-by ( seq quot: ( elt -- key ) -- groups )
|
|
||||||
'[ dup _ call( x -- y ) rot (group-by-loop) ] { } swap reduce ;
|
|
||||||
|
|
||||||
: encode-chunk ( repl-pair surround-pair chunk ascii? -- byte-array )
|
: encode-chunk ( repl-pair surround-pair chunk ascii? -- byte-array )
|
||||||
[ swap [ first ] [ concat ] bi replace nip ]
|
[ swap [ first ] [ concat ] bi replace nip ]
|
||||||
[ >raw-base64 -rot [ first2 replace ] [ first2 surround ] bi* ] if ;
|
[ >raw-base64 -rot [ first2 replace ] [ first2 surround ] bi* ] if ;
|
||||||
|
@ -58,18 +48,23 @@ M: utf7imap4 encode-string stream-write-utf7 ;
|
||||||
|
|
||||||
! UTF-7 decoding is stateful, hence this ugly workaround is needed.
|
! UTF-7 decoding is stateful, hence this ugly workaround is needed.
|
||||||
SYMBOL: decoding-buffer
|
SYMBOL: decoding-buffer
|
||||||
|
DEFER: emit-char
|
||||||
|
|
||||||
: emit-next-char ( buffer -- ch buffer' )
|
: decode-chunk ( dialect-info -- ch buffer )
|
||||||
[
|
dup first2 swap [ second read-until drop ] [ first2 swap replace ] bi*
|
||||||
read1 dup CHAR: + = [
|
[ second first first { } ] [ raw-base64> emit-char ] if-empty ;
|
||||||
drop { CHAR: - } read-until drop
|
|
||||||
[ CHAR: + { } ] [ raw-base64> emit-next-char ] if-empty
|
: fill-buffer ( dialect-info -- ch buffer )
|
||||||
] [ { } ] if
|
dup second first first read1 dup swapd = [
|
||||||
] [ unclip swap ] if-empty ;
|
drop decode-chunk
|
||||||
|
] [ nip { } ] if ;
|
||||||
|
|
||||||
|
: emit-char ( dialect-info buffer -- ch buffer' )
|
||||||
|
[ fill-buffer ] [ nip unclip swap ] if-empty ;
|
||||||
|
|
||||||
: decode-utf7 ( stream encoding -- char/f )
|
: decode-utf7 ( stream encoding -- char/f )
|
||||||
drop [
|
dialect-data at swap [
|
||||||
decoding-buffer [ [ { } ] unless* emit-next-char ] change-global
|
decoding-buffer [ dup { } ? emit-char ] change-global
|
||||||
] with-input-stream ;
|
] with-input-stream ;
|
||||||
|
|
||||||
M: utf7 decode-char decode-utf7 ;
|
M: utf7 decode-char decode-utf7 ;
|
||||||
|
|
Loading…
Reference in New Issue