io.encodings.utf7: cleaned up vocab per review
1. Tuple is used to hold the decoding buffer instead of a global 2. Fixed problematic sequence type conversions.db4
parent
797d73bb8c
commit
a47c41e45c
|
@ -2,7 +2,7 @@ USING: help.markup help.syntax ;
|
||||||
IN: io.encodings.utf7
|
IN: io.encodings.utf7
|
||||||
|
|
||||||
HELP: utf7
|
HELP: utf7
|
||||||
{ $class-description "Encoding descriptor for UTF-7 encoding." } ;
|
{ $description "Encoding descriptor for UTF-7 encoding." } ;
|
||||||
|
|
||||||
HELP: utf7imap4
|
HELP: utf7imap4
|
||||||
{ $class-description "Encoding descriptor for the encoding UTF-7 modified for IMAP (see RFC 3501 5.1.3)." } ;
|
{ $description "Encoding descriptor for the encoding UTF-7 modified for IMAP (see RFC 3501 5.1.3)." } ;
|
||||||
|
|
|
@ -1,9 +1,5 @@
|
||||||
USING:
|
USING: io.encodings.string io.encodings.utf7 kernel sequences strings
|
||||||
io.encodings.string io.encodings.utf7
|
tools.test ;
|
||||||
kernel
|
|
||||||
sequences
|
|
||||||
strings
|
|
||||||
tools.test ;
|
|
||||||
IN: io.encodings.utf7.tests
|
IN: io.encodings.utf7.tests
|
||||||
|
|
||||||
[
|
[
|
||||||
|
|
|
@ -1,71 +1,61 @@
|
||||||
USING:
|
USING: accessors ascii base64 fry grouping.extras io io.encodings
|
||||||
arrays
|
io.encodings.string io.encodings.utf16 kernel math math.functions
|
||||||
ascii
|
sequences splitting strings ;
|
||||||
assocs
|
|
||||||
base64
|
|
||||||
byte-arrays
|
|
||||||
fry
|
|
||||||
grouping.extras
|
|
||||||
io io.encodings io.encodings.string io.encodings.utf16
|
|
||||||
kernel
|
|
||||||
math math.functions
|
|
||||||
namespaces
|
|
||||||
sequences
|
|
||||||
splitting
|
|
||||||
strings ;
|
|
||||||
IN: io.encodings.utf7
|
IN: io.encodings.utf7
|
||||||
|
|
||||||
SINGLETON: utf7
|
TUPLE: utf7codec dialect buffer ;
|
||||||
SINGLETON: utf7imap4
|
|
||||||
|
|
||||||
! This map encodes the difference between standard utf7 and the
|
! These words encodes the difference between standard utf7 and the
|
||||||
! dialect used by IMAP which wants slashes repladed with commas when
|
! dialect used by IMAP which wants slashes replaced with commas when
|
||||||
! encoding and uses '&' instead of '+' as the escaping character.
|
! encoding and uses '&' instead of '+' as the escaping character.
|
||||||
CONSTANT: dialect-data {
|
: utf7 ( -- t )
|
||||||
{ utf7 { { "" "" } { "+" "-" } } }
|
{
|
||||||
{ utf7imap4 { { "/" "," } { "&" "-" } } }
|
{ { } { } }
|
||||||
}
|
{ { CHAR: + } { CHAR: - } }
|
||||||
|
} V{ } utf7codec boa ;
|
||||||
|
|
||||||
: >raw-base64 ( byte-array -- str )
|
: utf7imap4 ( -- t )
|
||||||
|
{
|
||||||
|
{ { CHAR: / } { CHAR: , } }
|
||||||
|
{ { CHAR: & } { CHAR: - } }
|
||||||
|
} V{ } utf7codec boa ;
|
||||||
|
|
||||||
|
: >raw-base64 ( bytes -- bytes' )
|
||||||
>string utf16be encode >base64 [ CHAR: = = ] trim-tail ;
|
>string utf16be encode >base64 [ CHAR: = = ] trim-tail ;
|
||||||
|
|
||||||
: raw-base64> ( str -- str' )
|
: raw-base64> ( str -- str' )
|
||||||
dup length 4 / ceiling 4 * CHAR: = pad-tail base64> utf16be decode ;
|
dup length 4 / ceiling 4 * CHAR: = pad-tail base64> utf16be decode ;
|
||||||
|
|
||||||
: encode-chunk ( repl-pair surround-pair chunk ascii? -- byte-array )
|
: encode-chunk ( repl-pair surround-pair chunk ascii? -- bytes )
|
||||||
[ swap [ first ] [ concat ] bi replace nip ]
|
[ swap [ first ] [ concat ] bi replace nip ]
|
||||||
[ >raw-base64 -rot [ first2 replace ] [ first2 surround ] bi* ] if ;
|
[ >raw-base64 -rot [ first2 replace ] [ first2 surround ] bi* ] if ;
|
||||||
|
|
||||||
: encode-utf7-string ( str dialect -- byte-array )
|
: encode-utf7-string ( str codec -- bytes )
|
||||||
[ [ printable? ] group-by ] dip
|
[ [ printable? ] group-by ] dip
|
||||||
dialect-data at first2 '[ _ _ rot first2 swap encode-chunk ] map concat ;
|
dialect>> first2 '[ _ _ rot first2 swap encode-chunk ] map
|
||||||
|
B{ } concat-as ;
|
||||||
|
|
||||||
: stream-write-utf7 ( string stream encoding -- )
|
M: utf7codec encode-string ( str stream codec -- )
|
||||||
swapd encode-utf7-string >byte-array swap stream-write ;
|
swapd encode-utf7-string swap stream-write ;
|
||||||
|
|
||||||
M: utf7 encode-string stream-write-utf7 ;
|
|
||||||
M: utf7imap4 encode-string stream-write-utf7 ;
|
|
||||||
|
|
||||||
! UTF-7 decoding is stateful, hence this ugly workaround is needed.
|
|
||||||
SYMBOL: decoding-buffer
|
|
||||||
DEFER: emit-char
|
DEFER: emit-char
|
||||||
|
|
||||||
: decode-chunk ( dialect-info -- ch buffer )
|
: decode-chunk ( dialect -- ch buffer )
|
||||||
dup first2 swap [ second read-until drop ] [ first2 swap replace ] bi*
|
dup first2 swap [ second read-until drop ] [ first2 swap replace ] bi*
|
||||||
[ second first first { } ] [ raw-base64> emit-char ] if-empty ;
|
[ second first first { } ] [ raw-base64> emit-char ] if-empty ;
|
||||||
|
|
||||||
: fill-buffer ( dialect-info -- ch buffer )
|
: fill-buffer ( dialect -- ch buffer )
|
||||||
dup second first first read1 dup swapd = [
|
dup second first first read1 dup swapd = [
|
||||||
drop decode-chunk
|
drop decode-chunk
|
||||||
] [ nip { } ] if ;
|
] [ nip { } ] if ;
|
||||||
|
|
||||||
: emit-char ( dialect-info buffer -- ch buffer' )
|
: emit-char ( dialect buffer -- ch buffer' )
|
||||||
[ fill-buffer ] [ nip unclip swap ] if-empty ;
|
[ fill-buffer ] [ nip unclip swap ] if-empty ;
|
||||||
|
|
||||||
: decode-utf7 ( stream encoding -- char/f )
|
: replace-all! ( src dst -- )
|
||||||
dialect-data at swap [
|
[ delete-all ] keep push-all ;
|
||||||
decoding-buffer [ dup { } ? emit-char ] change-global
|
|
||||||
] with-input-stream ;
|
|
||||||
|
|
||||||
M: utf7 decode-char decode-utf7 ;
|
M: utf7codec decode-char ( stream codec -- char/f )
|
||||||
M: utf7imap4 decode-char decode-utf7 ;
|
swap [
|
||||||
|
[ dialect>> ] [ buffer>> ] bi [ emit-char ] keep replace-all!
|
||||||
|
] with-input-stream ;
|
||||||
|
|
Loading…
Reference in New Issue