io.encodings.utf7: cleaned up vocab per review
1. Tuple is used to hold the decoding buffer instead of a global 2. Fixed problematic sequence type conversions.db4
							parent
							
								
									797d73bb8c
								
							
						
					
					
						commit
						a47c41e45c
					
				|  | @ -2,7 +2,7 @@ USING: help.markup help.syntax ; | |||
| IN: io.encodings.utf7 | ||||
| 
 | ||||
| HELP: utf7 | ||||
| { $class-description "Encoding descriptor for UTF-7 encoding." } ; | ||||
| { $description "Encoding descriptor for UTF-7 encoding." } ; | ||||
| 
 | ||||
| HELP: utf7imap4 | ||||
| { $class-description "Encoding descriptor for the encoding UTF-7 modified for IMAP (see RFC 3501 5.1.3)." } ; | ||||
| { $description "Encoding descriptor for the encoding UTF-7 modified for IMAP (see RFC 3501 5.1.3)." } ; | ||||
|  |  | |||
|  | @ -1,9 +1,5 @@ | |||
| USING: | ||||
|     io.encodings.string io.encodings.utf7 | ||||
|     kernel | ||||
|     sequences | ||||
|     strings | ||||
|     tools.test ; | ||||
| USING: io.encodings.string io.encodings.utf7 kernel sequences strings | ||||
| tools.test ; | ||||
| IN: io.encodings.utf7.tests | ||||
| 
 | ||||
| [ | ||||
|  |  | |||
|  | @ -1,71 +1,61 @@ | |||
| USING: | ||||
|     arrays | ||||
|     ascii | ||||
|     assocs | ||||
|     base64 | ||||
|     byte-arrays | ||||
|     fry | ||||
|     grouping.extras | ||||
|     io io.encodings io.encodings.string io.encodings.utf16 | ||||
|     kernel | ||||
|     math math.functions | ||||
|     namespaces | ||||
|     sequences | ||||
|     splitting | ||||
|     strings ; | ||||
| USING: accessors ascii base64 fry grouping.extras io io.encodings | ||||
| io.encodings.string io.encodings.utf16 kernel math math.functions | ||||
| sequences splitting strings ; | ||||
| IN: io.encodings.utf7 | ||||
| 
 | ||||
| SINGLETON: utf7 | ||||
| SINGLETON: utf7imap4 | ||||
| TUPLE: utf7codec dialect buffer ; | ||||
| 
 | ||||
| ! This map encodes the difference between standard utf7 and the | ||||
| ! dialect used by IMAP which wants slashes repladed with commas when | ||||
| ! These words encodes the difference between standard utf7 and the | ||||
| ! dialect used by IMAP which wants slashes replaced with commas when | ||||
| ! encoding and uses '&' instead of '+' as the escaping character. | ||||
| CONSTANT: dialect-data { | ||||
|     { utf7 { { "" "" } { "+" "-" } } } | ||||
|     { utf7imap4 { { "/" "," } { "&" "-" } } } | ||||
| } | ||||
| : utf7 ( -- t ) | ||||
|     { | ||||
|         { { } { } } | ||||
|         { { CHAR: + } { CHAR: - } } | ||||
|     } V{ } utf7codec boa ; | ||||
| 
 | ||||
| : >raw-base64 ( byte-array -- str ) | ||||
| : utf7imap4 ( -- t ) | ||||
|     { | ||||
|         { { CHAR: / } { CHAR: , } } | ||||
|         { { CHAR: & } { CHAR: - } } | ||||
|     } V{ } utf7codec boa ; | ||||
| 
 | ||||
| : >raw-base64 ( bytes -- bytes' ) | ||||
|     >string utf16be encode >base64 [ CHAR: = = ] trim-tail ; | ||||
| 
 | ||||
| : raw-base64> ( str -- str' ) | ||||
|     dup length 4 / ceiling 4 * CHAR: = pad-tail base64> utf16be decode ; | ||||
| 
 | ||||
| : encode-chunk ( repl-pair surround-pair chunk ascii? -- byte-array ) | ||||
| : encode-chunk ( repl-pair surround-pair chunk ascii? -- bytes ) | ||||
|     [ swap [ first ] [ concat ] bi replace nip ] | ||||
|     [ >raw-base64 -rot [ first2 replace ] [ first2 surround ] bi* ] if ; | ||||
| 
 | ||||
| : encode-utf7-string ( str dialect -- byte-array ) | ||||
| : encode-utf7-string ( str codec -- bytes ) | ||||
|     [ [ printable? ] group-by ] dip | ||||
|     dialect-data at first2 '[ _ _ rot first2 swap encode-chunk ] map concat ; | ||||
|     dialect>> first2 '[ _ _ rot first2 swap encode-chunk ] map | ||||
|     B{ } concat-as ; | ||||
| 
 | ||||
| : stream-write-utf7 ( string stream encoding -- ) | ||||
|     swapd encode-utf7-string >byte-array swap stream-write ; | ||||
| M: utf7codec encode-string ( str stream codec -- ) | ||||
|     swapd encode-utf7-string swap stream-write ; | ||||
| 
 | ||||
| M: utf7 encode-string stream-write-utf7 ; | ||||
| M: utf7imap4 encode-string stream-write-utf7 ; | ||||
| 
 | ||||
| ! UTF-7 decoding is stateful, hence this ugly workaround is needed. | ||||
| SYMBOL: decoding-buffer | ||||
| DEFER: emit-char | ||||
| 
 | ||||
| : decode-chunk ( dialect-info -- ch buffer ) | ||||
| : decode-chunk ( dialect -- ch buffer ) | ||||
|     dup first2 swap [ second read-until drop ] [ first2 swap replace ] bi* | ||||
|     [ second first first { } ] [ raw-base64> emit-char ] if-empty ; | ||||
| 
 | ||||
| : fill-buffer ( dialect-info -- ch buffer ) | ||||
| : fill-buffer ( dialect -- ch buffer ) | ||||
|     dup second first first read1 dup swapd = [ | ||||
|         drop decode-chunk | ||||
|     ] [ nip { } ] if ; | ||||
| 
 | ||||
| : emit-char ( dialect-info buffer -- ch buffer' ) | ||||
| : emit-char ( dialect buffer -- ch buffer' ) | ||||
|     [ fill-buffer ] [ nip unclip swap ] if-empty ; | ||||
| 
 | ||||
| : decode-utf7 ( stream encoding -- char/f ) | ||||
|     dialect-data at swap [ | ||||
|         decoding-buffer [ dup { } ? emit-char ] change-global | ||||
|     ] with-input-stream ; | ||||
| : replace-all! ( src dst -- ) | ||||
|     [ delete-all ] keep push-all ; | ||||
| 
 | ||||
| M: utf7 decode-char decode-utf7 ; | ||||
| M: utf7imap4 decode-char decode-utf7 ; | ||||
| M: utf7codec decode-char ( stream codec -- char/f ) | ||||
|     swap [ | ||||
|         [ dialect>> ] [ buffer>> ] bi [ emit-char ] keep replace-all! | ||||
|     ] with-input-stream ; | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue