| 
									
										
										
										
											2010-04-19 02:13:21 -04:00
										 |  |  | ! Copyright (C) 2008, 2010 Daniel Ehrenberg, Slava Pestov. | 
					
						
							| 
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 |  |  | ! See http://factorcode.org/license.txt for BSD license. | 
					
						
							| 
									
										
										
										
											2013-11-24 16:43:00 -05:00
										 |  |  | USING: accessors byte-arrays combinators destructors io | 
					
						
							|  |  |  | io.streams.plain kernel kernel.private math namespaces sbufs | 
					
						
							|  |  |  | sequences sequences.private splitting strings strings.private ;
 | 
					
						
							| 
									
										
										
										
											2007-09-20 18:09:08 -04:00
										 |  |  | IN: io.encodings | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-03-05 15:51:01 -05:00
										 |  |  | ! The encoding descriptor protocol | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-10-13 00:08:58 -04:00
										 |  |  | GENERIC: guess-encoded-length ( string-length encoding -- byte-length )
 | 
					
						
							|  |  |  | GENERIC: guess-decoded-length ( byte-length encoding -- string-length )
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | M: object guess-decoded-length drop ; inline
 | 
					
						
							|  |  |  | M: object guess-encoded-length drop ; inline
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-03-14 04:09:51 -04:00
										 |  |  | GENERIC: decode-char ( stream encoding -- char/f )
 | 
					
						
							| 
									
										
										
										
											2008-03-05 15:51:01 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-03-18 16:35:22 -04:00
										 |  |  | GENERIC: decode-until ( seps stream encoding -- string/f sep/f )
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | <PRIVATE
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | ! If the stop? branch is taken convert the sbuf to a string | 
					
						
							|  |  |  | ! If sep is present, returns ``string sep'' (string can be "") | 
					
						
							|  |  |  | ! If sep is f, returns ``string f'' or ``f f'' | 
					
						
							|  |  |  | : read-until-loop ( buf quot: ( -- char stop? ) -- string/f sep/f )
 | 
					
						
							|  |  |  |     dup call
 | 
					
						
							|  |  |  |     [ nip [ "" like ] dip [ f like f ] unless* ] | 
					
						
							|  |  |  |     [ pick push read-until-loop ] if ; inline recursive
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | PRIVATE>
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : (decode-until) ( seps stream encoding -- string/f sep/f )
 | 
					
						
							|  |  |  |     [ decode-char dup ] 2curry swap [ dupd member? ] curry
 | 
					
						
							|  |  |  |     [ [ drop f t ] if ] curry compose
 | 
					
						
							|  |  |  |     [ 100 <sbuf> ] dip read-until-loop ; inline
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | M: object decode-until (decode-until) ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-11-24 19:08:26 -05:00
										 |  |  | CONSTANT: replacement-char 0xfffd
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-11-24 16:43:00 -05:00
										 |  |  | <PRIVATE
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : string>byte-array-fast ( string -- byte-array )
 | 
					
						
							|  |  |  |     { string } declare ! aux>> must be f | 
					
						
							| 
									
										
										
										
											2014-03-07 14:19:45 -05:00
										 |  |  |     [ length ] keep over (byte-array) [ | 
					
						
							| 
									
										
										
										
											2013-11-24 16:43:00 -05:00
										 |  |  |         [ | 
					
						
							|  |  |  |             [ [ string-nth-fast ] 2keep drop ] | 
					
						
							|  |  |  |             [ set-nth-unsafe ] bi*
 | 
					
						
							|  |  |  |         ] 2curry each-integer
 | 
					
						
							|  |  |  |     ] keep ; inline
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-11-24 19:08:26 -05:00
										 |  |  | : byte-array>string-fast ( byte-array -- string )
 | 
					
						
							|  |  |  |     { byte-array } declare | 
					
						
							|  |  |  |     [ length ] keep over 0 <string> [ | 
					
						
							|  |  |  |         [ | 
					
						
							| 
									
										
										
										
											2013-11-24 23:04:15 -05:00
										 |  |  |             [ [ nth-unsafe ] 2keep drop ] | 
					
						
							| 
									
										
										
										
											2013-11-24 19:08:26 -05:00
										 |  |  |             [ | 
					
						
							| 
									
										
										
										
											2013-11-24 23:04:15 -05:00
										 |  |  |                 pick 127 <=
 | 
					
						
							|  |  |  |                 [ set-string-nth-fast ] | 
					
						
							|  |  |  |                 [ [ drop replacement-char ] 2dip set-string-nth-slow ] | 
					
						
							|  |  |  |                 if
 | 
					
						
							|  |  |  |             ] bi*
 | 
					
						
							| 
									
										
										
										
											2013-11-24 19:08:26 -05:00
										 |  |  |         ] 2curry each-integer
 | 
					
						
							|  |  |  |     ] keep dup reset-string-hashcode ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-11-24 16:43:00 -05:00
										 |  |  | PRIVATE>
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-03-14 04:09:51 -04:00
										 |  |  | GENERIC: encode-char ( char stream encoding -- )
 | 
					
						
							| 
									
										
										
										
											2008-03-05 15:51:01 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-04-19 02:13:21 -04:00
										 |  |  | GENERIC: encode-string ( string stream encoding -- )
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | M: object encode-string [ encode-char ] 2curry each ; inline
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-03-20 21:11:45 -04:00
										 |  |  | GENERIC: <decoder> ( stream encoding -- newstream )
 | 
					
						
							| 
									
										
										
										
											2008-03-05 15:51:01 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-10-13 01:18:06 -04:00
										 |  |  | TUPLE: decoder { stream read-only } { code read-only } { cr boolean } ;
 | 
					
						
							| 
									
										
										
										
											2011-10-18 19:24:50 -04:00
										 |  |  | INSTANCE: decoder input-stream | 
					
						
							| 
									
										
										
										
											2008-02-13 02:02:37 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-03-20 16:00:49 -04:00
										 |  |  | ERROR: decode-error ;
 | 
					
						
							| 
									
										
										
										
											2008-02-13 18:53:10 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-03-19 16:24:49 -04:00
										 |  |  | GENERIC: <encoder> ( stream encoding -- newstream )
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-10-13 01:18:06 -04:00
										 |  |  | TUPLE: encoder { stream read-only } { code read-only } ;
 | 
					
						
							| 
									
										
										
										
											2011-10-18 19:24:50 -04:00
										 |  |  | INSTANCE: encoder output-stream | 
					
						
							| 
									
										
										
										
											2008-03-19 16:24:49 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-03-20 16:00:49 -04:00
										 |  |  | ERROR: encode-error ;
 | 
					
						
							| 
									
										
										
										
											2008-03-19 16:24:49 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | ! Decoding | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-10-13 00:08:58 -04:00
										 |  |  | M: object <decoder> f decoder boa ; inline
 | 
					
						
							| 
									
										
										
										
											2008-02-11 00:14:42 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-06-17 06:22:33 -04:00
										 |  |  | <PRIVATE
 | 
					
						
							| 
									
										
										
										
											2008-02-13 20:53:53 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-12-15 20:44:56 -05:00
										 |  |  | : cr+ ( stream -- ) t >>cr drop ; inline
 | 
					
						
							| 
									
										
										
										
											2008-02-16 16:35:44 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-12-15 20:44:56 -05:00
										 |  |  | : cr- ( stream -- ) f >>cr drop ; inline
 | 
					
						
							| 
									
										
										
										
											2008-02-13 20:53:53 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-06-17 06:22:33 -04:00
										 |  |  | : >decoder< ( decoder -- stream encoding )
 | 
					
						
							|  |  |  |     [ stream>> ] [ code>> ] bi ; inline
 | 
					
						
							| 
									
										
										
										
											2008-02-13 20:53:53 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-15 18:11:18 -04:00
										 |  |  | M: decoder stream-element-type | 
					
						
							| 
									
										
										
										
											2011-10-13 01:18:06 -04:00
										 |  |  |     drop +character+ ; inline
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : (read1) ( decoder -- ch )
 | 
					
						
							|  |  |  |     >decoder< decode-char ; inline
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-10-13 02:25:10 -04:00
										 |  |  | : fix-cr ( decoder c -- c' )
 | 
					
						
							|  |  |  |     over cr>> [ | 
					
						
							| 
									
										
										
										
											2014-02-25 13:04:08 -05:00
										 |  |  |         over cr- dup CHAR: \n eq? [ drop (read1) ] [ nip ] if
 | 
					
						
							| 
									
										
										
										
											2011-10-13 02:25:10 -04:00
										 |  |  |     ] [ nip ] if ; inline
 | 
					
						
							| 
									
										
										
										
											2011-10-13 01:18:06 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | M: decoder stream-read1 ( decoder -- ch )
 | 
					
						
							|  |  |  |     dup (read1) fix-cr ; inline
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-06-04 20:46:25 -04:00
										 |  |  | M: decoder stream-tell stream>> stream-tell ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-10-13 02:25:10 -04:00
										 |  |  | : (read-first) ( n buf decoder -- buf stream encoding n c )
 | 
					
						
							|  |  |  |     [ rot [ >decoder< ] dip 2over decode-char ] | 
					
						
							|  |  |  |     [ swap fix-cr ] bi ; inline
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : (store-read) ( buf stream encoding n c i -- buf stream encoding n )
 | 
					
						
							|  |  |  |     [ rot [ set-nth-unsafe ] keep ] 2curry 3dip ; inline
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : (finish-read) ( buf stream encoding n i -- i )
 | 
					
						
							|  |  |  |     2nip 2nip ; inline
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : (read-next) ( stream encoding n i -- stream encoding n i c )
 | 
					
						
							|  |  |  |     [ 2dup decode-char ] 2dip rot ; inline
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : (read-rest) ( buf stream encoding n i -- count )
 | 
					
						
							|  |  |  |     2dup = [ (finish-read) ] [ | 
					
						
							|  |  |  |         (read-next) [ | 
					
						
							|  |  |  |             swap [ (store-read) ] [ 1 + ] bi (read-rest) | 
					
						
							|  |  |  |         ] [ (finish-read) ] if*
 | 
					
						
							| 
									
										
										
										
											2011-10-13 01:18:06 -04:00
										 |  |  |     ] if ; inline recursive
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-10-14 21:08:27 -04:00
										 |  |  | M: decoder stream-read-unsafe | 
					
						
							| 
									
										
										
										
											2011-10-13 02:25:10 -04:00
										 |  |  |     pick 0 = [ 3drop 0 ] [ | 
					
						
							|  |  |  |         (read-first) [ | 
					
						
							|  |  |  |             0 (store-read) | 
					
						
							|  |  |  |             1 (read-rest) | 
					
						
							| 
									
										
										
										
											2012-09-28 12:16:08 -04:00
										 |  |  |         ] [ 4drop 0 ] if*
 | 
					
						
							| 
									
										
										
										
											2011-10-13 01:18:06 -04:00
										 |  |  |     ] if ; inline
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-08-25 20:03:51 -04:00
										 |  |  | M: decoder stream-contents* | 
					
						
							|  |  |  |     (stream-contents-by-element) ; inline
 | 
					
						
							| 
									
										
										
										
											2011-10-14 21:08:27 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-06-17 06:22:33 -04:00
										 |  |  | : line-ends/eof ( stream str -- str ) f like swap cr- ; inline
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : line-ends\r ( stream str -- str ) swap cr+ ; inline
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : line-ends\n ( stream str -- str )
 | 
					
						
							| 
									
										
										
										
											2014-02-25 13:04:08 -05:00
										 |  |  |     over cr>> [ | 
					
						
							|  |  |  |         over cr- [ stream-readln ] [ nip ] if-empty
 | 
					
						
							|  |  |  |     ] [ nip ] if ; inline
 | 
					
						
							| 
									
										
										
										
											2008-06-17 06:22:33 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : handle-readln ( stream str ch -- str )
 | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         { f [ line-ends/eof ] } | 
					
						
							|  |  |  |         { CHAR: \r [ line-ends\r ] } | 
					
						
							|  |  |  |         { CHAR: \n [ line-ends\n ] } | 
					
						
							|  |  |  |     } case ; inline
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-10-31 03:58:53 -04:00
										 |  |  | M: decoder stream-read-until | 
					
						
							|  |  |  |     dup cr>> [ | 
					
						
							| 
									
										
										
										
											2014-10-31 12:33:59 -04:00
										 |  |  |         dup cr- 2dup
 | 
					
						
							| 
									
										
										
										
											2014-10-31 03:58:53 -04:00
										 |  |  |         >decoder< decode-until | 
					
						
							| 
									
										
										
										
											2014-10-31 12:33:59 -04:00
										 |  |  |         over [ | 
					
						
							|  |  |  |             dup CHAR: \n = [ | 
					
						
							|  |  |  |                 2drop stream-read-until
 | 
					
						
							|  |  |  |             ] [ | 
					
						
							|  |  |  |                 [ 2drop ] 2dip
 | 
					
						
							|  |  |  |             ] if
 | 
					
						
							| 
									
										
										
										
											2014-10-31 03:58:53 -04:00
										 |  |  |         ] [ | 
					
						
							| 
									
										
										
										
											2014-10-31 12:33:59 -04:00
										 |  |  |             first-unsafe CHAR: \n = [ [ rest ] dip ] when
 | 
					
						
							|  |  |  |             [ 2drop ] 2dip
 | 
					
						
							|  |  |  |         ] if-empty
 | 
					
						
							| 
									
										
										
										
											2014-10-31 03:58:53 -04:00
										 |  |  |     ] [ | 
					
						
							|  |  |  |         >decoder< decode-until | 
					
						
							|  |  |  |     ] if ;
 | 
					
						
							| 
									
										
										
										
											2008-02-13 20:53:53 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-03-15 13:50:40 -04:00
										 |  |  | M: decoder stream-readln | 
					
						
							| 
									
										
										
										
											2013-03-18 16:35:22 -04:00
										 |  |  |     "\r\n" over >decoder< decode-until handle-readln ;
 | 
					
						
							| 
									
										
										
										
											2008-02-16 16:35:44 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-06-17 06:22:33 -04:00
										 |  |  | M: decoder dispose stream>> dispose ;
 | 
					
						
							| 
									
										
										
										
											2008-03-14 04:09:51 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-02-16 16:35:44 -05:00
										 |  |  | ! Encoding | 
					
						
							| 
									
										
										
										
											2011-10-13 00:08:58 -04:00
										 |  |  | M: object <encoder> encoder boa ; inline
 | 
					
						
							| 
									
										
										
										
											2008-03-14 04:09:51 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : >encoder< ( encoder -- stream encoding )
 | 
					
						
							| 
									
										
										
										
											2008-06-17 06:22:33 -04:00
										 |  |  |     [ stream>> ] [ code>> ] bi ; inline
 | 
					
						
							| 
									
										
										
										
											2008-02-16 16:35:44 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-15 18:11:18 -04:00
										 |  |  | M: encoder stream-element-type | 
					
						
							| 
									
										
										
										
											2011-10-13 00:08:58 -04:00
										 |  |  |     drop +character+ ; inline
 | 
					
						
							| 
									
										
										
										
											2009-03-15 18:11:18 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-03-05 15:51:01 -05:00
										 |  |  | M: encoder stream-write1 | 
					
						
							| 
									
										
										
										
											2011-10-13 00:08:58 -04:00
										 |  |  |     >encoder< encode-char ; inline
 | 
					
						
							| 
									
										
										
										
											2008-02-13 20:53:53 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-03-05 15:51:01 -05:00
										 |  |  | M: encoder stream-write | 
					
						
							| 
									
										
										
										
											2011-10-13 00:08:58 -04:00
										 |  |  |     >encoder< encode-string ; inline
 | 
					
						
							| 
									
										
										
										
											2008-02-16 16:35:44 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-10-13 00:08:58 -04:00
										 |  |  | M: encoder dispose stream>> dispose ; inline
 | 
					
						
							| 
									
										
										
										
											2008-02-13 20:53:53 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-10-13 00:08:58 -04:00
										 |  |  | M: encoder stream-flush stream>> stream-flush ; inline
 | 
					
						
							| 
									
										
										
										
											2008-03-18 17:01:14 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-03-05 15:51:01 -05:00
										 |  |  | INSTANCE: encoder plain-writer | 
					
						
							| 
									
										
										
										
											2014-02-25 13:04:08 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-05-10 21:17:24 -04:00
										 |  |  | PRIVATE>
 | 
					
						
							| 
									
										
										
										
											2008-02-15 20:44:35 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-06-12 04:49:29 -04:00
										 |  |  | GENERIC# re-encode 1 ( stream encoding -- newstream )
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | M: object re-encode <encoder> ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | M: encoder re-encode [ stream>> ] dip re-encode ;
 | 
					
						
							| 
									
										
										
										
											2008-02-15 20:44:35 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-05-10 21:17:24 -04:00
										 |  |  | : encode-output ( encoding -- )
 | 
					
						
							|  |  |  |     output-stream [ swap re-encode ] change ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-06-12 04:49:29 -04:00
										 |  |  | : with-encoded-output ( encoding quot -- )
 | 
					
						
							|  |  |  |     [ [ output-stream get ] dip re-encode ] dip
 | 
					
						
							|  |  |  |     with-output-stream* ; inline
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | GENERIC# re-decode 1 ( stream encoding -- newstream )
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | M: object re-decode <decoder> ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | M: decoder re-decode [ stream>> ] dip re-decode ;
 | 
					
						
							| 
									
										
										
										
											2008-03-19 16:24:49 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-05-10 21:17:24 -04:00
										 |  |  | : decode-input ( encoding -- )
 | 
					
						
							|  |  |  |     input-stream [ swap re-decode ] change ;
 | 
					
						
							| 
									
										
										
										
											2008-06-12 04:49:29 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : with-decoded-input ( encoding quot -- )
 | 
					
						
							|  |  |  |     [ [ input-stream get ] dip re-decode ] dip
 | 
					
						
							|  |  |  |     with-input-stream* ; inline
 |