| 
									
										
										
										
											2008-04-17 16:29:04 -04:00
										 |  |  | ! Copyright (C) 2007, 2008 Phil Dawes | 
					
						
							|  |  |  | ! See http://factorcode.org/license.txt for BSD license. | 
					
						
							| 
									
										
										
										
											2009-01-30 20:23:04 -05:00
										 |  |  | USING: kernel sequences io namespaces make combinators | 
					
						
							|  |  |  | unicode.categories io.files combinators.short-circuit ;
 | 
					
						
							| 
									
										
										
										
											2008-04-17 16:29:04 -04:00
										 |  |  | IN: csv | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-08-05 05:36:13 -04:00
										 |  |  | SYMBOL: delimiter | 
					
						
							| 
									
										
										
										
											2008-04-17 16:29:04 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-08-05 05:36:13 -04:00
										 |  |  | CHAR: , delimiter set-global
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-01-30 20:23:04 -05:00
										 |  |  | <PRIVATE
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-12-15 23:49:35 -05:00
										 |  |  | : delimiter> ( -- delimiter ) delimiter get ; inline
 | 
					
						
							| 
									
										
										
										
											2008-08-05 05:36:13 -04:00
										 |  |  |      | 
					
						
							|  |  |  | DEFER: quoted-field ( -- endchar )
 | 
					
						
							| 
									
										
										
										
											2008-07-25 17:02:07 -04:00
										 |  |  |      | 
					
						
							| 
									
										
										
										
											2008-04-30 07:28:39 -04:00
										 |  |  | : trim-whitespace ( str -- str )
 | 
					
						
							| 
									
										
										
										
											2009-01-30 20:23:04 -05:00
										 |  |  |     [ blank? ] trim ; inline
 | 
					
						
							| 
									
										
										
										
											2008-04-30 07:28:39 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : skip-to-field-end ( -- endchar )
 | 
					
						
							| 
									
										
										
										
											2008-04-30 12:50:40 -04:00
										 |  |  |   "\n" delimiter> suffix read-until nip ; inline
 | 
					
						
							| 
									
										
										
										
											2008-04-30 07:28:39 -04:00
										 |  |  |    | 
					
						
							| 
									
										
										
										
											2008-04-17 16:29:04 -04:00
										 |  |  | : not-quoted-field ( -- endchar )
 | 
					
						
							| 
									
										
										
										
											2009-01-30 20:23:04 -05:00
										 |  |  |     "\"\n" delimiter> suffix read-until
 | 
					
						
							|  |  |  |     dup { | 
					
						
							|  |  |  |         { CHAR: "    [ 2drop quoted-field ] } | 
					
						
							|  |  |  |         { delimiter> [ swap trim-whitespace % ] } | 
					
						
							|  |  |  |         { CHAR: \n   [ swap trim-whitespace % ] } | 
					
						
							|  |  |  |         { f          [ swap trim-whitespace % ] } | 
					
						
							|  |  |  |     } case ;
 | 
					
						
							| 
									
										
										
										
											2008-04-17 16:29:04 -04:00
										 |  |  |    | 
					
						
							|  |  |  | : maybe-escaped-quote ( -- endchar )
 | 
					
						
							| 
									
										
										
										
											2009-01-30 20:23:04 -05:00
										 |  |  |     read1 dup { | 
					
						
							|  |  |  |         { CHAR: "    [ , quoted-field ] } | 
					
						
							|  |  |  |         { delimiter> [ ] } | 
					
						
							|  |  |  |         { CHAR: \n   [ ] } | 
					
						
							|  |  |  |         [ 2drop skip-to-field-end ] | 
					
						
							|  |  |  |     } case ;
 | 
					
						
							| 
									
										
										
										
											2008-04-17 16:29:04 -04:00
										 |  |  |    | 
					
						
							|  |  |  | : quoted-field ( -- endchar )
 | 
					
						
							| 
									
										
										
										
											2009-01-30 20:23:04 -05:00
										 |  |  |     "\"" read-until
 | 
					
						
							|  |  |  |     drop % maybe-escaped-quote ;
 | 
					
						
							| 
									
										
										
										
											2008-04-17 16:29:04 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : field ( -- sep string )
 | 
					
						
							| 
									
										
										
										
											2009-01-30 20:23:04 -05:00
										 |  |  |     [ not-quoted-field ] "" make  ;
 | 
					
						
							| 
									
										
										
										
											2008-04-17 16:29:04 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : (row) ( -- sep )
 | 
					
						
							| 
									
										
										
										
											2009-01-30 20:23:04 -05:00
										 |  |  |     field ,  | 
					
						
							| 
									
										
										
										
											2009-02-12 02:32:06 -05:00
										 |  |  |     dup delimiter> = [ drop (row) ] when ;
 | 
					
						
							| 
									
										
										
										
											2008-04-17 16:29:04 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : row ( -- eof? array[string] )
 | 
					
						
							| 
									
										
										
										
											2009-01-30 20:23:04 -05:00
										 |  |  |     [ (row) ] { } make ;
 | 
					
						
							| 
									
										
										
										
											2008-04-17 16:29:04 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : (csv) ( -- )
 | 
					
						
							| 
									
										
										
										
											2009-02-12 02:32:06 -05:00
										 |  |  |     row | 
					
						
							|  |  |  |     dup [ empty? ] all? [ drop ] [ , ] if
 | 
					
						
							|  |  |  |     [ (csv) ] when ;
 | 
					
						
							| 
									
										
										
										
											2008-04-30 12:50:40 -04:00
										 |  |  |    | 
					
						
							| 
									
										
										
										
											2009-01-30 20:23:04 -05:00
										 |  |  | PRIVATE>
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-04-17 16:29:04 -04:00
										 |  |  | : csv-row ( stream -- row )
 | 
					
						
							| 
									
										
										
										
											2009-01-30 20:23:04 -05:00
										 |  |  |     [ row nip ] with-input-stream ;
 | 
					
						
							| 
									
										
										
										
											2008-04-17 16:29:04 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : csv ( stream -- rows )
 | 
					
						
							| 
									
										
										
										
											2009-02-12 02:32:06 -05:00
										 |  |  |     [ [ (csv) ] { } make ] with-input-stream
 | 
					
						
							|  |  |  |     dup peek { "" } = [ but-last ] when ;
 | 
					
						
							| 
									
										
										
										
											2008-04-30 12:50:40 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-01-30 20:23:04 -05:00
										 |  |  | : file>csv ( path encoding -- csv )
 | 
					
						
							|  |  |  |     <file-reader> csv ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : with-delimiter ( ch quot -- )
 | 
					
						
							|  |  |  |     [ delimiter ] dip with-variable ; inline
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | <PRIVATE
 | 
					
						
							| 
									
										
										
										
											2008-07-25 17:02:07 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : needs-escaping? ( cell -- ? )
 | 
					
						
							| 
									
										
										
										
											2009-01-30 20:23:04 -05:00
										 |  |  |     [ { [ "\n\"" member? ] [ delimiter get = ] } 1|| ] any? ; inline
 | 
					
						
							| 
									
										
										
										
											2008-07-25 17:02:07 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : escape-quotes ( cell -- cell' )
 | 
					
						
							| 
									
										
										
										
											2009-01-30 20:23:04 -05:00
										 |  |  |     [ | 
					
						
							|  |  |  |         [ | 
					
						
							|  |  |  |             [ , ] | 
					
						
							|  |  |  |             [ dup CHAR: " = [ , ] [ drop ] if ] bi
 | 
					
						
							|  |  |  |         ] each
 | 
					
						
							|  |  |  |     ] "" make ; inline
 | 
					
						
							| 
									
										
										
										
											2008-07-25 17:02:07 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : enclose-in-quotes ( cell -- cell' )
 | 
					
						
							| 
									
										
										
										
											2009-01-30 20:23:04 -05:00
										 |  |  |     "\"" dup surround ; inline
 | 
					
						
							| 
									
										
										
										
											2008-07-25 17:02:07 -04:00
										 |  |  |      | 
					
						
							|  |  |  | : escape-if-required ( cell -- cell' )
 | 
					
						
							| 
									
										
										
										
											2009-01-30 20:23:04 -05:00
										 |  |  |     dup needs-escaping? | 
					
						
							|  |  |  |     [ escape-quotes enclose-in-quotes ] when ; inline
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | PRIVATE>
 | 
					
						
							| 
									
										
										
										
											2008-07-25 17:02:07 -04:00
										 |  |  |      | 
					
						
							|  |  |  | : write-row ( row -- )
 | 
					
						
							| 
									
										
										
										
											2009-01-30 20:23:04 -05:00
										 |  |  |     [ delimiter get write1 ] | 
					
						
							|  |  |  |     [ escape-if-required write ] interleave nl ; inline
 | 
					
						
							| 
									
										
										
										
											2008-07-25 17:02:07 -04:00
										 |  |  |      | 
					
						
							| 
									
										
										
										
											2008-08-30 12:46:35 -04:00
										 |  |  | : write-csv ( rows stream -- )
 | 
					
						
							| 
									
										
										
										
											2009-01-30 20:23:04 -05:00
										 |  |  |     [ [ write-row ] each ] with-output-stream ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | : csv>file ( rows path encoding -- ) <file-writer> write-csv ;
 |