| 
									
										
										
										
											2008-04-17 16:29:04 -04:00
										 |  |  | ! Copyright (C) 2007, 2008 Phil Dawes | 
					
						
							|  |  |  | ! See http://factorcode.org/license.txt for BSD license. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | ! Simple CSV Parser | 
					
						
							|  |  |  | ! Phil Dawes phil@phildawes.net | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-09-10 23:11:40 -04:00
										 |  |  | USING: kernel sequences io namespaces make | 
					
						
							|  |  |  | combinators unicode.categories ;
 | 
					
						
							| 
									
										
										
										
											2008-04-17 16:29:04 -04:00
										 |  |  | IN: csv | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-08-05 05:36:13 -04:00
										 ! Variable naming the character that separates cells within a row.
										 ! It is read with `get`, so a `with-variable` scope (see with-delimiter)
										 ! overrides the global default set below.
										 SYMBOL: delimiter

										 ! Global default delimiter: a comma.
										 CHAR: , delimiter set-global

										 ! Pushes the delimiter character currently in effect.
										 : delimiter> ( -- delimiter )
										   delimiter get ; inline
 | 
					
						
							| 
									
										
										
										
											2008-08-05 05:36:13 -04:00
										 |  |  |      | 
					
						
							! Forward declaration: not-quoted-field and maybe-escaped-quote both call
							! quoted-field, which is only defined after them in this file.
							DEFER: quoted-field ( -- endchar )
 | 
					
						
							| 
									
										
										
										
											2008-07-25 17:02:07 -04:00
										 |  |  |      | 
					
						
							| 
									
										
										
										
											2008-04-30 07:28:39 -04:00
										 ! Removes every leading and trailing character for which blank? is true;
										 ! characters in the middle of the string are left alone.
										 : trim-whitespace ( str -- str ) [ blank? ] trim ; inline
 | 
					
						
							|  |  |  | 
 | 
					
						
							! Consumes input up to and including the next newline or delimiter,
							! throws the consumed text away and leaves only the terminator that
							! read-until reported.
							: skip-to-field-end ( -- endchar )
							  delimiter> "\n" swap suffix
							  read-until nip ; inline
 | 
					
						
							| 
									
										
										
										
											2008-04-30 07:28:39 -04:00
										 |  |  |    | 
					
						
							| 
									
										
										
										
											2008-04-17 16:29:04 -04:00
										 ! Reads a field that has not (yet) been recognised as quoted.
										 ! Input is consumed up to the first double quote, newline, delimiter or
										 ! end of input (terminator f).
										 !   double quote  -> the field is quoted: the text read so far is
										 !                    discarded and quoted-field takes over
										 !   anything else -> the text is whitespace-trimmed and appended to the
										 !                    string being built by the enclosing make
										 ! Leaves the character that ended the field (f at eof) on the stack.
										 : not-quoted-field ( -- endchar )
										   "\"\n" delimiter> suffix read-until   ! "
										   dup CHAR: " =                         ! "
										   [ 2drop quoted-field ]
										   [ swap trim-whitespace % ] if ;
 | 
					
						
							|  |  |  |    | 
					
						
							! Runs right after quoted-field has consumed a double quote; the next
							! character decides what that quote meant:
							!   another quote       -> escaped quote: one quote is added to the
							!                          field and quoted-field resumes
							!   delimiter / newline -> the quote closed the field; that character
							!                          is returned as the terminator
							!   anything else       -> padding after the closing quote; the rest
							!                          of the field is skipped
							: maybe-escaped-quote ( -- endchar )
							  read1 dup {
							    { CHAR: "    [ , quoted-field ] }   ! "
							    { delimiter> [ ] }
							    { CHAR: \n   [ ] }
							    ! default branch: both copies of the character are still on the stack
							    [ 2drop skip-to-field-end ]
							  } case ;
 | 
					
						
							|  |  |  |    | 
					
						
							! Reads the inside of a quoted field: everything up to the next double
							! quote is appended untrimmed to the string being built, then
							! maybe-escaped-quote decides whether that quote was escaped or closing.
							: quoted-field ( -- endchar )
							  "\"" read-until drop   ! "
							  %
							  maybe-escaped-quote ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							! Parses a single field. Outputs the character that terminated it
							! followed by the field's text, collected into a fresh string.
							: field ( -- sep string )
							  [ not-quoted-field ] "" make ;
					
						
							| 
									
										
										
										
											2008-04-17 16:29:04 -04:00
										 |  |  | 
 | 
					
						
							! Parses fields one after another, adding each to the enclosing make
							! sequence, for as long as a field ends with the delimiter. Returns the
							! first terminator that is not the delimiter.
							: (row) ( -- sep )
							  field ,
							  dup delimiter> =
							  [ drop (row) ] when ;
 | 
					
						
							| 
									
										
										
										
											2008-04-17 16:29:04 -04:00
										 |  |  | 
 | 
					
						
							! Parses one row from the current input stream.
							! Outputs:
							!   sep/f          the character that ended the row, or f when the input
							!                  ran out. It is false only at end of input, so (csv)
							!                  keeps looping while it is true. It is NOT an "eof?" flag.
							!   array[string]  the row's fields, in order
							: row ( -- sep/f array[string] )
							  [ (row) ] { } make ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							! Adds row to the enclosing make sequence, unless the row is exactly
							! { "" } (a single empty field), in which case it is dropped.
							: append-if-row-not-empty ( row -- )
							  dup { "" } = not
							  [ , ] [ drop ] if ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							! Reads rows until the input is exhausted, adding every row that is not
							! { "" } to the enclosing make sequence. Recurses while the terminator
							! returned by row is true.
							: (csv) ( -- )
							  row append-if-row-not-empty [ (csv) ] when ;
 | 
					
						
							| 
									
										
										
										
											2008-04-30 12:50:40 -04:00
										 |  |  |    | 
					
						
							| 
									
										
										
										
											2008-04-17 16:29:04 -04:00
										 ! Parses a single row from stream and returns its fields; the row
										 ! terminator is discarded.
										 ! NOTE(review): with-input-stream presumably disposes of the stream when
										 ! the quotation returns; confirm before reading several rows from one stream.
										 : csv-row ( stream -- row )
										   [ row ] with-input-stream nip ;
 | 
					
						
							| 
									
										
										
										
											2008-04-17 16:29:04 -04:00
										 |  |  | 
 | 
					
						
							! Parses everything readable from stream and returns the rows as an
							! array of arrays of strings. Rows equal to { "" } are left out.
							: csv ( stream -- rows )
							  [
							    [ (csv) ] { } make
							  ] with-input-stream ;
 | 
					
						
							| 
									
										
										
										
											2008-04-30 12:50:40 -04:00
										 |  |  | 
 | 
					
						
							! Calls quot with the delimiter variable bound to char, e.g.
							!   CHAR: \t [ stream csv ] with-delimiter
							! Words in this file that read the delimiter see char while quot runs.
							: with-delimiter ( char quot -- )
							  delimiter swap
							  with-variable ; inline
 | 
					
						
							| 
									
										
										
										
											2008-07-25 17:02:07 -04:00
										 |  |  | 
 | 
					
						
							! True when cell cannot be written verbatim and must be quoted: it
							! contains a newline, a carriage return, a double quote or the current
							! delimiter.
							! Carriage return is included because many CSV readers treat it as a line
							! break, and this file's reader trims a trailing one from an unquoted field.
							! NOTE(review): cells with leading or trailing blanks are still written
							! unquoted, although not-quoted-field trims unquoted fields when reading.
							: needs-escaping? ( cell -- ? )
							  [ [ "\n\r\"" member? ] [ delimiter> = ] bi or ] contains? ; inline ! "
					
						
							| 
									
										
										
										
											2008-07-25 17:02:07 -04:00
										 |  |  | 
 | 
					
						
							! Returns a copy of cell in which every double quote is doubled.
							: escape-quotes ( cell -- cell' )
							  [
							    [ dup , CHAR: " = [ CHAR: " , ] when ] each
							  ] "" make ; inline
 | 
					
						
							|  |  |  | 
 | 
					
						
							! Wraps cell in a pair of double quotes: one added in front, one at the end.
							: enclose-in-quotes ( cell -- cell' )
							  CHAR: " prefix CHAR: " suffix ; inline
					
						
							|  |  |  |      | 
					
						
							! Returns cell unchanged when needs-escaping? is false; otherwise doubles
							! its double quotes and wraps the result in double quotes.
							: escape-if-required ( cell -- cell' )
							  dup needs-escaping?
							  [ escape-quotes enclose-in-quotes ] when ; inline
 | 
					
						
							|  |  |  |      | 
					
						
							! Writes one row to the current output stream: each cell is escaped if
							! required, cells are separated by the current delimiter, and the row is
							! ended with nl.
							! NOTE(review): the quotation order (separator first, per-cell second)
							! follows the interleave signature of the Factor version this file
							! targets; confirm before porting to a newer release.
							: write-row ( row -- )
							  [ delimiter> write1 ]
							  [ escape-if-required write ]
							  interleave nl ; inline
 | 
					
						
							| 
									
										
										
										
											2008-07-25 17:02:07 -04:00
										 |  |  |      | 
					
						
							| 
									
										
										
										
											2008-08-30 12:46:35 -04:00
										 ! Writes every row of rows to stream, one write-row call per row, with
										 ! stream installed as the output stream for the duration.
										 : write-csv ( rows stream -- )
										   [
										     [ write-row ] each
										   ] with-output-stream ;
 |