add csv>file and file>csv words, better docs for csv, a few cleanups

2009-01-30 19:23:04 -06:00 · 2009-01-30 19:23:04 -06:00 · f1a1760e6e
parent e36376b432
commit f1a1760e6e
3 changed files with 106 additions and 53 deletions
--- a/basis/csv/csv-docs.factor
+++ b/basis/csv/csv-docs.factor
@ -1,28 +1,52 @@
-USING: help.syntax help.markup kernel prettyprint sequences ;
+USING: help.syntax help.markup kernel prettyprint sequences
 io.pathnames ;
 IN: csv
 HELP: csv
 { $values { "stream" "an input stream" }
          { "rows" "an array of arrays of fields" } } 
-{ $description "parses a csv stream into an array of row arrays"
+{ $description "Parses a csv stream into an array of row arrays." } ;
-} ;
+
 HELP: file>csv
 { $values
    { "path" pathname } { "encoding" "an encoding descriptor" }
    { "csv" "csv" }
 }
 { $description "Opens a file and parses it into a sequence of comma-separated-value fields." } ;
 HELP: csv>file
 { $values
    { "rows" "a sequence of sequences of strings" }
    { "path" pathname } { "encoding" "an encoding descriptor" }
 }
 { $description "Writes a comma-separated-value structure to a file." } ;
 HELP: csv-row
 { $values { "stream" "an input stream" }
          { "row" "an array of fields" } } 
-{ $description "parses a row from a csv stream"
+{ $description "parses a row from a csv stream" } ;
 } ;
 HELP: write-csv
-{ $values { "rows" "an sequence of sequences of strings" }
+{ $values { "rows" "a sequence of sequences of strings" }
          { "stream" "an output stream" } } 
-{ $description "writes csv to the output stream, escaping where necessary"
+{ $description "Writes a sequence of sequences of comma-separated-values to the output stream, escaping where necessary." } ;
 } ;
 HELP: with-delimiter
-{ $values { "char" "field delimiter (e.g. CHAR: \t)" }
+{ $values { "ch" "field delimiter (e.g. CHAR: \t)" }
          { "quot" "a quotation" } }
-{ $description "Sets the field delimiter for csv or csv-row words "
+{ $description "Sets the field delimiter for csv or csv-row words." } ;
 } ;
 ARTICLE: "csv" "Comma-separated-values parsing and writing"
 "The " { $vocab-link "csv" } " vocabulary can read and write CSV (comma-separated-value) files." $nl
 "Reading a csv file:"
 { $subsection file>csv }
 "Writing a csv file:"
 { $subsection csv>file }
 "Changing the delimiter from a comma:"
 { $subsection with-delimiter }
 "Reading from a stream:"
 { $subsection csv }
 "Writing to a stream:"
 { $subsection write-csv } ;
 ABOUT: "csv"
--- a/basis/csv/csv-tests.factor
+++ b/basis/csv/csv-tests.factor
@ -1,5 +1,7 @@
 USING: io.streams.string csv tools.test shuffle kernel strings
 io.pathnames io.files.unique io.encodings.utf8 io.files
 io.directories ;
 IN: csv.tests
 USING: io.streams.string csv tools.test shuffle kernel strings ;
 ! I like to name my unit tests
 : named-unit-test ( name output input -- ) 
@ -76,3 +78,15 @@ USING: io.streams.string csv tools.test shuffle kernel strings ;
 "escapes quotes commas and newlines when writing"
 [ "\"fo\"\"o1\",bar1\n\"fo\no2\",\"b,ar2\"\n" ]
 [ { { "fo\"o1" "bar1" } { "fo\no2" "b,ar2" } } <string-writer> tuck write-csv >string ] named-unit-test ! "
 [ { { "writing" "some" "csv" "tests" } } ]
 [
    "writing,some,csv,tests"
    "csv-test1-" unique-file utf8
    [ set-file-contents ] [ file>csv ] [ drop delete-file ] 2tri
 ] unit-test
 [ t ] [
    { { "writing,some,csv,tests" } } dup "csv-test2-"
    unique-file utf8 [ csv>file ] [ file>csv ] 2bi =
 ] unit-test
--- a/basis/csv/csv.factor
+++ b/basis/csv/csv.factor
@ -1,89 +1,104 @@
 ! Copyright (C) 2007, 2008 Phil Dawes
 ! See http://factorcode.org/license.txt for BSD license.
-
+USING: kernel sequences io namespaces make combinators
-! Simple CSV Parser
+unicode.categories io.files combinators.short-circuit ;
 ! Phil Dawes phil@phildawes.net
 USING: kernel sequences io namespaces make
 combinators unicode.categories ;
 IN: csv
 SYMBOL: delimiter
 CHAR: , delimiter set-global
 <PRIVATE
 : delimiter> ( -- delimiter ) delimiter get ; inline
 DEFER: quoted-field ( -- endchar )
 ! trims whitespace from either end of string
 : trim-whitespace ( str -- str )
-  [ blank? ] trim ; inline
+    [ blank? ] trim ; inline
 : skip-to-field-end ( -- endchar )
  "\n" delimiter> suffix read-until nip ; inline
 : not-quoted-field ( -- endchar )
-  "\"\n" delimiter> suffix read-until   ! "
+    "\"\n" delimiter> suffix read-until
-  dup
+    dup {
-  { { CHAR: "     [ drop drop quoted-field ] }  ! " 
+        { CHAR: "    [ 2drop quoted-field ] }
-    { delimiter> [ swap trim-whitespace % ] } 
+        { delimiter> [ swap trim-whitespace % ] }
-    { CHAR: \n    [ swap trim-whitespace % ] }    
+        { CHAR: \n   [ swap trim-whitespace % ] }
-    { f           [ swap trim-whitespace % ] }       ! eof
+        { f          [ swap trim-whitespace % ] }
-  } case ;
+    } case ;
 : maybe-escaped-quote ( -- endchar )
-  read1 dup 
+    read1 dup {
-  { { CHAR: "    [ , quoted-field ] }  ! " is an escaped quote
+        { CHAR: "    [ , quoted-field ] }
-    { delimiter> [ ] }                 ! end of quoted field 
+        { delimiter> [ ] }
-    { CHAR: \n   [ ] }
+        { CHAR: \n   [ ] }
-    [ 2drop skip-to-field-end ]       ! end of quoted field + padding
+        [ 2drop skip-to-field-end ]
-  } case ;
+    } case ;
 : quoted-field ( -- endchar )
-  "\"" read-until                                 ! "
+    "\"" read-until
-  drop % maybe-escaped-quote ;
+    drop % maybe-escaped-quote ;
 : field ( -- sep string )
-  [ not-quoted-field ] "" make  ; ! trim-whitespace
+    [ not-quoted-field ] "" make  ;
 : (row) ( -- sep )
-  field , 
+    field , 
-  dup delimiter get = [ drop (row) ] when ;
+    dup delimiter get = [ drop (row) ] when ;
 : row ( -- eof? array[string] )
-  [ (row) ] { } make ;
+    [ (row) ] { } make ;
 : append-if-row-not-empty ( row -- )
-  dup { "" } = [ drop ] [ , ] if ;
+    dup { "" } = [ drop ] [ , ] if ;
 : (csv) ( -- )
-  row append-if-row-not-empty
+    row append-if-row-not-empty
-  [ (csv) ] when ;
+    [ (csv) ] when ;
 PRIVATE>
 : csv-row ( stream -- row )
-  [ row nip ] with-input-stream ;
+    [ row nip ] with-input-stream ;
 : csv ( stream -- rows )
-  [ [ (csv) ] { } make ] with-input-stream ;
+    [ [ (csv) ] { } make ] with-input-stream ;
-: with-delimiter ( char quot -- )
+: file>csv ( path encoding -- csv )
-  delimiter swap with-variable ; inline
+    <file-reader> csv ;
 : with-delimiter ( ch quot -- )
    [ delimiter ] dip with-variable ; inline
 <PRIVATE
 : needs-escaping? ( cell -- ? )
-  [ [ "\n\"" member? ] [ delimiter get = ] bi or ] any? ; inline
+    [ { [ "\n\"" member? ] [ delimiter get = ] } 1|| ] any? ; inline
 : escape-quotes ( cell -- cell' )
-  [ [ dup , CHAR: " = [ CHAR: " , ] when ] each ] "" make ; inline
+    [
        [
            [ , ]
            [ dup CHAR: " = [ , ] [ drop ] if ] bi
        ] each
    ] "" make ; inline
 : enclose-in-quotes ( cell -- cell' )
-  CHAR: " [ prefix ] [ suffix ] bi ; inline ! "
+    "\"" dup surround ; inline
 : escape-if-required ( cell -- cell' )
-  dup needs-escaping? [ escape-quotes enclose-in-quotes ] when ; inline
+    dup needs-escaping?
    [ escape-quotes enclose-in-quotes ] when ; inline
 PRIVATE>
 : write-row ( row -- )
-  [ delimiter get write1 ] [ escape-if-required write ] interleave nl ; inline
+    [ delimiter get write1 ]
    [ escape-if-required write ] interleave nl ; inline
 : write-csv ( rows stream -- )
-  [ [ write-row ] each ] with-output-stream ;
+    [ [ write-row ] each ] with-output-stream ;
 : csv>file ( rows path encoding -- ) <file-writer> write-csv ;