From f1a1760e6ead33bef83ca823cf0f1e874e7a2aa8 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Fri, 30 Jan 2009 19:23:04 -0600 Subject: [PATCH 1/4] add csv>file and file>csv words, better docs for csv, a few cleanups --- basis/csv/csv-docs.factor | 48 ++++++++++++++----- basis/csv/csv-tests.factor | 16 ++++++- basis/csv/csv.factor | 95 ++++++++++++++++++++++---------------- 3 files changed, 106 insertions(+), 53 deletions(-) diff --git a/basis/csv/csv-docs.factor b/basis/csv/csv-docs.factor index e4741f4810..6ae75b6b2f 100644 --- a/basis/csv/csv-docs.factor +++ b/basis/csv/csv-docs.factor @@ -1,28 +1,52 @@ -USING: help.syntax help.markup kernel prettyprint sequences ; +USING: help.syntax help.markup kernel prettyprint sequences +io.pathnames ; IN: csv HELP: csv { $values { "stream" "an input stream" } { "rows" "an array of arrays of fields" } } -{ $description "parses a csv stream into an array of row arrays" -} ; +{ $description "Parses a csv stream into an array of row arrays." } ; + +HELP: file>csv +{ $values + { "path" pathname } { "encoding" "an encoding descriptor" } + { "csv" "csv" } +} +{ $description "Opens a file and parses it into a sequence of comma-separated-value fields." } ; + +HELP: csv>file +{ $values + { "rows" "a sequence of sequences of strings" } + { "path" pathname } { "encoding" "an encoding descriptor" } +} +{ $description "Writes a comma-separated-value structure to a file." } ; HELP: csv-row { $values { "stream" "an input stream" } { "row" "an array of fields" } } -{ $description "parses a row from a csv stream" -} ; +{ $description "parses a row from a csv stream" } ; HELP: write-csv -{ $values { "rows" "an sequence of sequences of strings" } +{ $values { "rows" "a sequence of sequences of strings" } { "stream" "an output stream" } } -{ $description "writes csv to the output stream, escaping where necessary" -} ; - +{ $description "Writes a sequence of sequences of comma-separated-values to the output stream, escaping where necessary." } ; HELP: with-delimiter -{ $values { "char" "field delimiter (e.g. CHAR: \t)" } +{ $values { "ch" "field delimiter (e.g. CHAR: \t)" } { "quot" "a quotation" } } -{ $description "Sets the field delimiter for csv or csv-row words " -} ; +{ $description "Sets the field delimiter for csv or csv-row words." } ; +ARTICLE: "csv" "Comma-separated-values parsing and writing" +"The " { $vocab-link "csv" } " vocabulary can read and write CSV (comma-separated-value) files." $nl +"Reading a csv file:" +{ $subsection file>csv } +"Writing a csv file:" +{ $subsection csv>file } +"Changing the delimiter from a comma:" +{ $subsection with-delimiter } +"Reading from a stream:" +{ $subsection csv } +"Writing to a stream:" +{ $subsection write-csv } ; + +ABOUT: "csv" diff --git a/basis/csv/csv-tests.factor b/basis/csv/csv-tests.factor index 8261ae104a..4d78c2af86 100644 --- a/basis/csv/csv-tests.factor +++ b/basis/csv/csv-tests.factor @@ -1,5 +1,7 @@ +USING: io.streams.string csv tools.test shuffle kernel strings +io.pathnames io.files.unique io.encodings.utf8 io.files +io.directories ; IN: csv.tests -USING: io.streams.string csv tools.test shuffle kernel strings ; ! I like to name my unit tests : named-unit-test ( name output input -- ) @@ -76,3 +78,15 @@ USING: io.streams.string csv tools.test shuffle kernel strings ; "escapes quotes commas and newlines when writing" [ "\"fo\"\"o1\",bar1\n\"fo\no2\",\"b,ar2\"\n" ] [ { { "fo\"o1" "bar1" } { "fo\no2" "b,ar2" } } tuck write-csv >string ] named-unit-test ! " + +[ { { "writing" "some" "csv" "tests" } } ] +[ + "writing,some,csv,tests" + "csv-test1-" unique-file utf8 + [ set-file-contents ] [ file>csv ] [ drop delete-file ] 2tri +] unit-test + +[ t ] [ + { { "writing,some,csv,tests" } } dup "csv-test2-" + unique-file utf8 [ csv>file ] [ file>csv ] 2bi = +] unit-test diff --git a/basis/csv/csv.factor b/basis/csv/csv.factor index bc3c25d347..7789f015d9 100755 --- a/basis/csv/csv.factor +++ b/basis/csv/csv.factor @@ -1,89 +1,104 @@ ! Copyright (C) 2007, 2008 Phil Dawes ! See http://factorcode.org/license.txt for BSD license. - -! Simple CSV Parser -! Phil Dawes phil@phildawes.net - -USING: kernel sequences io namespaces make -combinators unicode.categories ; +USING: kernel sequences io namespaces make combinators +unicode.categories io.files combinators.short-circuit ; IN: csv SYMBOL: delimiter CHAR: , delimiter set-global + ( -- delimiter ) delimiter get ; inline DEFER: quoted-field ( -- endchar ) -! trims whitespace from either end of string : trim-whitespace ( str -- str ) - [ blank? ] trim ; inline + [ blank? ] trim ; inline : skip-to-field-end ( -- endchar ) "\n" delimiter> suffix read-until nip ; inline : not-quoted-field ( -- endchar ) - "\"\n" delimiter> suffix read-until ! " - dup - { { CHAR: " [ drop drop quoted-field ] } ! " - { delimiter> [ swap trim-whitespace % ] } - { CHAR: \n [ swap trim-whitespace % ] } - { f [ swap trim-whitespace % ] } ! eof - } case ; + "\"\n" delimiter> suffix read-until + dup { + { CHAR: " [ 2drop quoted-field ] } + { delimiter> [ swap trim-whitespace % ] } + { CHAR: \n [ swap trim-whitespace % ] } + { f [ swap trim-whitespace % ] } + } case ; : maybe-escaped-quote ( -- endchar ) - read1 dup - { { CHAR: " [ , quoted-field ] } ! " is an escaped quote - { delimiter> [ ] } ! end of quoted field - { CHAR: \n [ ] } - [ 2drop skip-to-field-end ] ! end of quoted field + padding - } case ; + read1 dup { + { CHAR: " [ , quoted-field ] } + { delimiter> [ ] } + { CHAR: \n [ ] } + [ 2drop skip-to-field-end ] + } case ; : quoted-field ( -- endchar ) - "\"" read-until ! " - drop % maybe-escaped-quote ; + "\"" read-until + drop % maybe-escaped-quote ; : field ( -- sep string ) - [ not-quoted-field ] "" make ; ! trim-whitespace + [ not-quoted-field ] "" make ; : (row) ( -- sep ) - field , - dup delimiter get = [ drop (row) ] when ; + field , + dup delimiter get = [ drop (row) ] when ; : row ( -- eof? array[string] ) - [ (row) ] { } make ; + [ (row) ] { } make ; : append-if-row-not-empty ( row -- ) - dup { "" } = [ drop ] [ , ] if ; + dup { "" } = [ drop ] [ , ] if ; : (csv) ( -- ) - row append-if-row-not-empty - [ (csv) ] when ; + row append-if-row-not-empty + [ (csv) ] when ; +PRIVATE> + : csv-row ( stream -- row ) - [ row nip ] with-input-stream ; + [ row nip ] with-input-stream ; : csv ( stream -- rows ) - [ [ (csv) ] { } make ] with-input-stream ; + [ [ (csv) ] { } make ] with-input-stream ; -: with-delimiter ( char quot -- ) - delimiter swap with-variable ; inline +: file>csv ( path encoding -- csv ) + csv ; + +: with-delimiter ( ch quot -- ) + [ delimiter ] dip with-variable ; inline + + : write-row ( row -- ) - [ delimiter get write1 ] [ escape-if-required write ] interleave nl ; inline + [ delimiter get write1 ] + [ escape-if-required write ] interleave nl ; inline : write-csv ( rows stream -- ) - [ [ write-row ] each ] with-output-stream ; + [ [ write-row ] each ] with-output-stream ; + +: csv>file ( rows path encoding -- ) write-csv ; From 9c2a476d98751840a31ad3beb855eeafcca6694b Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Fri, 30 Jan 2009 19:24:35 -0600 Subject: [PATCH 2/4] minor cleanup --- basis/csv/csv.factor | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/basis/csv/csv.factor b/basis/csv/csv.factor index 7789f015d9..152b3dcbba 100755 --- a/basis/csv/csv.factor +++ b/basis/csv/csv.factor @@ -51,12 +51,8 @@ DEFER: quoted-field ( -- endchar ) : row ( -- eof? array[string] ) [ (row) ] { } make ; -: append-if-row-not-empty ( row -- ) - dup { "" } = [ drop ] [ , ] if ; - : (csv) ( -- ) - row append-if-row-not-empty - [ (csv) ] when ; + row harvest [ , ] unless-empty [ (csv) ] when ; PRIVATE> From f8092480a6b1488c397d2c69b616a4342f487c56 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Fri, 30 Jan 2009 19:34:31 -0600 Subject: [PATCH 3/4] Fix a couple of bugs in xmode and add a unit test --- basis/xmode/catalog/catalog.factor | 36 +++++++++++--------- basis/xmode/code2html/code2html-tests.factor | 9 ++++- basis/xmode/code2html/code2html.factor | 6 ++-- basis/xmode/loader/loader.factor | 16 ++++----- basis/xmode/loader/syntax/syntax.factor | 13 +++---- basis/xmode/marker/context/context.factor | 4 +-- basis/xmode/marker/marker.factor | 4 +-- 7 files changed, 49 insertions(+), 39 deletions(-) diff --git a/basis/xmode/catalog/catalog.factor b/basis/xmode/catalog/catalog.factor index 8a8e5fad4a..4e3af0af56 100644 --- a/basis/xmode/catalog/catalog.factor +++ b/basis/xmode/catalog/catalog.factor @@ -52,9 +52,15 @@ SYMBOL: rule-sets dup "::" split1 [ swap (load-mode) ] [ rule-sets get ] if* dup -roll at* [ nip ] [ drop no-such-rule-set ] if ; +DEFER: finalize-rule-set + : resolve-delegate ( rule -- ) - dup delegate>> dup string? - [ get-rule-set nip swap (>>delegate) ] [ 2drop ] if ; + dup delegate>> dup string? [ + get-rule-set + dup rule-set? [ "not a rule set" throw ] unless + swap rule-sets [ dup finalize-rule-set ] with-variable + >>delegate drop + ] [ 2drop ] if ; : each-rule ( rule-set quot -- ) [ rules>> values concat ] dip each ; inline @@ -74,26 +80,22 @@ SYMBOL: rule-sets : resolve-imports ( ruleset -- ) dup imports>> [ get-rule-set swap rule-sets [ - dup resolve-delegates - 2dup import-keywords - import-rules + [ nip resolve-delegates ] + [ import-keywords ] + [ import-rules ] + 2tri ] with-variable ] with each ; ERROR: mutually-recursive-rulesets ruleset ; + : finalize-rule-set ( ruleset -- ) - dup finalized?>> { - { f [ - { - [ 1 >>finalized? drop ] - [ resolve-imports ] - [ resolve-delegates ] - [ t >>finalized? drop ] - } cleave - ] } - { t [ drop ] } - { 1 [ mutually-recursive-rulesets ] } - } case ; + dup finalized?>> [ drop ] [ + t >>finalized? + [ resolve-imports ] + [ resolve-delegates ] + bi + ] if ; : finalize-mode ( rulesets -- ) rule-sets [ diff --git a/basis/xmode/code2html/code2html-tests.factor b/basis/xmode/code2html/code2html-tests.factor index cd11ba50d0..c0b8a1b560 100644 --- a/basis/xmode/code2html/code2html-tests.factor +++ b/basis/xmode/code2html/code2html-tests.factor @@ -1,7 +1,7 @@ IN: xmode.code2html.tests USING: xmode.code2html xmode.catalog tools.test multiline splitting memoize -kernel ; +kernel io.streams.string xml.writer ; [ ] [ \ (load-mode) reset-memoized ] unit-test @@ -9,4 +9,11 @@ kernel ; <"