From 5c1d60edf39c57f98305f8e296067c9fb3aa5770 Mon Sep 17 00:00:00 2001
From: Phil Dawes
Date: Thu, 17 Apr 2008 21:29:04 +0100
Subject: [PATCH 1/2] Basic CSV Parser

---
 extra/csv/authors.txt      |  1 +
 extra/csv/csv-docs.factor  | 14 +++++++
 extra/csv/csv-tests.factor | 62 ++++++++++++++++++++++++++++++
 extra/csv/csv.factor       | 77 ++++++++++++++++++++++++++++++++++++++
 extra/csv/summary.txt      |  1 +
 5 files changed, 155 insertions(+)
 create mode 100644 extra/csv/authors.txt
 create mode 100644 extra/csv/csv-docs.factor
 create mode 100644 extra/csv/csv-tests.factor
 create mode 100644 extra/csv/csv.factor
 create mode 100644 extra/csv/summary.txt

diff --git a/extra/csv/authors.txt b/extra/csv/authors.txt
new file mode 100644
index 0000000000..0be42b2faa
--- /dev/null
+++ b/extra/csv/authors.txt
@@ -0,0 +1 @@
+Phil Dawes
diff --git a/extra/csv/csv-docs.factor b/extra/csv/csv-docs.factor
new file mode 100644
index 0000000000..c16ed46522
--- /dev/null
+++ b/extra/csv/csv-docs.factor
@@ -0,0 +1,14 @@
+USING: help.syntax help.markup kernel prettyprint sequences ;
+IN: csv
+
+HELP: csv
+{ $values { "stream" "a stream" }
+          { "rows" "an array of arrays of fields" } }
+{ $description "parses a csv stream into an array of row arrays"
+} ;
+
+HELP: csv-row
+{ $values { "stream" "a stream" }
+          { "row" "an array of fields" } }
+{ $description "parses a row from a csv stream"
+} ;
diff --git a/extra/csv/csv-tests.factor b/extra/csv/csv-tests.factor
new file mode 100644
index 0000000000..bf70ed3009
--- /dev/null
+++ b/extra/csv/csv-tests.factor
@@ -0,0 +1,62 @@
+USING: io.streams.string csv tools.test shuffle ;
+IN: csv.tests
+
+! I like to name my unit tests: the name is only a label for the
+! reader and is dropped before unit-test runs.
+: named-unit-test ( name output input -- )
+  nipd unit-test ; inline
+
+! tests nicked from the wikipedia csv article
+! http://en.wikipedia.org/wiki/Comma-separated_values
+
+"Fields are separated by commas"
+[ { { "1997" "Ford" "E350" } } ]
+[ "1997,Ford,E350" <string-reader> csv ] named-unit-test
+
+"ignores whitespace before and after elements. n.b.specifically prohibited by RFC 4180, which states, 'Spaces are considered part of a field and should not be ignored.'"
+[ { { "1997" "Ford" "E350" } } ]
+[ "1997, Ford , E350" <string-reader> csv ] named-unit-test
+
+"keeps spaces in quotes"
+[ { { "1997" "Ford" "E350" "Super, luxurious truck" } } ]
+[ "1997,Ford,E350,\"Super, luxurious truck\"" <string-reader> csv ] named-unit-test
+
+"double quotes mean escaped in quotes"
+[ { { "1997" "Ford" "E350" "Super \"luxurious\" truck" } } ]
+[ "1997,Ford,E350,\"Super \"\"luxurious\"\" truck\""
+  <string-reader> csv ] named-unit-test
+
+"Fields with embedded line breaks must be delimited by double-quote characters."
+[ { { "1997" "Ford" "E350" "Go get one now\nthey are going fast" } } ]
+[ "1997,Ford,E350,\"Go get one now\nthey are going fast\""
+  <string-reader> csv ] named-unit-test
+
+"Fields with leading or trailing spaces must be delimited by double-quote characters. (See comment about leading and trailing spaces above)"
+[ { { "1997" "Ford" "E350" " Super luxurious truck " } } ]
+[ "1997,Ford,E350,\" Super luxurious truck \""
+  <string-reader> csv ] named-unit-test
+
+"Fields may always be delimited by double-quote characters, whether necessary or not."
+[ { { "1997" "Ford" "E350" } } ]
+[ "\"1997\",\"Ford\",\"E350\"" <string-reader> csv ] named-unit-test
+
+"The first record in a csv file may contain column names in each of the fields."
+[ { { "Year" "Make" "Model" }
+    { "1997" "Ford" "E350" }
+    { "2000" "Mercury" "Cougar" } } ]
+[ "Year,Make,Model\n1997,Ford,E350\n2000,Mercury,Cougar"
+  <string-reader> csv ] named-unit-test
+
+
+
+! !!!!!!!! other tests
+
+[ { { "Phil Dawes" } } ]
+[ "\"Phil Dawes\"" <string-reader> csv ] unit-test
+
+[ { { "1" "2" "3" } { "4" "5" "6" } } ]
+[ "1,2,3\n4,5,6\n" <string-reader> csv ] unit-test
+
+"trims leading and trailing whitespace - n.b. this isn't really conformant, but lots of csv seems to assume this"
+[ { { "foo yeah" "bah" "baz" } } ]
+[ " foo yeah , bah ,baz\n" <string-reader> csv ] named-unit-test
diff --git a/extra/csv/csv.factor b/extra/csv/csv.factor
new file mode 100644
index 0000000000..ff95be2ead
--- /dev/null
+++ b/extra/csv/csv.factor
@@ -0,0 +1,77 @@
+! Copyright (C) 2007, 2008 Phil Dawes
+! See http://factorcode.org/license.txt for BSD license.
+
+! Simple CSV Parser
+! Phil Dawes phil@phildawes.net
+
+USING: kernel sequences io namespaces combinators ;
+IN: csv
+
+DEFER: quoted-field
+
+! Trims spaces and tabs from both ends of a string.
+: trim-whitespace ( str -- str )
+  [ "\s\t" member? ] trim ; inline
+
+! Reads unquoted text up to the next comma, double quote, newline or
+! end of input. Text that ends at a separator is trimmed and added to
+! the field being built; text before an opening quote is discarded
+! and parsing continues in quoted-field. Leaves the separator that
+! ended the field on the stack (f at end of input).
+: not-quoted-field ( -- endchar )
+  ",\"\n" read-until   ! "
+  dup
+  { { CHAR: " [ drop drop quoted-field ] }  ! "
+    { CHAR: , [ swap trim-whitespace % ] }
+    { CHAR: \n [ swap trim-whitespace % ] }
+    { f [ swap trim-whitespace % ] }   ! eof
+  } case ;
+
+! Called just after a double quote inside a quoted field: looks at
+! the next character to decide whether the quote was escaped ("")
+! or closed the field.
+: maybe-escaped-quote ( -- endchar )
+  read1
+  dup
+  { { CHAR: " [ , quoted-field ] }  ! " is an escaped quote
+    { CHAR: \s [ drop not-quoted-field ] }
+    { CHAR: \t [ drop not-quoted-field ] }
+    [ drop ]   ! anything else is taken as the field's end character
+  } case ;
+
+! Reads quoted text up to the next double quote and adds it to the
+! field verbatim, so whitespace inside quotes is preserved.
+: quoted-field ( -- endchar )
+  "\"" read-until  ! "
+  drop % maybe-escaped-quote ;
+
+! Parses one field from the current input stream.
+: field ( -- sep string )
+  [ not-quoted-field ] "" make ;
+
+! Parses comma-separated fields into the sequence being built until
+! a field ends with something other than a comma.
+: (row) ( -- sep )
+  field ,
+  dup CHAR: , = [ drop (row) ] when ;
+
+! Parses one row. sep is the character that ended the row, or f when
+! the input is exhausted.
+: row ( -- sep array[string] )
+  [ (row) ] { } make ;
+
+! Adds the row to the sequence being built unless it is { "" },
+! which is what a blank line or a trailing newline parses to.
+: append-if-row-not-empty ( row -- )
+  dup { "" } = [ drop ] [ , ] if ;
+
+! Collects rows until row reports the end of input.
+: (csv) ( -- )
+  row append-if-row-not-empty
+  [ (csv) ] when ;
+
+: csv-row ( stream -- row )
+  [ row nip ] with-stream ;
+
+: csv ( stream -- rows )
+  [ [ (csv) ] { } make ] with-stream ;
diff --git a/extra/csv/summary.txt b/extra/csv/summary.txt
new file mode 100644
index 0000000000..503cc1b38d
--- /dev/null
+++ b/extra/csv/summary.txt
@@ -0,0 +1 @@
+CSV parser

From 02d95144d9a1526883f921ff1d247cb2b9f00284 Mon Sep 17 00:00:00 2001
From: Phil Dawes
Date: Thu, 24 Apr 2008 09:25:19 +0100
Subject: [PATCH 2/2] Added vocab monitors for all vocab-roots (not just files
 under resource-path) N.B. monitors are initialized on startup so additional
 vocab-roots need to be saved in image

---
 extra/tools/vocabs/monitor/monitor.factor | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/extra/tools/vocabs/monitor/monitor.factor b/extra/tools/vocabs/monitor/monitor.factor
index ab5e8c66b7..563cd04e3e 100755
--- a/extra/tools/vocabs/monitor/monitor.factor
+++ b/extra/tools/vocabs/monitor/monitor.factor
@@ -2,7 +2,7 @@
 ! See http://factorcode.org/license.txt for BSD license.
 USING: threads io.files io.monitors init kernel
 vocabs vocabs.loader tools.vocabs namespaces continuations
-sequences splitting assocs command-line ;
+sequences splitting assocs command-line concurrency.messaging io.backend sets ;
 IN: tools.vocabs.monitor
 
 : vocab-dir>vocab-name ( path -- vocab )
@@ -22,17 +22,22 @@ IN: tools.vocabs.monitor
 : path>vocab ( path -- vocab )
     chop-vocab-root path>vocab-name vocab-dir>vocab-name ;
 
-: monitor-loop ( monitor -- )
+: monitor-loop ( -- )
     #! On OS X, monitors give us the full path, so we chop it
     #! off if its there.
-    dup next-change drop path>vocab changed-vocab
+    receive first path>vocab changed-vocab
     reset-cache
     monitor-loop ;
 
+: add-monitor-for-path ( path -- )
+    #! Nonexistent roots are skipped. The monitor itself is dropped;
+    #! changes are picked up from this thread's mailbox by monitor-loop.
+    normalize-path dup exists? [ t my-mailbox (monitor) ] when drop ;
+
 : monitor-thread ( -- )
     [
         [
-            "" resource-path t <monitor>
+            vocab-roots get prune [ add-monitor-for-path ] each
 
             H{ } clone changed-vocabs set-global
             vocabs [ changed-vocab ] each