From 5c1d60edf39c57f98305f8e296067c9fb3aa5770 Mon Sep 17 00:00:00 2001
From: Phil Dawes <phil@phildawes.net>
Date: Thu, 17 Apr 2008 21:29:04 +0100
Subject: [PATCH 1/2] Basic CSV Parser

---
 extra/csv/authors.txt      |  1 +
 extra/csv/csv-docs.factor  | 14 +++++++++
 extra/csv/csv-tests.factor | 60 ++++++++++++++++++++++++++++++++++++++
 extra/csv/csv.factor       | 59 +++++++++++++++++++++++++++++++++++++
 extra/csv/summary.txt      |  1 +
 5 files changed, 135 insertions(+)
 create mode 100644 extra/csv/authors.txt
 create mode 100644 extra/csv/csv-docs.factor
 create mode 100644 extra/csv/csv-tests.factor
 create mode 100644 extra/csv/csv.factor
 create mode 100644 extra/csv/summary.txt
diff --git a/extra/csv/authors.txt b/extra/csv/authors.txt
new file mode 100644
index 0000000000..0be42b2faa
--- /dev/null
+++ b/extra/csv/authors.txt
@@ -0,0 +1 @@
+Phil Dawes
diff --git a/extra/csv/csv-docs.factor b/extra/csv/csv-docs.factor
new file mode 100644
index 0000000000..c16ed46522
--- /dev/null
+++ b/extra/csv/csv-docs.factor
@@ -0,0 +1,14 @@
+USING: help.syntax help.markup kernel prettyprint sequences ;
+IN: csv
+
+HELP: csv
+{ $values { "stream" "a stream" }
+          { "rows" "an array of arrays of fields" } } 
+{ $description "Parses a CSV stream into an array of row arrays. Blank lines are skipped."
+} ;
+
+HELP: csv-row
+{ $values { "stream" "a stream" }
+          { "row" "an array of fields" } } 
+{ $description "Parses a single row from a CSV stream. An empty line, or exhausted input, yields a row holding one empty string."
+} ;
diff --git a/extra/csv/csv-tests.factor b/extra/csv/csv-tests.factor
new file mode 100644
index 0000000000..bf70ed3009
--- /dev/null
+++ b/extra/csv/csv-tests.factor
@@ -0,0 +1,60 @@
+USING: io.streams.string csv tools.test shuffle ;
+
+! Like unit-test, but takes a descriptive name beneath the two quotations; the name is not passed on to unit-test.
+: named-unit-test ( name output input -- ) 
+  nipd unit-test ; inline
+
+! tests nicked from the wikipedia csv article
+! http://en.wikipedia.org/wiki/Comma-separated_values
+
+"Fields are separated by commas"
+[ { { "1997" "Ford" "E350" } } ] 
+[ "1997,Ford,E350" <string-reader> csv ] named-unit-test
+
+"ignores whitespace before and after elements. n.b.specifically prohibited by RFC 4180, which states, 'Spaces are considered part of a field and should not be ignored.'"
+[ { { "1997" "Ford" "E350" } } ]
+[ "1997,   Ford   , E350" <string-reader> csv ] named-unit-test
+
+"keeps spaces in quotes"
+[ { { "1997" "Ford" "E350" "Super, luxurious truck" } } ]
+[ "1997,Ford,E350,\"Super, luxurious truck\"" <string-reader> csv ] named-unit-test
+
+"double quotes mean escaped in quotes"
+[ { { "1997" "Ford" "E350" "Super \"luxurious\" truck" } } ]
+[ "1997,Ford,E350,\"Super \"\"luxurious\"\" truck\"" 
+  <string-reader> csv ] named-unit-test
+
+"Fields with embedded line breaks must be delimited by double-quote characters."
+[ { { "1997" "Ford" "E350" "Go get one now\nthey are going fast" } } ]
+[ "1997,Ford,E350,\"Go get one now\nthey are going fast\""
+  <string-reader> csv ] named-unit-test
+
+"Fields with leading or trailing spaces must be delimited by double-quote characters. (See comment about leading and trailing spaces above)"
+[ { { "1997" "Ford" "E350" "  Super luxurious truck    " } } ]
+[ "1997,Ford,E350,\"  Super luxurious truck    \""
+  <string-reader> csv ] named-unit-test
+
+"Fields may always be delimited by double-quote characters, whether necessary or not."
+[ { { "1997" "Ford" "E350" } } ]
+[ "\"1997\",\"Ford\",\"E350\"" <string-reader> csv ] named-unit-test
+
+"The first record in a csv file may contain column names in each of the fields."
+[ { { "Year" "Make" "Model" } 
+    { "1997" "Ford" "E350" }
+    { "2000" "Mercury" "Cougar" } } ]
+[ "Year,Make,Model\n1997,Ford,E350\n2000,Mercury,Cougar" 
+   <string-reader> csv ] named-unit-test
+
+
+   
+! !!!!!!!!  other tests
+   
+[ { { "Phil Dawes" } } ] 
+[ "\"Phil Dawes\"" <string-reader> csv ] unit-test
+
+[ { { "1" "2" "3" } { "4" "5" "6" } } ] 
+[ "1,2,3\n4,5,6\n" <string-reader> csv ] unit-test
+
+"trims leading and trailing whitespace - n.b. this isn't really conformant, but lots of csv seems to assume this"
+[ { { "foo yeah" "bah" "baz" } } ] 
+[ "  foo yeah  , bah ,baz\n" <string-reader> csv ] named-unit-test
diff --git a/extra/csv/csv.factor b/extra/csv/csv.factor
new file mode 100644
index 0000000000..ff95be2ead
--- /dev/null
+++ b/extra/csv/csv.factor
@@ -0,0 +1,59 @@
+! Copyright (C) 2007, 2008 Phil Dawes
+! See http://factorcode.org/license.txt for BSD license.
+
+! Simple CSV Parser
+! Phil Dawes phil@phildawes.net
+
+USING: kernel sequences io namespaces combinators ;
+IN: csv
+
+DEFER: quoted-field
+
+! Trims spaces and tabs from both ends of str. Applied to unquoted text only.
+: trim-whitespace ( str -- str )
+  [ "\s\t" member? ] trim ; inline
+
+! Reads unquoted text up to the next comma, quote, newline or eof; appends it trimmed.
+: not-quoted-field ( -- endchar )
+  ",\"\n" read-until dup   ! "
+  { { CHAR: "   [ drop drop quoted-field ] }  ! " (text before the quote is discarded)
+    { CHAR: ,   [ swap trim-whitespace % ] }
+    { CHAR: \n  [ swap trim-whitespace % ] }
+    { f         [ swap trim-whitespace % ] }       ! eof
+  } case ;
+
+! Runs after a closing quote to decide whether the quoted text really ended.
+: maybe-escaped-quote ( -- endchar )
+  read1 dup
+  { { CHAR: "   [ , quoted-field ] }     ! " is an escaped quote
+    { CHAR: \s  [ drop not-quoted-field ] }
+    { CHAR: \t  [ drop not-quoted-field ] }
+    [ drop ]                             ! anything else is returned as the terminator
+  } case ;
+
+! Appends text up to the next quote verbatim, so whitespace inside quotes survives.
+: quoted-field ( -- endchar )
+  "\"" read-until drop % maybe-escaped-quote ;     ! "
+
+! Builds one field; trimming now happens in not-quoted-field, not on the whole field.
+: field ( -- sep string )
+  [ not-quoted-field ] "" make ;
+
+: (row) ( -- sep )   ! adds fields to the row being built; leaves the last terminator
+  field , 
+  dup CHAR: , = [ drop (row) ] when ;   ! a comma means another field follows
+
+: row ( -- eof? array[string] )   ! n.b. eof? is (row)'s terminator: f once input is exhausted
+  [ (row) ] { } make ;
+
+: append-if-row-not-empty ( row -- )   ! a blank line parses as { "" }; such rows are skipped
+  { "" } over = [ drop ] [ , ] if ;
+
+: (csv) ( -- )   ! adds rows to the array being built until row's terminator is f
+  row append-if-row-not-empty
+  [ (csv) ] when ;
+
+: csv-row ( stream -- row )   ! runs row inside with-stream on the given stream
+  [ row nip ] with-stream ;    ! nip discards the terminator left beneath the row
+
+: csv ( stream -- rows )   ! collects every non-empty row read from stream into an array
+  [ [ (csv) ] { } make ] with-stream ;
diff --git a/extra/csv/summary.txt b/extra/csv/summary.txt
new file mode 100644
index 0000000000..503cc1b38d
--- /dev/null
+++ b/extra/csv/summary.txt
@@ -0,0 +1 @@
+CSV parser

From 02d95144d9a1526883f921ff1d247cb2b9f00284 Mon Sep 17 00:00:00 2001
From: Phil Dawes <phil@phildawes.net>
Date: Thu, 24 Apr 2008 09:25:19 +0100
Subject: [PATCH 2/2] Added vocab monitors for all vocab-roots (not just files
 under resource-path)

N.B. Monitors are initialized on startup, so additional vocab-roots need to be saved in the image.
---
 extra/tools/vocabs/monitor/monitor.factor | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/extra/tools/vocabs/monitor/monitor.factor b/extra/tools/vocabs/monitor/monitor.factor
index ab5e8c66b7..563cd04e3e 100755
--- a/extra/tools/vocabs/monitor/monitor.factor
+++ b/extra/tools/vocabs/monitor/monitor.factor
@@ -2,7 +2,7 @@
 ! See http://factorcode.org/license.txt for BSD license.
 USING: threads io.files io.monitors init kernel
 vocabs vocabs.loader tools.vocabs namespaces continuations
-sequences splitting assocs command-line ;
+sequences splitting assocs command-line concurrency.messaging io.backend sets ;
 IN: tools.vocabs.monitor
 
 : vocab-dir>vocab-name ( path -- vocab )
@@ -22,17 +22,20 @@ IN: tools.vocabs.monitor
 : path>vocab ( path -- vocab )
     chop-vocab-root path>vocab-name vocab-dir>vocab-name ;
 
-: monitor-loop ( monitor -- )
+: monitor-loop ( -- )
     #! On OS X, monitors give us the full path, so we chop it
     #! off if its there.
-    dup next-change drop path>vocab changed-vocab
+    receive first path>vocab changed-vocab
     reset-cache
     monitor-loop ;
 
+: add-monitor-for-path ( path -- )   ! starts a monitor on path if it exists, passing my-mailbox to (monitor)
+    normalize-path dup exists? [ t my-mailbox (monitor) drop ] [ drop ] if ;
+    
 : monitor-thread ( -- )
     [
         [
-            "" resource-path t <monitor>
+            vocab-roots get prune [ add-monitor-for-path ] each
             
             H{ } clone changed-vocabs set-global
             vocabs [ changed-vocab ] each