Merge commit 'phil/master'

2008-04-29 20:20:21 -05:00 · 2008-04-29 20:20:21 -05:00 · 1b7a373ea8
parent 1bc0290c7d 02d95144d9
commit 1b7a373ea8
6 changed files with 142 additions and 4 deletions
--- a/extra/csv/authors.txt
+++ b/extra/csv/authors.txt
@ -0,0 +1 @@
+Phil Dawes
--- a/extra/csv/csv-docs.factor
+++ b/extra/csv/csv-docs.factor
@ -0,0 +1,14 @@
+USING: help.syntax help.markup kernel prettyprint sequences ;
+IN: csv
+
+HELP: csv
+{ $values { "stream" "a stream" }
+          { "rows" "an array of arrays of fields" } } 
+{ $description "parses a csv stream into an array of row arrays"
+} ;
+
+HELP: csv-row
+{ $values { "stream" "a stream" }
+          { "row" "an array of fields" } } 
+{ $description "parses a row from a csv stream"
+} ;
--- a/extra/csv/csv-tests.factor
+++ b/extra/csv/csv-tests.factor
@ -0,0 +1,60 @@
+USING: io.streams.string csv tools.test shuffle ;
+
+! I like to name my unit tests
+: named-unit-test ( name output input -- ) 
+  nipd unit-test ; inline
+
+! tests nicked from the wikipedia csv article
+! http://en.wikipedia.org/wiki/Comma-separated_values
+
+"Fields are separated by commas"
+[ { { "1997" "Ford" "E350" } } ] 
+[ "1997,Ford,E350" <string-reader> csv ] named-unit-test
+
+"ignores whitespace before and after elements. n.b.specifically prohibited by RFC 4180, which states, 'Spaces are considered part of a field and should not be ignored.'"
+[ { { "1997" "Ford" "E350" } } ]
+[ "1997,   Ford   , E350" <string-reader> csv ] named-unit-test
+
+"keeps spaces in quotes"
+[ { { "1997" "Ford" "E350" "Super, luxurious truck" } } ]
+[ "1997,Ford,E350,\"Super, luxurious truck\"" <string-reader> csv ] named-unit-test
+
+"double quotes mean escaped in quotes"
+[ { { "1997" "Ford" "E350" "Super \"luxurious\" truck" } } ]
+[ "1997,Ford,E350,\"Super \"\"luxurious\"\" truck\"" 
+  <string-reader> csv ] named-unit-test
+
+"Fields with embedded line breaks must be delimited by double-quote characters."
+[ { { "1997" "Ford" "E350" "Go get one now\nthey are going fast" } } ]
+[ "1997,Ford,E350,\"Go get one now\nthey are going fast\""
+  <string-reader> csv ] named-unit-test
+
+"Fields with leading or trailing spaces must be delimited by double-quote characters. (See comment about leading and trailing spaces above)"
+[ { { "1997" "Ford" "E350" "  Super luxurious truck    " } } ]
+[ "1997,Ford,E350,\"  Super luxurious truck    \""
+  <string-reader> csv ] unit-test
+
+"Fields may always be delimited by double-quote characters, whether necessary or not."
+[ { { "1997" "Ford" "E350" } } ]
+[ "\"1997\",\"Ford\",\"E350\"" <string-reader> csv ] named-unit-test
+
+"The first record in a csv file may contain column names in each of the fields."
+[ { { "Year" "Make" "Model" } 
+    { "1997" "Ford" "E350" }
+    { "2000" "Mercury" "Cougar" } } ]
+[ "Year,Make,Model\n1997,Ford,E350\n2000,Mercury,Cougar" 
+   <string-reader> csv ] named-unit-test
+
+
+   
+! !!!!!!!!  other tests
+   
+[ { { "Phil Dawes" } } ] 
+[ "\"Phil Dawes\"" <string-reader> csv ] unit-test
+
+[ { { "1" "2" "3" } { "4" "5" "6" } } ] 
+[ "1,2,3\n4,5,6\n" <string-reader> csv ] unit-test
+
+"trims leading and trailing whitespace - n.b. this isn't really conformant, but lots of csv seems to assume this"
+[ { { "foo yeah" "bah" "baz" } } ] 
+[ "  foo yeah  , bah ,baz\n" <string-reader> csv ] named-unit-test
--- a/extra/csv/csv.factor
+++ b/extra/csv/csv.factor
@ -0,0 +1,59 @@
+! Copyright (C) 2007, 2008 Phil Dawes
+! See http://factorcode.org/license.txt for BSD license.
+
+! Simple CSV Parser
+! Phil Dawes phil@phildawes.net
+
+USING: kernel sequences io namespaces combinators ;
+IN: csv
+
+DEFER: quoted-field
+
+: not-quoted-field ( -- endchar )
+  ",\"\n" read-until   ! "
+  dup
+  { { CHAR: "   [ drop drop quoted-field ] }  ! " 
+    { CHAR: ,   [ swap % ] } 
+    { CHAR: \n  [ swap % ] }    
+    { f         [ swap % ] }       ! eof
+  } case ;
+  
+: maybe-escaped-quote ( -- endchar )
+  read1 
+  dup
+  { { CHAR: "   [ , quoted-field ] }     ! " is an escaped quote
+    { CHAR: \s  [ drop not-quoted-field ] } 
+    { CHAR: \t  [ drop not-quoted-field ] } 
+    [ drop ]
+  } case ;
+
+! trims whitespace from either end of string
+: trim-whitespace ( str -- str )
+  [ "\s\t" member? ] trim ; inline
+  
+: quoted-field ( -- endchar )
+  "\"" read-until                                 ! "
+  drop % maybe-escaped-quote ;
+
+: field ( -- sep string )
+  [ not-quoted-field ] "" make trim-whitespace ;
+
+: (row) ( -- sep )
+  field , 
+  dup CHAR: , = [ drop (row) ] when ;
+
+: row ( -- eof? array[string] )
+  [ (row) ] { } make ;
+
+: append-if-row-not-empty ( row -- )
+  dup { "" } = [ drop ] [ , ] if ;
+
+: (csv) ( -- )
+  row append-if-row-not-empty
+  [ (csv) ] when ;
+
+: csv-row ( stream -- row )
+  [ row nip ] with-stream ;
+
+: csv ( stream -- rows )
+  [ [ (csv) ] { } make ] with-stream ;
--- a/extra/csv/summary.txt
+++ b/extra/csv/summary.txt
@ -0,0 +1 @@
+CSV parser
--- a/extra/tools/vocabs/monitor/monitor.factor
+++ b/extra/tools/vocabs/monitor/monitor.factor
@ -2,7 +2,7 @@
 ! See http://factorcode.org/license.txt for BSD license.
 USING: threads io.files io.monitors init kernel
 vocabs vocabs.loader tools.vocabs namespaces continuations
-sequences splitting assocs command-line ;
+sequences splitting assocs command-line concurrency.messaging io.backend sets ;
 IN: tools.vocabs.monitor

 : vocab-dir>vocab-name ( path -- vocab )
@ -22,17 +22,20 @@ IN: tools.vocabs.monitor
 : path>vocab ( path -- vocab )
    chop-vocab-root path>vocab-name vocab-dir>vocab-name ;

-: monitor-loop ( monitor -- )
+: monitor-loop ( -- )
    #! On OS X, monitors give us the full path, so we chop it
    #! off if its there.
-    dup next-change drop path>vocab changed-vocab
+    receive first path>vocab changed-vocab
    reset-cache
    monitor-loop ;

+: add-monitor-for-path ( path -- ) 
+    normalize-path dup exists? [ t my-mailbox (monitor) ] when drop ;
+    
 : monitor-thread ( -- )
    [
        [
-            "" resource-path t <monitor>
+            vocab-roots get prune [ add-monitor-for-path ] each
            
            H{ } clone changed-vocabs set-global
            vocabs [ changed-vocab ] each