Merge commit 'phil/master'
commit
1b7a373ea8
|
@ -0,0 +1 @@
|
||||||
|
Phil Dawes
|
|
@ -0,0 +1,14 @@
|
||||||
|
USING: help.syntax help.markup kernel prettyprint sequences ;
|
||||||
|
IN: csv
|
||||||
|
|
||||||
|
HELP: csv
|
||||||
|
{ $values { "stream" "a stream" }
|
||||||
|
{ "rows" "an array of arrays of fields" } }
|
||||||
|
{ $description "parses a csv stream into an array of row arrays"
|
||||||
|
} ;
|
||||||
|
|
||||||
|
HELP: csv-row
|
||||||
|
{ $values { "stream" "a stream" }
|
||||||
|
{ "row" "an array of fields" } }
|
||||||
|
{ $description "parses a row from a csv stream"
|
||||||
|
} ;
|
|
@ -0,0 +1,60 @@
|
||||||
|
USING: io.streams.string csv tools.test shuffle ;
|
||||||
|
|
||||||
|
! I like to name my unit tests
|
||||||
|
: named-unit-test ( name output input -- )
|
||||||
|
nipd unit-test ; inline
|
||||||
|
|
||||||
|
! tests nicked from the wikipedia csv article
|
||||||
|
! http://en.wikipedia.org/wiki/Comma-separated_values
|
||||||
|
|
||||||
|
"Fields are separated by commas"
|
||||||
|
[ { { "1997" "Ford" "E350" } } ]
|
||||||
|
[ "1997,Ford,E350" <string-reader> csv ] named-unit-test
|
||||||
|
|
||||||
|
"ignores whitespace before and after elements. n.b.specifically prohibited by RFC 4180, which states, 'Spaces are considered part of a field and should not be ignored.'"
|
||||||
|
[ { { "1997" "Ford" "E350" } } ]
|
||||||
|
[ "1997, Ford , E350" <string-reader> csv ] named-unit-test
|
||||||
|
|
||||||
|
"keeps spaces in quotes"
|
||||||
|
[ { { "1997" "Ford" "E350" "Super, luxurious truck" } } ]
|
||||||
|
[ "1997,Ford,E350,\"Super, luxurious truck\"" <string-reader> csv ] named-unit-test
|
||||||
|
|
||||||
|
"double quotes mean escaped in quotes"
|
||||||
|
[ { { "1997" "Ford" "E350" "Super \"luxurious\" truck" } } ]
|
||||||
|
[ "1997,Ford,E350,\"Super \"\"luxurious\"\" truck\""
|
||||||
|
<string-reader> csv ] named-unit-test
|
||||||
|
|
||||||
|
"Fields with embedded line breaks must be delimited by double-quote characters."
|
||||||
|
[ { { "1997" "Ford" "E350" "Go get one now\nthey are going fast" } } ]
|
||||||
|
[ "1997,Ford,E350,\"Go get one now\nthey are going fast\""
|
||||||
|
<string-reader> csv ] named-unit-test
|
||||||
|
|
||||||
|
"Fields with leading or trailing spaces must be delimited by double-quote characters. (See comment about leading and trailing spaces above)"
|
||||||
|
[ { { "1997" "Ford" "E350" " Super luxurious truck " } } ]
|
||||||
|
[ "1997,Ford,E350,\" Super luxurious truck \""
|
||||||
|
<string-reader> csv ] unit-test
|
||||||
|
|
||||||
|
"Fields may always be delimited by double-quote characters, whether necessary or not."
|
||||||
|
[ { { "1997" "Ford" "E350" } } ]
|
||||||
|
[ "\"1997\",\"Ford\",\"E350\"" <string-reader> csv ] named-unit-test
|
||||||
|
|
||||||
|
"The first record in a csv file may contain column names in each of the fields."
|
||||||
|
[ { { "Year" "Make" "Model" }
|
||||||
|
{ "1997" "Ford" "E350" }
|
||||||
|
{ "2000" "Mercury" "Cougar" } } ]
|
||||||
|
[ "Year,Make,Model\n1997,Ford,E350\n2000,Mercury,Cougar"
|
||||||
|
<string-reader> csv ] named-unit-test
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
! !!!!!!!! other tests
|
||||||
|
|
||||||
|
[ { { "Phil Dawes" } } ]
|
||||||
|
[ "\"Phil Dawes\"" <string-reader> csv ] unit-test
|
||||||
|
|
||||||
|
[ { { "1" "2" "3" } { "4" "5" "6" } } ]
|
||||||
|
[ "1,2,3\n4,5,6\n" <string-reader> csv ] unit-test
|
||||||
|
|
||||||
|
"trims leading and trailing whitespace - n.b. this isn't really conformant, but lots of csv seems to assume this"
|
||||||
|
[ { { "foo yeah" "bah" "baz" } } ]
|
||||||
|
[ " foo yeah , bah ,baz\n" <string-reader> csv ] named-unit-test
|
|
@ -0,0 +1,59 @@
|
||||||
|
! Copyright (C) 2007, 2008 Phil Dawes
|
||||||
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
|
|
||||||
|
! Simple CSV Parser
|
||||||
|
! Phil Dawes phil@phildawes.net
|
||||||
|
|
||||||
|
USING: kernel sequences io namespaces combinators ;
|
||||||
|
IN: csv
|
||||||
|
|
||||||
|
DEFER: quoted-field
|
||||||
|
|
||||||
|
: not-quoted-field ( -- endchar )
|
||||||
|
",\"\n" read-until ! "
|
||||||
|
dup
|
||||||
|
{ { CHAR: " [ drop drop quoted-field ] } ! "
|
||||||
|
{ CHAR: , [ swap % ] }
|
||||||
|
{ CHAR: \n [ swap % ] }
|
||||||
|
{ f [ swap % ] } ! eof
|
||||||
|
} case ;
|
||||||
|
|
||||||
|
: maybe-escaped-quote ( -- endchar )
|
||||||
|
read1
|
||||||
|
dup
|
||||||
|
{ { CHAR: " [ , quoted-field ] } ! " is an escaped quote
|
||||||
|
{ CHAR: \s [ drop not-quoted-field ] }
|
||||||
|
{ CHAR: \t [ drop not-quoted-field ] }
|
||||||
|
[ drop ]
|
||||||
|
} case ;
|
||||||
|
|
||||||
|
! trims whitespace from either end of string
|
||||||
|
: trim-whitespace ( str -- str )
|
||||||
|
[ "\s\t" member? ] trim ; inline
|
||||||
|
|
||||||
|
: quoted-field ( -- endchar )
|
||||||
|
"\"" read-until ! "
|
||||||
|
drop % maybe-escaped-quote ;
|
||||||
|
|
||||||
|
: field ( -- sep string )
|
||||||
|
[ not-quoted-field ] "" make trim-whitespace ;
|
||||||
|
|
||||||
|
: (row) ( -- sep )
|
||||||
|
field ,
|
||||||
|
dup CHAR: , = [ drop (row) ] when ;
|
||||||
|
|
||||||
|
: row ( -- eof? array[string] )
|
||||||
|
[ (row) ] { } make ;
|
||||||
|
|
||||||
|
: append-if-row-not-empty ( row -- )
|
||||||
|
dup { "" } = [ drop ] [ , ] if ;
|
||||||
|
|
||||||
|
: (csv) ( -- )
|
||||||
|
row append-if-row-not-empty
|
||||||
|
[ (csv) ] when ;
|
||||||
|
|
||||||
|
: csv-row ( stream -- row )
|
||||||
|
[ row nip ] with-stream ;
|
||||||
|
|
||||||
|
: csv ( stream -- rows )
|
||||||
|
[ [ (csv) ] { } make ] with-stream ;
|
|
@ -0,0 +1 @@
|
||||||
|
CSV parser
|
|
@ -2,7 +2,7 @@
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: threads io.files io.monitors init kernel
|
USING: threads io.files io.monitors init kernel
|
||||||
vocabs vocabs.loader tools.vocabs namespaces continuations
|
vocabs vocabs.loader tools.vocabs namespaces continuations
|
||||||
sequences splitting assocs command-line ;
|
sequences splitting assocs command-line concurrency.messaging io.backend sets ;
|
||||||
IN: tools.vocabs.monitor
|
IN: tools.vocabs.monitor
|
||||||
|
|
||||||
: vocab-dir>vocab-name ( path -- vocab )
|
: vocab-dir>vocab-name ( path -- vocab )
|
||||||
|
@ -22,17 +22,20 @@ IN: tools.vocabs.monitor
|
||||||
: path>vocab ( path -- vocab )
|
: path>vocab ( path -- vocab )
|
||||||
chop-vocab-root path>vocab-name vocab-dir>vocab-name ;
|
chop-vocab-root path>vocab-name vocab-dir>vocab-name ;
|
||||||
|
|
||||||
: monitor-loop ( monitor -- )
|
: monitor-loop ( -- )
|
||||||
#! On OS X, monitors give us the full path, so we chop it
|
#! On OS X, monitors give us the full path, so we chop it
|
||||||
#! off if its there.
|
#! off if its there.
|
||||||
dup next-change drop path>vocab changed-vocab
|
receive first path>vocab changed-vocab
|
||||||
reset-cache
|
reset-cache
|
||||||
monitor-loop ;
|
monitor-loop ;
|
||||||
|
|
||||||
|
: add-monitor-for-path ( path -- )
|
||||||
|
normalize-path dup exists? [ t my-mailbox (monitor) ] when drop ;
|
||||||
|
|
||||||
: monitor-thread ( -- )
|
: monitor-thread ( -- )
|
||||||
[
|
[
|
||||||
[
|
[
|
||||||
"" resource-path t <monitor>
|
vocab-roots get prune [ add-monitor-for-path ] each
|
||||||
|
|
||||||
H{ } clone changed-vocabs set-global
|
H{ } clone changed-vocabs set-global
|
||||||
vocabs [ changed-vocab ] each
|
vocabs [ changed-vocab ] each
|
||||||
|
|
Loading…
Reference in New Issue