Merge branch 'master' of http://phildawes.net/2008/factor
commit
72fcd2b133
|
@ -12,3 +12,10 @@ HELP: csv-row
|
|||
{ "row" "an array of fields" } }
|
||||
{ $description "Parses a row from a CSV stream."
|
||||
} ;
|
||||
|
||||
|
||||
HELP: with-delimiter
|
||||
{ $values { "char" "field delimiter (e.g. CHAR: \t)" }
|
||||
{ "quot" "a quotation" } }
|
||||
{ $description "Sets the field delimiter used by the csv and csv-row words while the quotation runs."
|
||||
} ;
|
||||
|
|
|
@ -46,6 +46,7 @@ IN: csv.tests
|
|||
[ "Year,Make,Model\n1997,Ford,E350\n2000,Mercury,Cougar"
|
||||
<string-reader> csv ] named-unit-test
|
||||
|
||||
|
||||
|
||||
|
||||
! !!!!!!!! other tests
|
||||
|
@ -59,3 +60,8 @@ IN: csv.tests
|
|||
"trims leading and trailing whitespace - n.b. this isn't really conformant, but lots of csv seems to assume this"
|
||||
[ { { "foo yeah" "bah" "baz" } } ]
|
||||
[ " foo yeah , bah ,baz\n" <string-reader> csv ] named-unit-test
|
||||
|
||||
|
||||
"allows setting of delimiting character"
|
||||
[ { { "foo" "bah" "baz" } } ]
|
||||
[ "foo\tbah\tbaz\n" <string-reader> CHAR: \t [ csv ] with-delimiter ] named-unit-test
|
||||
|
|
|
@ -4,44 +4,46 @@
|
|||
! Simple CSV Parser
|
||||
! Phil Dawes phil@phildawes.net
|
||||
|
||||
USING: kernel sequences io namespaces combinators
|
||||
unicode.categories ;
|
||||
USING: kernel sequences io namespaces combinators unicode.categories vars ;
|
||||
IN: csv
|
||||
|
||||
DEFER: quoted-field
|
||||
|
||||
! Reads an unquoted field up to the next comma, double quote, newline or
! end of input. A double quote means the field is really a quoted one, so
! the text read so far is thrown away and quoted-field takes over. For any
! other terminator the text is appended to the field being built.
! Leaves the terminating separator on the stack (f at end of input).
: not-quoted-field ( -- endchar )
    ",\"\n" read-until ! "
    dup CHAR: " = ! "
    [ 2drop quoted-field ]
    [ swap % ]
    if ;
|
||||
|
||||
! Called right after a double quote was read inside a quoted field; looks
! at the next character to decide what that quote meant.
!   another double quote -> an escaped quote: keep it and continue in
!                           quoted-field
!   space or tab         -> padding after the closing quote: carry on
!                           reading with not-quoted-field
!   anything else        -> that character is returned as the end char
: maybe-escaped-quote ( -- endchar )
    read1 dup CHAR: " = ! "
    [ , quoted-field ]
    [ dup " \t" member? [ drop not-quoted-field ] when ]
    if ;
|
||||
! Variable holding the field delimiter character. Elsewhere in this file
! it is read with delimiter> and written with >delimiter.
VAR: delimiter
|
||||
|
||||
! Strips characters satisfying blank? from both ends of str.
! not-quoted-field applies this to unquoted field text before appending it.
|
||||
: trim-whitespace ( str -- str )
|
||||
[ blank? ] trim ; inline
|
||||
|
||||
! Reads up to the next newline or delimiter, throws the text away (nip) and
! leaves only the separator that stopped the read.
! maybe-escaped-quote uses this to skip padding after a closing quote.
: skip-to-field-end ( -- endchar )
|
||||
"\n" delimiter> suffix read-until nip ; inline
|
||||
|
||||
! Reads an unquoted field up to the next double quote, newline, delimiter
! or end of input. A double quote means the field is really a quoted one,
! so the text read so far is thrown away and quoted-field takes over. For
! any other terminator the text is trimmed of surrounding blanks and
! appended to the field being built.
! Leaves the terminating separator on the stack (f at end of input).
: not-quoted-field ( -- endchar )
    "\"\n" delimiter> suffix read-until ! "
    dup CHAR: " = ! "
    [ 2drop quoted-field ]
    [ swap trim-whitespace % ]
    if ;
|
||||
|
||||
! Called right after a double quote was read inside a quoted field; looks
! at the next character to decide what that quote meant and leaves the
! character that ended the field on the stack.
!
! A newline (or end of input) directly after the closing quote must be
! returned as the end char. If it fell through to the default branch,
! skip-to-field-end would read on into the following line, swallowing its
! first field and gluing the two rows together.
: maybe-escaped-quote ( -- endchar )
    read1 dup
    { { CHAR: "    [ , quoted-field ] }  ! " is an escaped quote
      { delimiter> [ ] }                  ! end of quoted field
      { CHAR: \n   [ ] }                  ! end of quoted field and of the row
      { f          [ ] }                  ! end of quoted field at end of input
      [ 2drop skip-to-field-end ]         ! end of quoted field + padding
    } case ;
|
||||
|
||||
! Reads the body of a quoted field: everything up to the next double quote
! is appended to the field being built, then maybe-escaped-quote decides
! whether that quote was an escaped quote or the end of the field.
: quoted-field ( -- endchar )
|
||||
"\"" read-until ! "
|
||||
drop % maybe-escaped-quote ;
|
||||
|
||||
! Builds one field as a string with make; not-quoted-field appends the text
! and leaves the terminating separator underneath the resulting string.
! NOTE(review): two alternative body lines follow (one trims the result, the
! other has the trim commented out). They look like the before/after lines
! of a diff -- confirm which one is meant to be live.
: field ( -- sep string )
|
||||
[ not-quoted-field ] "" make trim-whitespace ;
|
||||
[ not-quoted-field ] "" make ; ! trim-whitespace
|
||||
|
||||
! Adds one field to the row being built and recurses while the separator
! that ended the field is the field delimiter; otherwise the separator is
! left on the stack for the caller.
! NOTE(review): two alternative last lines follow (a hard-coded CHAR: , and
! delimiter>). They look like the before/after lines of a diff -- confirm
! which one is meant to be live.
: (row) ( -- sep )
|
||||
field ,
|
||||
dup CHAR: , = [ drop (row) ] when ;
|
||||
dup delimiter> = [ drop (row) ] when ;
|
||||
|
||||
! Collects the fields of one row into an array.
! NOTE(review): the first output is the separator left by (row), which is f
! only at end of input. Despite the name eof?, it is therefore true-ish when
! more input may follow; (csv) keeps going while it is true.
: row ( -- eof? array[string] )
|
||||
[ (row) ] { } make ;
|
||||
|
@ -53,8 +55,16 @@ DEFER: quoted-field
|
|||
row append-if-row-not-empty
|
||||
[ (csv) ] when ;
|
||||
|
||||
! Gives delimiter its default of CHAR: , when it has no value yet; a value
! that is already set (e.g. by with-delimiter) is left untouched.
: init-vars ( -- )
|
||||
delimiter> [ CHAR: , >delimiter ] unless ; inline
|
||||
|
||||
! Parses a single row from stream: runs row with the stream as input and
! drops the separator it leaves (nip), keeping only the array of fields.
: csv-row ( stream -- row )
|
||||
init-vars
|
||||
[ row nip ] with-stream ;
|
||||
|
||||
! Parses stream into rows: (csv) runs inside { } make with the stream as
! input, and whatever it collects is returned as an array.
: csv ( stream -- rows )
|
||||
init-vars
|
||||
[ [ (csv) ] { } make ] with-stream ;
|
||||
|
||||
! Calls quot with delimiter bound to char. The swap puts the stack into the
! value / variable / quotation order that with-variable is given here.
: with-delimiter ( char quot -- )
|
||||
delimiter swap with-variable ; inline
|
||||
|
|
Loading…
Reference in New Issue