Slava Pestov 2008-04-30 14:07:16 -05:00
commit 72fcd2b133
3 changed files with 44 additions and 21 deletions

View File

@ -12,3 +12,10 @@ HELP: csv-row
{ "row" "an array of fields" } }
{ $description "parses a row from a csv stream"
} ;
HELP: with-delimiter
{ $values { "char" "field delimiter (e.g. CHAR: \t)" }
{ "quot" "a quotation" } }
{ $description "Calls the quotation with the field delimiter used by the csv and csv-row words set to the given character."
} ;

View File

@ -46,6 +46,7 @@ IN: csv.tests
[ "Year,Make,Model\n1997,Ford,E350\n2000,Mercury,Cougar"
<string-reader> csv ] named-unit-test
! !!!!!!!! other tests
@ -59,3 +60,8 @@ IN: csv.tests
"trims leading and trailing whitespace - n.b. this isn't really conformant, but lots of csv seems to assume this"
[ { { "foo yeah" "bah" "baz" } } ]
[ " foo yeah , bah ,baz\n" <string-reader> csv ] named-unit-test
"allows setting of delimiting character"
[ { { "foo" "bah" "baz" } } ]
[ "foo\tbah\tbaz\n" <string-reader> CHAR: \t [ csv ] with-delimiter ] named-unit-test

View File

@ -4,44 +4,46 @@
! Simple CSV Parser
! Phil Dawes phil@phildawes.net
USING: kernel sequences io namespaces combinators
unicode.categories ;
USING: kernel sequences io namespaces combinators unicode.categories vars ;
IN: csv
DEFER: quoted-field
! Reads an unquoted field up to the next double quote, comma, newline or
! end of input. A double quote discards what was read so far and hands over
! to quoted-field; any other terminator appends the text to the string being
! built and is left on the stack as the end character.
: not-quoted-field ( -- endchar )
",\"\n" read-until ! "
dup
{ { CHAR: " [ drop drop quoted-field ] } ! "
{ CHAR: , [ swap % ] }
{ CHAR: \n [ swap % ] }
{ f [ swap % ] } ! eof
} case ;
! Reads the character that follows a double quote met inside a quoted field
! and leaves the field's end character on the stack.
: maybe-escaped-quote ( -- endchar )
read1
dup
{ { CHAR: " [ , quoted-field ] } ! " is an escaped quote
{ CHAR: \s [ drop not-quoted-field ] } ! space after the closing quote: continue as unquoted text
{ CHAR: \t [ drop not-quoted-field ] } ! tab after the closing quote: continue as unquoted text
[ drop ] ! any other character is returned as the end character
} case ;
VAR: delimiter
! Trims characters for which blank? is true from both ends of the string.
: trim-whitespace ( str -- str )
[ blank? ] trim ; inline
! Reads up to the next newline or delimiter character, throws the text away
! (nip) and leaves only the terminating character on the stack.
: skip-to-field-end ( -- endchar )
"\n" delimiter> suffix read-until nip ; inline
! Reads an unquoted field. Input is consumed up to a double quote, the
! current delimiter, a newline or end of input.
!   - double quote: the text read so far is discarded and quoted-field
!     takes over;
!   - anything else: the text is whitespace-trimmed and appended to the
!     string being built, and the terminator is left as the end character.
: not-quoted-field ( -- endchar )
    "\"\n" delimiter> suffix read-until ! "
    dup CHAR: " = ! "
    [ 2drop quoted-field ]
    [ swap trim-whitespace % ] if ;
! Called right after a double quote has been read inside a quoted field.
! Reads the next character to decide what that quote meant and leaves the
! field's end character on the stack.
!   - another double quote: an escaped quote; it is kept and reading of the
!     quoted field resumes;
!   - the delimiter or a newline: the field is finished and that character
!     is the end character. The newline must be matched here, otherwise the
!     default branch would read past it and consume the start of the next
!     row;
!   - anything else: padding after the closing quote; it is skipped up to
!     the next delimiter or newline.
: maybe-escaped-quote ( -- endchar )
    read1 dup
    { { CHAR: " [ , quoted-field ] } ! " is an escaped quote
      { delimiter> [ ] } ! end of quoted field
      { CHAR: \n [ ] } ! end of quoted field at end of row
      [ 2drop skip-to-field-end ] ! end of quoted field + padding
    } case ;
! Reads the body of a quoted field: everything up to the next double quote
! is appended to the string being built, then maybe-escaped-quote decides
! whether that quote closed the field or was an escaped quote.
: quoted-field ( -- endchar )
"\"" read-until ! "
drop % maybe-escaped-quote ;
! Builds one field as a new string by running not-quoted-field inside make;
! the character that ended the field is left beneath the string.
! The two body lines are the before/after sides of this change: the trailing
! trim-whitespace call is dropped in the second one.
: field ( -- sep string )
[ not-quoted-field ] "" make trim-whitespace ;
[ not-quoted-field ] "" make ; ! trim-whitespace
! Appends one field to the row being built and recurses for as long as the
! field was ended by the separator; the final end character is returned.
! The two comparison lines are the before/after sides of this change: a
! hard-coded comma versus the value of the delimiter variable.
: (row) ( -- sep )
field ,
dup CHAR: , = [ drop (row) ] when ;
dup delimiter> = [ drop (row) ] when ;
! Collects the fields produced by (row) into an array via make; the end
! character returned by (row) stays beneath the array.
: row ( -- eof? array[string] )
[ (row) ] { } make ;
@ -53,8 +55,16 @@ DEFER: quoted-field
row append-if-row-not-empty
[ (csv) ] when ;
! Gives the delimiter variable its default value, a comma, but only when it
! currently has no value, so a delimiter that is already set is left alone.
: init-vars ( -- )
delimiter> [ CHAR: , >delimiter ] unless ; inline
! Parses a single row from the stream: makes sure a delimiter is set, then
! runs row inside with-stream and drops the end character, keeping only the
! array of fields.
: csv-row ( stream -- row )
init-vars
[ row nip ] with-stream ;
! Parses the stream into an array of rows: makes sure a delimiter is set,
! then runs (csv) inside with-stream and collects what it accumulates with
! make.
: csv ( stream -- rows )
init-vars
[ [ (csv) ] { } make ] with-stream ;
! Calls quot through with-variable with the delimiter variable set to char.
! The swap puts the variable between the value and the quotation, the order
! with-variable is given here.
: with-delimiter ( char quot -- )
delimiter swap with-variable ; inline