Slava Pestov 2008-04-30 14:07:16 -05:00
commit 72fcd2b133
3 changed files with 44 additions and 21 deletions

View File

@ -12,3 +12,10 @@ HELP: csv-row
{ "row" "an array of fields" } } { "row" "an array of fields" } }
{ $description "parses a row from a csv stream" { $description "parses a row from a csv stream"
} ; } ;
! Help entry for with-delimiter. It documents the two inputs, "char" (the
! field delimiter) and "quot" (a quotation).
HELP: with-delimiter
{ $values { "char" "field delimiter (e.g. CHAR: \t)" }
{ "quot" "a quotation" } }
{ $description "Sets the field delimiter for csv or csv-row words "
} ;

View File

@ -48,6 +48,7 @@ IN: csv.tests
! !!!!!!!! other tests ! !!!!!!!! other tests
[ { { "Phil Dawes" } } ] [ { { "Phil Dawes" } } ]
@ -59,3 +60,8 @@ IN: csv.tests
"trims leading and trailing whitespace - n.b. this isn't really conformant, but lots of csv seems to assume this" "trims leading and trailing whitespace - n.b. this isn't really conformant, but lots of csv seems to assume this"
[ { { "foo yeah" "bah" "baz" } } ] [ { { "foo yeah" "bah" "baz" } } ]
[ " foo yeah , bah ,baz\n" <string-reader> csv ] named-unit-test [ " foo yeah , bah ,baz\n" <string-reader> csv ] named-unit-test
! Parses tab-separated input by running csv inside with-delimiter with the
! delimiter given as CHAR: \t; expects one row holding the three fields.
"allows setting of delimiting character"
[ { { "foo" "bah" "baz" } } ]
[ "foo\tbah\tbaz\n" <string-reader> CHAR: \t [ csv ] with-delimiter ] named-unit-test

View File

@ -4,44 +4,46 @@
! Simple CSV Parser ! Simple CSV Parser
! Phil Dawes phil@phildawes.net ! Phil Dawes phil@phildawes.net
USING: kernel sequences io namespaces combinators USING: kernel sequences io namespaces combinators unicode.categories vars ;
unicode.categories ;
IN: csv IN: csv
DEFER: quoted-field DEFER: quoted-field
! Reads up to the next comma, double quote, newline or eof and dispatches on
! the separator that stopped the read. For comma, newline and eof the text read
! is appended to the field being built and the separator is left on the stack
! as endchar; a double quote hands parsing over to quoted-field.
! NOTE(review): the trailing "VAR: delimiter" on the definition line looks like
! the other column of a side-by-side diff merged into this line - confirm.
: not-quoted-field ( -- endchar ) VAR: delimiter
",\"\n" read-until ! "
dup
{ { CHAR: " [ drop drop quoted-field ] } ! " drops the text read so far and the quote
{ CHAR: , [ swap % ] }
{ CHAR: \n [ swap % ] }
{ f [ swap % ] } ! eof
} case ;
! Reads the single character that follows a double quote and decides how to
! continue: a second quote is kept as data, a space or tab resumes parsing as
! unquoted text, and any other character is returned as the field's endchar.
: maybe-escaped-quote ( -- endchar )
read1
dup
{ { CHAR: " [ , quoted-field ] } ! " is an escaped quote
{ CHAR: \s [ drop not-quoted-field ] } ! space: drop it and continue as unquoted text
{ CHAR: \t [ drop not-quoted-field ] } ! tab: drop it and continue as unquoted text
[ drop ] ! default: the character read becomes endchar
} case ;
! Strips characters satisfying blank? from both ends of the string.
: trim-whitespace ( str -- str ) : trim-whitespace ( str -- str )
[ blank? ] trim ; inline [ blank? ] trim ; inline
! Discards input up to the next newline or field delimiter and returns the
! separator that stopped the read; the skipped text itself is thrown away.
: skip-to-field-end ( -- endchar )
delimiter> "\n" swap suffix read-until nip ; inline
! Reads up to the next double quote, newline, field delimiter or eof.
! A double quote means the field is really a quoted one: the text read so far
! and the quote are dropped and quoted-field takes over. For every other
! terminator the text is trimmed and appended to the field being built, and
! the terminator stays on the stack as endchar.
: not-quoted-field ( -- endchar )
"\"\n" delimiter> suffix read-until ! "
dup CHAR: " = ! "
[ 2drop quoted-field ]
[ swap trim-whitespace % ] if ;
! Reads the character that follows a double quote inside a quoted field and
! decides how the field continues or ends:
!   double quote  the quote was doubled (escaped): keep it, resume quoted-field
!   delimiter     the field is finished; returned as endchar
!   newline       the field and the row are finished; returned as endchar
!   f (eof)       input is exhausted; returned as endchar
!   anything else padding after the closing quote: skip to the real field end
! Newline and eof are returned as-is. Passing them to skip-to-field-end would
! read past the row boundary and swallow the first field of the next row.
: maybe-escaped-quote ( -- endchar )
read1 dup
{ { CHAR: " [ , quoted-field ] } ! " is an escaped quote
{ delimiter> [ ] } ! end of quoted field
{ CHAR: \n [ ] } ! end of quoted field and of the row
{ f [ ] } ! end of quoted field at eof
[ 2drop skip-to-field-end ] ! end of quoted field + padding
} case ;
! Appends the text up to the next double quote to the field being built, then
! lets maybe-escaped-quote decide whether that quote was escaped or closing.
: quoted-field ( -- endchar ) : quoted-field ( -- endchar )
"\"" read-until ! " "\"" read-until ! "
drop % maybe-escaped-quote ; drop % maybe-escaped-quote ;
! Builds one field as a string from whatever not-quoted-field emits; the
! separator that ended the field stays underneath it on the stack.
! NOTE(review): the body line carries two variants, one calling trim-whitespace
! here and one with it commented out - presumably the two sides of a diff; confirm.
: field ( -- sep string ) : field ( -- sep string )
[ not-quoted-field ] "" make trim-whitespace ; [ not-quoted-field ] "" make ; ! trim-whitespace
! Adds one field to the row being built and recurses for as long as the
! separator that ended the field indicates that more fields follow.
! NOTE(review): the last line compares against both a literal comma and
! delimiter> - presumably the two sides of a diff; confirm which is current.
: (row) ( -- sep ) : (row) ( -- sep )
field , field ,
dup CHAR: , = [ drop (row) ] when ; dup delimiter> = [ drop (row) ] when ;
! Collects the fields produced by (row) into an array; the separator that
! ended the row (named eof? in the stack effect) is left underneath it.
: row ( -- eof? array[string] ) : row ( -- eof? array[string] )
[ (row) ] { } make ; [ (row) ] { } make ;
@ -53,8 +55,16 @@ DEFER: quoted-field
row append-if-row-not-empty row append-if-row-not-empty
[ (csv) ] when ; [ (csv) ] when ;
! Installs the comma as the field delimiter when no delimiter is set yet;
! an existing delimiter value is left untouched.
: init-vars ( -- )
delimiter> not [ CHAR: , >delimiter ] when ; inline
! Reads a single row from stream: ensures a delimiter is set, then runs row
! with stream as the input stream and discards the end separator.
: csv-row ( stream -- row ) : csv-row ( stream -- row )
init-vars
[ row nip ] with-stream ; [ row nip ] with-stream ;
! Ensures a delimiter is set, then runs (csv) with stream as the input stream
! and gathers what it emits into an array, returned as rows.
: csv ( stream -- rows ) : csv ( stream -- rows )
init-vars
[ [ (csv) ] { } make ] with-stream ; [ [ (csv) ] { } make ] with-stream ;
! Calls quot with the delimiter variable bound to char. The swap puts the
! arguments in the value-key-quotation order that with-variable is given here.
: with-delimiter ( char quot -- )
delimiter swap with-variable ; inline