Added with-delimiter word to csv module to handle non-comma delimiters
parent
39c578ee56
commit
7140a018b1
|
@ -12,3 +12,10 @@ HELP: csv-row
|
||||||
{ "row" "an array of fields" } }
|
{ "row" "an array of fields" } }
|
||||||
{ $description "parses a row from a csv stream"
|
{ $description "parses a row from a csv stream"
|
||||||
} ;
|
} ;
|
||||||
|
|
||||||
|
|
||||||
|
HELP: with-delimiter
|
||||||
|
{ $values { "char" "field delimiter (e.g. CHAR: \t)" }
|
||||||
|
{ "quot" "a quotation" } }
|
||||||
|
{ $description "Sets the field delimiter used by the csv and csv-row words while quot runs."
|
||||||
|
} ;
|
||||||
|
|
|
@ -46,6 +46,7 @@ IN: csv.tests
|
||||||
[ "Year,Make,Model\n1997,Ford,E350\n2000,Mercury,Cougar"
|
[ "Year,Make,Model\n1997,Ford,E350\n2000,Mercury,Cougar"
|
||||||
<string-reader> csv ] named-unit-test
|
<string-reader> csv ] named-unit-test
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
! !!!!!!!! other tests
|
! !!!!!!!! other tests
|
||||||
|
@ -59,3 +60,8 @@ IN: csv.tests
|
||||||
"trims leading and trailing whitespace - n.b. this isn't really conformant, but lots of csv seems to assume this"
|
"trims leading and trailing whitespace - n.b. this isn't really conformant, but lots of csv seems to assume this"
|
||||||
[ { { "foo yeah" "bah" "baz" } } ]
|
[ { { "foo yeah" "bah" "baz" } } ]
|
||||||
[ " foo yeah , bah ,baz\n" <string-reader> csv ] named-unit-test
|
[ " foo yeah , bah ,baz\n" <string-reader> csv ] named-unit-test
|
||||||
|
|
||||||
|
|
||||||
|
"allows setting of delimiting character"
|
||||||
|
[ { { "foo" "bah" "baz" } } ]
|
||||||
|
[ "foo\tbah\tbaz\n" <string-reader> CHAR: \t [ csv ] with-delimiter ] named-unit-test
|
||||||
|
|
|
@ -4,32 +4,33 @@
|
||||||
! Simple CSV Parser
|
! Simple CSV Parser
|
||||||
! Phil Dawes phil@phildawes.net
|
! Phil Dawes phil@phildawes.net
|
||||||
|
|
||||||
USING: kernel sequences io namespaces combinators
|
USING: kernel sequences io namespaces combinators unicode.categories vars ;
|
||||||
unicode.categories ;
|
|
||||||
IN: csv
|
IN: csv
|
||||||
|
|
||||||
DEFER: quoted-field
|
DEFER: quoted-field
|
||||||
|
|
||||||
|
! Holds the field delimiter character consulted by the parsing words in this file.
! NOTE(review): VAR: presumably comes from the vars vocabulary and defines the
! delimiter> / >delimiter accessors used below -- confirm.
VAR: delimiter
|
||||||
|
|
||||||
! trims whitespace from either end of string
|
! trims whitespace from either end of string
|
||||||
: trim-whitespace ( str -- str )
|
: trim-whitespace ( str -- str )
|
||||||
[ blank? ] trim ; inline
|
[ blank? ] trim ; inline
|
||||||
|
|
||||||
: skip-to-field-end ( -- endchar )
|
: skip-to-field-end ( -- endchar )
|
||||||
",\n" read-until nip ; inline
|
"\n" delimiter> suffix read-until nip ; inline
|
||||||
|
|
||||||
: not-quoted-field ( -- endchar )
|
: not-quoted-field ( -- endchar )
|
||||||
",\"\n" read-until ! "
|
"\"\n" delimiter> suffix read-until ! "
|
||||||
dup
|
dup
|
||||||
{ { CHAR: " [ drop drop quoted-field ] } ! "
|
{ { CHAR: " [ drop drop quoted-field ] } ! "
|
||||||
{ CHAR: , [ swap trim-whitespace % ] }
|
{ delimiter> [ swap trim-whitespace % ] }
|
||||||
{ CHAR: \n [ swap trim-whitespace % ] }
|
{ CHAR: \n [ swap trim-whitespace % ] }
|
||||||
{ f [ swap trim-whitespace % ] } ! eof
|
{ f [ swap trim-whitespace % ] } ! eof
|
||||||
} case ;
|
} case ;
|
||||||
|
|
||||||
: maybe-escaped-quote ( -- endchar )
|
: maybe-escaped-quote ( -- endchar )
|
||||||
read1 dup
|
read1 dup
|
||||||
{ { CHAR: " [ , quoted-field ] } ! " is an escaped quote
|
{ { CHAR: " [ , quoted-field ] } ! " is an escaped quote
|
||||||
{ CHAR: , [ ] } ! end of quoted field
|
{ delimiter> [ ] } ! end of quoted field
|
||||||
[ 2drop skip-to-field-end ] ! end of quoted field + padding
|
[ 2drop skip-to-field-end ] ! end of quoted field + padding
|
||||||
} case ;
|
} case ;
|
||||||
|
|
||||||
|
@ -42,7 +43,7 @@ DEFER: quoted-field
|
||||||
|
|
||||||
: (row) ( -- sep )
|
: (row) ( -- sep )
|
||||||
field ,
|
field ,
|
||||||
dup CHAR: , = [ drop (row) ] when ;
|
dup delimiter> = [ drop (row) ] when ;
|
||||||
|
|
||||||
: row ( -- eof? array[string] )
|
: row ( -- eof? array[string] )
|
||||||
[ (row) ] { } make ;
|
[ (row) ] { } make ;
|
||||||
|
@ -54,8 +55,16 @@ DEFER: quoted-field
|
||||||
row append-if-row-not-empty
|
row append-if-row-not-empty
|
||||||
[ (csv) ] when ;
|
[ (csv) ] when ;
|
||||||
|
|
||||||
|
! Falls back to a comma delimiter: when delimiter> yields f, stores CHAR: ,
! through >delimiter; otherwise the current value is left untouched.
: init-vars ( -- )
|
||||||
|
delimiter> [ CHAR: , >delimiter ] unless ; inline
|
||||||
|
|
||||||
: csv-row ( stream -- row )
|
: csv-row ( stream -- row )
|
||||||
|
init-vars
|
||||||
[ row nip ] with-stream ;
|
[ row nip ] with-stream ;
|
||||||
|
|
||||||
: csv ( stream -- rows )
|
: csv ( stream -- rows )
|
||||||
|
init-vars
|
||||||
[ [ (csv) ] { } make ] with-stream ;
|
[ [ (csv) ] { } make ] with-stream ;
|
||||||
|
|
||||||
|
! Runs quot through with-variable with the delimiter variable set to char.
! NOTE(review): the declared effect ( char quot -- ) does not show what quot
! itself consumes or leaves; the csv test passes [ csv ] here -- confirm.
: with-delimiter ( char quot -- )
|
||||||
|
delimiter swap with-variable ; inline
|
||||||
|
|
Loading…
Reference in New Issue