Merge branch 'master' of http://phildawes.net/2008/factor
commit
72fcd2b133
|
@ -12,3 +12,10 @@ HELP: csv-row
|
||||||
{ "row" "an array of fields" } }
|
{ "row" "an array of fields" } }
|
||||||
{ $description "parses a row from a csv stream"
|
{ $description "parses a row from a csv stream"
|
||||||
} ;
|
} ;
|
||||||
|
|
||||||
|
|
||||||
|
HELP: with-delimiter
|
||||||
|
{ $values { "char" "field delimiter (e.g. CHAR: \t)" }
|
||||||
|
{ "quot" "a quotation" } }
|
||||||
|
{ $description "Sets the field delimiter for csv or csv-row words "
|
||||||
|
} ;
|
||||||
|
|
|
@ -48,6 +48,7 @@ IN: csv.tests
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
! !!!!!!!! other tests
|
! !!!!!!!! other tests
|
||||||
|
|
||||||
[ { { "Phil Dawes" } } ]
|
[ { { "Phil Dawes" } } ]
|
||||||
|
@ -59,3 +60,8 @@ IN: csv.tests
|
||||||
"trims leading and trailing whitespace - n.b. this isn't really conformant, but lots of csv seems to assume this"
|
"trims leading and trailing whitespace - n.b. this isn't really conformant, but lots of csv seems to assume this"
|
||||||
[ { { "foo yeah" "bah" "baz" } } ]
|
[ { { "foo yeah" "bah" "baz" } } ]
|
||||||
[ " foo yeah , bah ,baz\n" <string-reader> csv ] named-unit-test
|
[ " foo yeah , bah ,baz\n" <string-reader> csv ] named-unit-test
|
||||||
|
|
||||||
|
|
||||||
|
"allows setting of delimiting character"
|
||||||
|
[ { { "foo" "bah" "baz" } } ]
|
||||||
|
[ "foo\tbah\tbaz\n" <string-reader> CHAR: \t [ csv ] with-delimiter ] named-unit-test
|
||||||
|
|
|
@ -4,44 +4,46 @@
|
||||||
! Simple CSV Parser
|
! Simple CSV Parser
|
||||||
! Phil Dawes phil@phildawes.net
|
! Phil Dawes phil@phildawes.net
|
||||||
|
|
||||||
USING: kernel sequences io namespaces combinators
|
USING: kernel sequences io namespaces combinators unicode.categories vars ;
|
||||||
unicode.categories ;
|
|
||||||
IN: csv
|
IN: csv
|
||||||
|
|
||||||
DEFER: quoted-field
|
DEFER: quoted-field
|
||||||
|
|
||||||
: not-quoted-field ( -- endchar )
|
VAR: delimiter
|
||||||
",\"\n" read-until ! "
|
|
||||||
dup
|
|
||||||
{ { CHAR: " [ drop drop quoted-field ] } ! "
|
|
||||||
{ CHAR: , [ swap % ] }
|
|
||||||
{ CHAR: \n [ swap % ] }
|
|
||||||
{ f [ swap % ] } ! eof
|
|
||||||
} case ;
|
|
||||||
|
|
||||||
: maybe-escaped-quote ( -- endchar )
|
|
||||||
read1
|
|
||||||
dup
|
|
||||||
{ { CHAR: " [ , quoted-field ] } ! " is an escaped quote
|
|
||||||
{ CHAR: \s [ drop not-quoted-field ] }
|
|
||||||
{ CHAR: \t [ drop not-quoted-field ] }
|
|
||||||
[ drop ]
|
|
||||||
} case ;
|
|
||||||
|
|
||||||
! trims whitespace from either end of string
|
! trims whitespace from either end of string
|
||||||
: trim-whitespace ( str -- str )
|
: trim-whitespace ( str -- str )
|
||||||
[ blank? ] trim ; inline
|
[ blank? ] trim ; inline
|
||||||
|
|
||||||
|
: skip-to-field-end ( -- endchar )
|
||||||
|
"\n" delimiter> suffix read-until nip ; inline
|
||||||
|
|
||||||
|
: not-quoted-field ( -- endchar )
|
||||||
|
"\"\n" delimiter> suffix read-until ! "
|
||||||
|
dup
|
||||||
|
{ { CHAR: " [ drop drop quoted-field ] } ! "
|
||||||
|
{ delimiter> [ swap trim-whitespace % ] }
|
||||||
|
{ CHAR: \n [ swap trim-whitespace % ] }
|
||||||
|
{ f [ swap trim-whitespace % ] } ! eof
|
||||||
|
} case ;
|
||||||
|
|
||||||
|
: maybe-escaped-quote ( -- endchar )
|
||||||
|
read1 dup
|
||||||
|
{ { CHAR: " [ , quoted-field ] } ! " is an escaped quote
|
||||||
|
{ delimiter> [ ] } ! end of quoted field
|
||||||
|
[ 2drop skip-to-field-end ] ! end of quoted field + padding
|
||||||
|
} case ;
|
||||||
|
|
||||||
: quoted-field ( -- endchar )
|
: quoted-field ( -- endchar )
|
||||||
"\"" read-until ! "
|
"\"" read-until ! "
|
||||||
drop % maybe-escaped-quote ;
|
drop % maybe-escaped-quote ;
|
||||||
|
|
||||||
: field ( -- sep string )
|
: field ( -- sep string )
|
||||||
[ not-quoted-field ] "" make trim-whitespace ;
|
[ not-quoted-field ] "" make ; ! trim-whitespace
|
||||||
|
|
||||||
: (row) ( -- sep )
|
: (row) ( -- sep )
|
||||||
field ,
|
field ,
|
||||||
dup CHAR: , = [ drop (row) ] when ;
|
dup delimiter> = [ drop (row) ] when ;
|
||||||
|
|
||||||
: row ( -- eof? array[string] )
|
: row ( -- eof? array[string] )
|
||||||
[ (row) ] { } make ;
|
[ (row) ] { } make ;
|
||||||
|
@ -53,8 +55,16 @@ DEFER: quoted-field
|
||||||
row append-if-row-not-empty
|
row append-if-row-not-empty
|
||||||
[ (csv) ] when ;
|
[ (csv) ] when ;
|
||||||
|
|
||||||
|
: init-vars ( -- )
|
||||||
|
delimiter> [ CHAR: , >delimiter ] unless ; inline
|
||||||
|
|
||||||
: csv-row ( stream -- row )
|
: csv-row ( stream -- row )
|
||||||
|
init-vars
|
||||||
[ row nip ] with-stream ;
|
[ row nip ] with-stream ;
|
||||||
|
|
||||||
: csv ( stream -- rows )
|
: csv ( stream -- rows )
|
||||||
|
init-vars
|
||||||
[ [ (csv) ] { } make ] with-stream ;
|
[ [ (csv) ] { } make ] with-stream ;
|
||||||
|
|
||||||
|
: with-delimiter ( char quot -- )
|
||||||
|
delimiter swap with-variable ; inline
|
||||||
|
|
Loading…
Reference in New Issue