csv: need to handle \r because Windows lines end with \r\n

db4
Björn Lindqvist 2014-09-25 17:06:01 +02:00 committed by Doug Coleman
parent ae81a9c426
commit c07c1ba9b1
2 changed files with 16 additions and 8 deletions

View File

@ -4,11 +4,11 @@ io.directories ;
IN: csv.tests IN: csv.tests
! I like to name my unit tests ! I like to name my unit tests
: named-unit-test ( name output input -- ) : named-unit-test ( name output input -- )
unit-test drop ; inline unit-test drop ; inline
"Fields are separated by commas" "Fields are separated by commas"
[ { { "1997" "Ford" "E350" } } ] [ { { "1997" "Ford" "E350" } } ]
[ "1997,Ford,E350" string>csv ] named-unit-test [ "1997,Ford,E350" string>csv ] named-unit-test
"ignores whitespace before and after elements. n.b.specifically prohibited by RFC 4180, which states, 'Spaces are considered part of a field and should not be ignored.'" "ignores whitespace before and after elements. n.b.specifically prohibited by RFC 4180, which states, 'Spaces are considered part of a field and should not be ignored.'"
@ -21,7 +21,7 @@ IN: csv.tests
"double quotes mean escaped in quotes" "double quotes mean escaped in quotes"
[ { { "1997" "Ford" "E350" "Super \"luxurious\" truck" } } ] [ { { "1997" "Ford" "E350" "Super \"luxurious\" truck" } } ]
[ "1997,Ford,E350,\"Super \"\"luxurious\"\" truck\"" [ "1997,Ford,E350,\"Super \"\"luxurious\"\" truck\""
string>csv ] named-unit-test string>csv ] named-unit-test
"Fields with embedded line breaks must be delimited by double-quote characters." "Fields with embedded line breaks must be delimited by double-quote characters."
@ -39,10 +39,10 @@ IN: csv.tests
[ "\"1997\",\"Ford\",\"E350\"" string>csv ] named-unit-test [ "\"1997\",\"Ford\",\"E350\"" string>csv ] named-unit-test
"The first record in a csv file may contain column names in each of the fields." "The first record in a csv file may contain column names in each of the fields."
[ { { "Year" "Make" "Model" } [ { { "Year" "Make" "Model" }
{ "1997" "Ford" "E350" } { "1997" "Ford" "E350" }
{ "2000" "Mercury" "Cougar" } } ] { "2000" "Mercury" "Cougar" } } ]
[ "Year,Make,Model\n1997,Ford,E350\n2000,Mercury,Cougar" [ "Year,Make,Model\n1997,Ford,E350\n2000,Mercury,Cougar"
string>csv ] named-unit-test string>csv ] named-unit-test
@ -102,3 +102,10 @@ IN: csv.tests
{ { { "as,d\"f" "asdf" } } } [ "\"as,\"d\"\"\"\"f,asdf" string>csv ] unit-test { { { "as,d\"f" "asdf" } } } [ "\"as,\"d\"\"\"\"f,asdf" string>csv ] unit-test
[ { } ] [ "" string>csv ] unit-test [ { } ] [ "" string>csv ] unit-test
[
{ { "Year" "Make" "Model" }
{ "1997" "Ford" "E350" }
}
]
[ "Year,Make,\"Model\"\r\n1997,Ford,E350" string>csv ] unit-test

View File

@ -12,7 +12,7 @@ CHAR: , delimiter set-global
<PRIVATE <PRIVATE
MEMO: field-delimiters ( delimiter -- field-seps quote-seps ) MEMO: field-delimiters ( delimiter -- field-seps quote-seps )
[ "\n" swap prefix ] [ "\"\n" swap prefix ] bi ; inline [ "\r\n" swap prefix ] [ "\r\"\n" swap prefix ] bi ; inline
DEFER: quoted-field, DEFER: quoted-field,
@ -21,7 +21,8 @@ DEFER: quoted-field,
[ nip ] [ [ nip ] [
{ {
{ CHAR: " [ [ CHAR: " , ] when quoted-field, ] } { CHAR: " [ [ CHAR: " , ] when quoted-field, ] }
{ CHAR: \n [ ] } ! Error: newline inside string? { CHAR: \n [ ] } ! Error: lf inside string?
{ CHAR: \r [ ] } ! Error: cr inside string?
[ [ , drop f maybe-escaped-quote ] when* ] [ [ , drop f maybe-escaped-quote ] when* ]
} case } case
] if ; inline recursive ] if ; inline recursive
@ -85,7 +86,7 @@ PRIVATE>
<PRIVATE <PRIVATE
: needs-escaping? ( cell delimiter -- ? ) : needs-escaping? ( cell delimiter -- ? )
'[ dup "\n\"" member? [ drop t ] [ _ = ] if ] any? ; inline '[ dup "\n\"\r" member? [ drop t ] [ _ = ] if ] any? ; inline
: escape-quotes ( cell stream -- ) : escape-quotes ( cell stream -- )
CHAR: " over stream-write1 swap [ CHAR: " over stream-write1 swap [