From 6af6de1aacaa7b39c95c9d192a11fe29fb64c7bc Mon Sep 17 00:00:00 2001
From: Doug Coleman <erg@jobim.local>
Date: Wed, 1 Apr 2009 15:51:39 -0500
Subject: [PATCH] make tokenize-line configurable, fix bug in
 take-quoted-string

---
 extra/html/parser/state/state-tests.factor | 10 +++++++++-
 extra/html/parser/state/state.factor       | 19 +++++++++++++++++--
 2 files changed, 26 insertions(+), 3 deletions(-)
diff --git a/extra/html/parser/state/state-tests.factor b/extra/html/parser/state/state-tests.factor
index b7a929284b..e655dbb699 100644
--- a/extra/html/parser/state/state-tests.factor
+++ b/extra/html/parser/state/state-tests.factor
@@ -53,13 +53,18 @@ IN: html.parser.state.tests
 [ "cd" ]
 [ "abcd" <state-parser> [ "ab" take-sequence drop ] [ "cd" take-sequence ] bi ] unit-test
 
-
 [ f ]
 [
     "\"abc\" asdf" <state-parser>
     [ CHAR: \ CHAR: " take-quoted-string drop ] [ "asdf" take-sequence ] bi
 ] unit-test
 
+[ "abc\\\"def" ] ! expected: the text between the outer quotes, escape character kept
+[
+    "\"abc\\\"def\" asdf" <state-parser>
+    CHAR: \ CHAR: " take-quoted-string
+] unit-test
+
 [ "asdf" ]
 [
     "\"abc\" asdf" <state-parser>
@@ -82,3 +87,6 @@ IN: html.parser.state.tests
 
 [ "c" ]
 [ "c" <state-parser> take-token ] unit-test
+
+[ { "a" "b" "c" "abcd e \\\"f g" } ] ! the quoted token keeps its spaces and its escaped quote
+[ "a b c  \"abcd e \\\"f g\"" CHAR: \ CHAR: " tokenize-line ] unit-test
diff --git a/extra/html/parser/state/state.factor b/extra/html/parser/state/state.factor
index 1b83089c98..6cca9f72a9 100644
--- a/extra/html/parser/state/state.factor
+++ b/extra/html/parser/state/state.factor
@@ -1,7 +1,8 @@
 ! Copyright (C) 2005, 2009 Daniel Ehrenberg
 ! See http://factorcode.org/license.txt for BSD license.
 USING: namespaces math kernel sequences accessors fry circular
-unicode.case unicode.categories locals combinators.short-circuit ;
+unicode.case unicode.categories locals combinators.short-circuit
+make combinators ;
 
 IN: html.parser.state
 
@@ -87,7 +88,7 @@ TUPLE: state-parser sequence n ;
     state-parser advance
     [
         {
-            [ { [ previous quote-char = ] [ current quote-char = ] } 1&& ]
+            [ { [ previous escape-char = ] [ current quote-char = ] } 1&& ]
             [ current quote-char = not ]
         } 1||
     ] take-while :> string
@@ -99,3 +100,17 @@ TUPLE: state-parser sequence n ;
 
 : take-token ( state-parser -- string )
     skip-whitespace [ current { [ blank? ] [ f = ] } 1|| ] take-until ;
+
+:: (tokenize-line) ( state-parser escape-char quote-char -- )
+    state-parser skip-whitespace dup current :> ch ! the parser itself stays on the stack
+    {
+        { [ ch quote-char = ] [ ! at an opening quote: emit one quoted string, then recurse
+            [ escape-char quote-char take-quoted-string , ]
+            [ escape-char quote-char (tokenize-line) ] bi
+        ] }
+        { [ ch not ] [ drop ] } ! current yielded f: discard the parser and stop
+        [ [ take-token , ] [ escape-char quote-char (tokenize-line) ] bi ]
+    } cond ;
+
+:: tokenize-line ( line escape-char quote-char -- seq ) ! seq: array of the tokens emitted with ,
+    [ line <state-parser> escape-char quote-char (tokenize-line) ] { } make ;