From 6af6de1aacaa7b39c95c9d192a11fe29fb64c7bc Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Wed, 1 Apr 2009 15:51:39 -0500 Subject: [PATCH] make tokenize-line configurable, fix bug in take-quoted-string --- extra/html/parser/state/state-tests.factor | 10 +++++++++- extra/html/parser/state/state.factor | 19 +++++++++++++++++-- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/extra/html/parser/state/state-tests.factor b/extra/html/parser/state/state-tests.factor index b7a929284b..e655dbb699 100644 --- a/extra/html/parser/state/state-tests.factor +++ b/extra/html/parser/state/state-tests.factor @@ -53,13 +53,18 @@ IN: html.parser.state.tests [ "cd" ] [ "abcd" [ "ab" take-sequence drop ] [ "cd" take-sequence ] bi ] unit-test - [ f ] [ "\"abc\" asdf" [ CHAR: \ CHAR: " take-quoted-string drop ] [ "asdf" take-sequence ] bi ] unit-test +[ "abc\\\"def" ] +[ + "\"abc\\\"def\" asdf" + CHAR: \ CHAR: " take-quoted-string +] unit-test + [ "asdf" ] [ "\"abc\" asdf" @@ -82,3 +87,6 @@ IN: html.parser.state.tests [ "c" ] [ "c" take-token ] unit-test + +[ { "a" "b" "c" "abcd e \\\"f g" } ] +[ "a b c \"abcd e \\\"f g\"" CHAR: \ CHAR: " tokenize-line ] unit-test diff --git a/extra/html/parser/state/state.factor b/extra/html/parser/state/state.factor index 1b83089c98..6cca9f72a9 100644 --- a/extra/html/parser/state/state.factor +++ b/extra/html/parser/state/state.factor @@ -1,7 +1,8 @@ ! Copyright (C) 2005, 2009 Daniel Ehrenberg ! See http://factorcode.org/license.txt for BSD license. USING: namespaces math kernel sequences accessors fry circular -unicode.case unicode.categories locals combinators.short-circuit ; +unicode.case unicode.categories locals combinators.short-circuit +make combinators ; IN: html.parser.state @@ -87,7 +88,7 @@ TUPLE: state-parser sequence n ; state-parser advance [ { - [ { [ previous quote-char = ] [ current quote-char = ] } 1&& ] + [ { [ previous escape-char = ] [ current quote-char = ] } 1&& ] [ current quote-char = not ] } 1|| ] take-while :> string @@ -99,3 +100,17 @@ TUPLE: state-parser sequence n ; : take-token ( state-parser -- string ) skip-whitespace [ current { [ blank? ] [ f = ] } 1|| ] take-until ; + +:: (tokenize-line) ( state-parser escape-char quote-char -- ) + state-parser skip-whitespace + dup current { + { quote-char [ + [ escape-char quote-char take-quoted-string , ] + [ escape-char quote-char (tokenize-line) ] bi + ] } + { f [ drop ] } + [ drop [ take-token , ] [ escape-char quote-char (tokenize-line) ] bi ] + } case ; + +: tokenize-line ( line escape-char quote-char -- seq ) + [ ] 2dip [ (tokenize-line) ] { } make ;