make tokenize-line configurable, fix bug in take-quoted-string
parent
d64e07af8b
commit
6af6de1aac
|
@ -53,13 +53,18 @@ IN: html.parser.state.tests
|
||||||
! After "ab" has been consumed from "abcd", taking "cd" returns "cd".
[ "cd" ]
|
[ "cd" ]
|
||||||
[ "abcd" <state-parser> [ "ab" take-sequence drop ] [ "cd" take-sequence ] bi ] unit-test
|
[ "abcd" <state-parser> [ "ab" take-sequence drop ] [ "cd" take-sequence ] bi ] unit-test
|
||||||
|
|
||||||
|
|
||||||
! Taking "asdf" directly after the quoted string yields f.
! NOTE(review): presumably because the space after the closing quote has
! not been consumed yet -- confirm against take-quoted-string.
[ f ]
|
[ f ]
|
||||||
[
|
[
|
||||||
"\"abc\" asdf" <state-parser>
|
"\"abc\" asdf" <state-parser>
|
||||||
[ CHAR: \ CHAR: " take-quoted-string drop ] [ "asdf" take-sequence ] bi
|
[ CHAR: \ CHAR: " take-quoted-string drop ] [ "asdf" take-sequence ] bi
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
||||||
|
! A quote char preceded by the escape char does not end the quoted string:
! the input  "abc\"def" asdf  yields  abc\"def  (escape char retained).
[ "abc\\\"def" ]
|
||||||
|
[
|
||||||
|
"\"abc\\\"def\" asdf" <state-parser>
|
||||||
|
CHAR: \ CHAR: " take-quoted-string
|
||||||
|
] unit-test
|
||||||
|
|
||||||
[ "asdf" ]
|
[ "asdf" ]
|
||||||
[
|
[
|
||||||
"\"abc\" asdf" <state-parser>
|
"\"abc\" asdf" <state-parser>
|
||||||
|
@ -82,3 +87,6 @@ IN: html.parser.state.tests
|
||||||
|
|
||||||
! A single-character input is returned whole by take-token.
[ "c" ]
|
[ "c" ]
|
||||||
[ "c" <state-parser> take-token ] unit-test
|
[ "c" <state-parser> take-token ] unit-test
|
||||||
|
|
||||||
|
! tokenize-line splits on whitespace, but a quoted section becomes a single
! token (without its surrounding quotes) and may contain an escaped quote:
! a b c "abcd e \"f g"  ->  a, b, c, abcd e \"f g
[ { "a" "b" "c" "abcd e \\\"f g" } ]
|
||||||
|
[ "a b c \"abcd e \\\"f g\"" CHAR: \ CHAR: " tokenize-line ] unit-test
|
||||||
|
|
|
@ -1,7 +1,8 @@
|
||||||
! Copyright (C) 2005, 2009 Daniel Ehrenberg
|
! Copyright (C) 2005, 2009 Daniel Ehrenberg
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: namespaces math kernel sequences accessors fry circular
|
USING: namespaces math kernel sequences accessors fry circular
|
||||||
unicode.case unicode.categories locals combinators.short-circuit ;
|
unicode.case unicode.categories locals combinators.short-circuit
|
||||||
|
make combinators ;
|
||||||
|
|
||||||
IN: html.parser.state
|
IN: html.parser.state
|
||||||
|
|
||||||
|
@ -87,7 +88,7 @@ TUPLE: state-parser sequence n ;
|
||||||
state-parser advance
|
state-parser advance
|
||||||
[
|
[
|
||||||
{
|
{
|
||||||
[ { [ previous quote-char = ] [ current quote-char = ] } 1&& ]
|
[ { [ previous escape-char = ] [ current quote-char = ] } 1&& ]
|
||||||
[ current quote-char = not ]
|
[ current quote-char = not ]
|
||||||
} 1||
|
} 1||
|
||||||
] take-while :> string
|
] take-while :> string
|
||||||
|
@ -99,3 +100,17 @@ TUPLE: state-parser sequence n ;
|
||||||
|
|
||||||
! Skips leading whitespace, then takes elements up to (not including) the
! first position where the current element is blank or is f.
: take-token ( state-parser -- string )
    skip-whitespace
    [ current [ blank? ] [ f = ] bi or ] take-until ;
|
||||||
|
|
||||||
|
! Recursive worker for tokenize-line. Must run inside a `make` scope: every
! token found is appended to the sequence under construction with `,`.
!
! After skipping whitespace, the current element decides what happens:
!   - equal to quote-char: take a quoted string, then continue;
!   - f: stop (NOTE(review): presumably f marks end of input -- confirm);
!   - anything else: take a whitespace-delimited token, then continue.
:: (tokenize-line) ( state-parser escape-char quote-char -- )
    state-parser skip-whitespace :> parser
    parser current :> ch
    {
        { [ ch quote-char = ] [
            parser escape-char quote-char take-quoted-string ,
            parser escape-char quote-char (tokenize-line)
        ] }
        { [ ch f = ] [ ] }
        [
            parser take-token ,
            parser escape-char quote-char (tokenize-line)
        ]
    } cond ;
|
||||||
|
|
||||||
|
! Splits `line` into an array of tokens. Wraps `line` in a state-parser and
! collects everything (tokenize-line) emits; quote-char delimits quoted
! tokens and escape-char is passed through to take-quoted-string.
:: tokenize-line ( line escape-char quote-char -- seq )
    [ line <state-parser> escape-char quote-char (tokenize-line) ] { } make ;
|
||||||
|
|
Loading…
Reference in New Issue