From 7d489d583980c7c4b6afb4f0bd91853e896f0fd3 Mon Sep 17 00:00:00 2001
From: John Benediktsson <mrjbq7@gmail.com>
Date: Thu, 6 Feb 2014 21:12:57 -0800
Subject: [PATCH] xml.tokenize: fix cdata parsing problem.

---
Reviewer notes. This region (after "---", before the first "diff --git")
is commentary only; it is not part of the commit message or of the diff.

  * basis/xml/tests/cdata.factor (new file)
    One unit-test that runs an RSS snippet through string>xml xml>string.
    The input holds the section
        <![CDATA[Python has a n class property in […]]]>
    and the expected output holds the plain text
        Python has a n class property in [&#8230;]
    i.e. the payload ends in "]" immediately before the "]]>" terminator.
    NOTE(review): presumably that overlap is what the removed
    string-matcher / take-until implementation got wrong -- confirm.

  * (take-string) ( match spot -- sbuf matched? )   (new word)
    Starts from an empty sbuf (initial capacity 10) and a false flag, then
    loops: read the current character with "spot char>>"; if there is one,
    push it onto the sbuf, advance with "spot next*", and test whether the
    sbuf now ends with match ("tail?"). The loop continues while that test
    is false. If "char>>" yields f the loop stops with matched? = f.
    On a match it stops with matched? = t.

  * take-string ( match -- string )   (rewritten)
    Calls (take-string) on the current spot and calls missing-close when
    matched? is false. Then shortens the sbuf to
    (sbuf length - match length), which drops the terminator that was
    accumulated with the text, and converts the result to a string with
    "" like. The old body (string-matcher, take-until, head, get-char,
    next) is removed; string-matcher itself is left in place.

  NOTE(review): this copy of the patch arrived with its line breaks
  collapsed. Line breaks below follow the diff markers and agree with the
  hunk counts (22 added lines; -92,10 +92,18). Leading indentation,
  including the indentation inside the two XML string literals, is
  reconstructed -- TODO confirm against blobs ab79679602 and 660cbeaf36.

 basis/xml/tests/cdata.factor       | 22 ++++++++++++++++++++++
 basis/xml/tokenize/tokenize.factor | 14 +++++++++++---
 2 files changed, 33 insertions(+), 3 deletions(-)
 create mode 100644 basis/xml/tests/cdata.factor

diff --git a/basis/xml/tests/cdata.factor b/basis/xml/tests/cdata.factor
new file mode 100644
index 0000000000..ab79679602
--- /dev/null
+++ b/basis/xml/tests/cdata.factor
@@ -0,0 +1,22 @@
+USING: xml xml.writer tools.test ;
+IN: xml.tests
+
+{
+"""<?xml version="1.0" encoding="UTF-8"?>
+<rss version="2.0">
+  <channel>
+    <item>
+      <description>Python has a n class property in [&#8230;]</description>
+    </item>
+  </channel>
+</rss>"""
+} [
+"""<?xml version="1.0" encoding="UTF-8"?>
+<rss version="2.0">
+  <channel>
+    <item>
+      <description><![CDATA[Python has a n class property in […]]]></description>
+    </item>
+  </channel>
+</rss>""" string>xml xml>string
+] unit-test
diff --git a/basis/xml/tokenize/tokenize.factor b/basis/xml/tokenize/tokenize.factor
index a85c48a022..660cbeaf36 100644
--- a/basis/xml/tokenize/tokenize.factor
+++ b/basis/xml/tokenize/tokenize.factor
@@ -92,10 +92,18 @@ HINTS: next* { spot } ;
 : string-matcher ( str -- quot: ( pos char -- pos ? ) )
     dup length 1 - '[ _ next-matching dup _ > ] ; inline
 
+:: (take-string) ( match spot -- sbuf matched? )
+    10 <sbuf> f [
+        spot char>> [
+            nip over push
+            spot next*
+            dup match tail? dup not
+        ] [ f ] if*
+    ] loop ; inline
+
 : take-string ( match -- string )
-    [ 0 swap string-matcher take-until nip ] keep
-    dupd [ length ] bi@ 1 - - head
-    get-char [ missing-close ] unless next ;
+    [ spot get (take-string) [ missing-close ] unless ]
+    [ dupd [ length ] bi@ - over shorten "" like ] bi ;
 
 : expect ( string -- )
     dup length spot get '[ _ [ char>> ] keep next* ] "" replicate-as