From 7d489d583980c7c4b6afb4f0bd91853e896f0fd3 Mon Sep 17 00:00:00 2001
From: John Benediktsson <mrjbq7@gmail.com>
Date: Thu, 6 Feb 2014 21:12:57 -0800
Subject: [PATCH] xml.tokenize: fix cdata parsing problem.

---
 basis/xml/tests/cdata.factor       | 22 ++++++++++++++++++++++
 basis/xml/tokenize/tokenize.factor | 14 +++++++++++---
 2 files changed, 33 insertions(+), 3 deletions(-)
 create mode 100644 basis/xml/tests/cdata.factor

diff --git a/basis/xml/tests/cdata.factor b/basis/xml/tests/cdata.factor
new file mode 100644
index 0000000000..ab79679602
--- /dev/null
+++ b/basis/xml/tests/cdata.factor
@@ -0,0 +1,22 @@
+USING: xml xml.writer tools.test ;
+IN: xml.tests
+
+{ ! Expected xml>string output: the CDATA text comes back as ordinary escaped text ("&" written as "&amp;").
+"""<?xml version="1.0" encoding="UTF-8"?>
+<rss version="2.0">
+  <channel>
+    <item>
+      <description>Python has a n class property in [&amp;#8230;]</description>
+    </item>
+  </channel>
+</rss>"""
+} [ ! Input: the same document, but the description is a CDATA section whose text ends in "]" right before the "]]>" terminator.
+"""<?xml version="1.0" encoding="UTF-8"?>
+<rss version="2.0">
+  <channel>
+    <item>
+      <description><![CDATA[Python has a n class property in [&#8230;]]]></description>
+    </item>
+  </channel>
+</rss>""" string>xml xml>string
+] unit-test ! Parses the input with string>xml, serialises it with xml>string, and compares against the expected string above.
diff --git a/basis/xml/tokenize/tokenize.factor b/basis/xml/tokenize/tokenize.factor
index a85c48a022..660cbeaf36 100644
--- a/basis/xml/tokenize/tokenize.factor
+++ b/basis/xml/tokenize/tokenize.factor
@@ -92,10 +92,18 @@ HINTS: next* { spot } ;
 : string-matcher ( str -- quot: ( pos char -- pos ? ) )
     dup length 1 - '[ _ next-matching dup _ > ] ; inline
 
+:: (take-string) ( match spot -- sbuf matched? ) ! Collects chars read from spot into an sbuf until the sbuf ends with match; matched? is f if the chars run out first.
+    10 <sbuf> f [ ! Fresh sbuf (10 is the size argument given to <sbuf>); the f seeds the matched? slot carried across iterations.
+        spot char>> [ ! Current char of spot; f is treated as "no more input" by if* below.
+            nip over push ! Drop the previous flag and append the char to the sbuf.
+            spot next* ! Presumably advances spot to the following char -- next* is defined outside this hunk.
+            dup match tail? dup not ! Leave matched? (does the sbuf end with match?) and, on top, its negation as the loop's continue flag.
+        ] [ f ] if* ! No char: the f already on the stack stays as matched?, and the pushed f stops the loop.
+    ] loop ; inline
+
 : take-string ( match -- string )
-    [ 0 swap string-matcher take-until nip ] keep
-    dupd [ length ] bi@ 1 - - head
-    get-char [ missing-close ] unless next ;
+    [ spot get (take-string) [ missing-close ] unless ] ! Read chars up to and including match; call missing-close if match was never seen.
+    [ dupd [ length ] bi@ - over shorten "" like ] bi ; ! Shorten the sbuf by match's length (removing the trailing match) and convert it to a string.
 
 : expect ( string -- )
     dup length spot get '[ _ [ char>> ] keep next* ] "" replicate-as