xml.tokenize: fix cdata parsing problem.

2014-02-06 21:12:57 -08:00 · 2014-02-06 21:12:57 -08:00 · 7d489d5839
parent 64e56f65ef
commit 7d489d5839
2 changed files with 33 additions and 3 deletions
--- a/basis/xml/tests/cdata.factor
+++ b/basis/xml/tests/cdata.factor
@ -0,0 +1,22 @@
+USING: xml xml.writer tools.test ;
+IN: xml.tests
+
+{ ! expected: the CDATA wrapper is gone and its text is kept literally, so the & is written out as &amp;
+"""<?xml version="1.0" encoding="UTF-8"?>
+<rss version="2.0">
+  <channel>
+    <item>
+      <description>Python has a n class property in [&amp;#8230;]</description>
+    </item>
+  </channel>
+</rss>"""
+} [ ! input: a CDATA section whose text itself ends in ] right before the closing ]]>
+"""<?xml version="1.0" encoding="UTF-8"?>
+<rss version="2.0">
+  <channel>
+    <item>
+      <description><![CDATA[Python has a n class property in [&#8230;]]]></description>
+    </item>
+  </channel>
+</rss>""" string>xml xml>string ! parse the document, then serialize it back to a string
+] unit-test
--- a/basis/xml/tokenize/tokenize.factor
+++ b/basis/xml/tokenize/tokenize.factor
@ -92,10 +92,18 @@ HINTS: next* { spot } ;
 : string-matcher ( str -- quot: ( pos char -- pos ? ) ) ! Build a quotation that hands pos, char and str to next-matching and flags whether the resulting pos is greater than (length of str - 1)
    dup length 1 - '[ _ next-matching dup _ > ] ; inline ! the two _ holes are filled with str and (length - 1), in that order

+:: (take-string) ( match spot -- sbuf matched? ) ! Append chars read from spot to a fresh sbuf until the sbuf ends with match (matched? is t) or spot has no current char (matched? is f)
+    10 <sbuf> f [ ! 10 is only the initial sbuf capacity; f is a placeholder flag kept under the loop
+        spot char>> [
+            nip over push ! drop the stale flag and append the current char to the sbuf
+            spot next*
+            dup match tail? dup not ! leave matched? on the stack; keep looping while it is false
+        ] [ f ] if* ! char>> gave f: stop the loop, the f already on the stack becomes matched?
+    ] loop ; inline
+
 : take-string ( match -- string )
-    [ 0 swap string-matcher take-until nip ] keep
-    dupd [ length ] bi@ 1 - - head
-    get-char [ missing-close ] unless next ;
+    [ spot get (take-string) [ missing-close ] unless ] ! collect text up to and including match; calls missing-close if match was never found
+    [ dupd [ length ] bi@ - over shorten "" like ] bi ; ! shorten the sbuf by the length of match (dropping the trailing match) and convert it to a string

 : expect ( string -- )
    dup length spot get '[ _ [ char>> ] keep next* ] "" replicate-as