wikipedia: use html-unescape.

db4
John Benediktsson 2014-04-22 17:50:11 -07:00
parent 3c640add1e
commit 478cc0bb98
1 changed file with 5 additions and 8 deletions

View File

@@ -2,10 +2,10 @@
! See http://factorcode.org/license.txt for BSD license
USING: accessors ascii assocs calendar colors.constants
formatting html.parser html.parser.analyzer html.parser.printer
http.client io io.streams.string io.styles kernel make
namespaces regexp sequences splitting urls wrap.strings xml
xml.data xml.traversal ;
formatting html.entities html.parser html.parser.analyzer
html.parser.printer http.client io io.streams.string io.styles
kernel make namespaces regexp sequences splitting urls
wrap.strings xml xml.data xml.traversal ;
FROM: xml.data => tag? ;
IN: wikipedia
@@ -78,8 +78,5 @@ PRIVATE>
"content" find-by-id-between
[ html-text. ] with-string-writer string-lines
[ [ blank? ] trim ] map harvest [
R/ &lt;/ "<" re-replace
R/ &gt;/ ">" re-replace
R/ &amp;/ "&" re-replace
72 wrap-string print nl
html-unescape 72 wrap-string print nl
] each ;