wikipedia: use html-unescape.

db4
John Benediktsson 2014-04-22 17:50:11 -07:00
parent 3c640add1e
commit 478cc0bb98
1 changed file with 5 additions and 8 deletions

View File

@@ -2,10 +2,10 @@
! See http://factorcode.org/license.txt for BSD license ! See http://factorcode.org/license.txt for BSD license
USING: accessors ascii assocs calendar colors.constants USING: accessors ascii assocs calendar colors.constants
formatting html.parser html.parser.analyzer html.parser.printer formatting html.entities html.parser html.parser.analyzer
http.client io io.streams.string io.styles kernel make html.parser.printer http.client io io.streams.string io.styles
namespaces regexp sequences splitting urls wrap.strings xml kernel make namespaces regexp sequences splitting urls
xml.data xml.traversal ; wrap.strings xml xml.data xml.traversal ;
FROM: xml.data => tag? ; FROM: xml.data => tag? ;
IN: wikipedia IN: wikipedia
@@ -78,8 +78,5 @@ PRIVATE>
"content" find-by-id-between "content" find-by-id-between
[ html-text. ] with-string-writer string-lines [ html-text. ] with-string-writer string-lines
[ [ blank? ] trim ] map harvest [ [ [ blank? ] trim ] map harvest [
R/ &lt;/ "<" re-replace html-unescape 72 wrap-string print nl
R/ &gt;/ ">" re-replace
R/ &amp;/ "&" re-replace
72 wrap-string print nl
] each ; ] each ;