wikipedia: support other languages.

db4
John Benediktsson 2013-10-12 17:11:43 -07:00
parent 04d83f9f48
commit fbc43abfdf
1 changed files with 16 additions and 8 deletions

View File

@ -3,15 +3,24 @@
USING: accessors ascii assocs calendar colors.constants USING: accessors ascii assocs calendar colors.constants
formatting html.parser html.parser.analyzer html.parser.printer formatting html.parser html.parser.analyzer html.parser.printer
http.client io io.streams.string io.styles kernel make regexp http.client io io.streams.string io.styles kernel make
sequences splitting urls wrap.strings xml xml.data namespaces regexp sequences splitting urls wrap.strings xml
xml.traversal ; xml.data xml.traversal ;
FROM: xml.data => tag? ; FROM: xml.data => tag? ;
IN: wikipedia IN: wikipedia
! Variable naming the Wikipedia language subdomain; wikipedia-url reads
! it with "get" to fill the host part of the URL it builds.
SYMBOL: language
! Global default value: the "en" subdomain.
"en" language set-global
! Calls quot with the language variable bound to str for the duration
! of the call (with-variable scopes the binding to the quotation).
: with-language ( str quot -- )
    ! Slide the variable underneath the quotation: str language quot
    [ language ] dip
    with-variable ; inline
<PRIVATE <PRIVATE
! Builds the URL object for a page path on the Wikipedia subdomain
! named by the current value of the language variable.
: wikipedia-url ( path -- url )
    ! Fetch the language code beneath the path: lang path
    [ language get ] dip
    "http://%s.wikipedia.org/wiki/%s" sprintf
    >url ;
! Writes string with a font-size 20, bold style, followed by a newline.
! (Unchanged by this commit: both diff columns are identical.)
: header. ( string -- ) : header. ( string -- )
H{ { font-size 20 } { font-style bold } } format nl ; H{ { font-size 20 } { font-style bold } } format nl ;
@ -20,7 +29,7 @@ IN: wikipedia
! Writes a tag's text as a clickable object: the text comes from
! deep-children>string, the target from the tag's "href" attribute, and
! the output is styled monospace / blue.
! NOTE(review): the new column hands the raw href to wikipedia-url, which
! formats it into "http://%s.wikipedia.org/wiki/%s". The old column only
! prepended the host. If hrefs are site-relative (e.g. "/wiki/Name" --
! TODO confirm against real pages), the new URL becomes
! ".../wiki//wiki/Name". Prepending only the language-specific host here
! would keep the old behaviour.
: link. ( tag -- ) : link. ( tag -- )
[ deep-children>string ] [ attrs>> "href" of ] bi [ deep-children>string ] [ attrs>> "href" of ] bi
"http://en.wikipedia.org" prepend >url H{ wikipedia-url H{
{ font-name "monospace" } { font-name "monospace" }
{ foreground COLOR: blue } { foreground COLOR: blue }
} [ write-object ] with-style ; } [ write-object ] with-style ;
@ -36,8 +45,7 @@ IN: wikipedia
children-tags [ item. ] each nl ; children-tags [ item. ] each nl ;
! Builds the address of the "<month-name>_<day>" page for a timestamp.
! The old column produced a plain string via sprintf; the new column
! formats only the page name and passes it through wikipedia-url, so the
! result is now a URL object on the current language's subdomain.
! NOTE(review): month-name presumably yields English month names, and the
! "%s_%s" page-name pattern may not exist on non-English Wikipedias --
! confirm before relying on this with other languages.
: historical-url ( timestamp -- url ) : historical-url ( timestamp -- url )
[ month-name ] [ day>> ] bi [ month-name ] [ day>> ] bi "%s_%s" sprintf wikipedia-url ;
"http://en.wikipedia.org/wiki/%s_%s" sprintf ;
! Fetches the page at historical-url for the timestamp, parses the
! response as XML, and returns every "ul" tag found at any depth.
! (Unchanged by this commit: both diff columns are identical.)
: (historical-events) ( timestamp -- seq ) : (historical-events) ( timestamp -- seq )
historical-url http-get* string>xml "ul" deep-tags-named ; historical-url http-get* string>xml "ul" deep-tags-named ;
@ -66,8 +74,8 @@ PRIVATE>
(historical-events) "Deaths" header. fourth items. ; (historical-events) "Deaths" header. fourth items. ;
: article. ( name -- ) : article. ( name -- )
"http://en.wikipedia.org/wiki/%s" sprintf wikipedia-url http-get* parse-html
http-get* parse-html "content" find-by-id-between "content" find-by-id-between
[ html-text. ] with-string-writer string-lines [ html-text. ] with-string-writer string-lines
[ [ blank? ] trim ] map harvest [ [ [ blank? ] trim ] map harvest [
R/ &lt;/ "<" re-replace R/ &lt;/ "<" re-replace