2013-07-29 10:56:14 -04:00
|
|
|
! Copyright (C) 2012 John Benediktsson
|
|
|
|
! See http://factorcode.org/license.txt for BSD license
|
|
|
|
|
2017-01-03 09:37:53 -05:00
|
|
|
USING: accessors ascii assocs colors.constants formatting
|
|
|
|
html.entities html.parser html.parser.analyzer html.parser.printer
|
|
|
|
http.client io io.styles kernel namespaces sequences splitting urls
|
|
|
|
wrap.strings xml xml.data xml.traversal ;
|
2013-10-11 14:52:53 -04:00
|
|
|
FROM: xml.data => tag? ;
|
2013-07-29 10:56:14 -04:00
|
|
|
|
|
|
|
IN: wikipedia
|
|
|
|
|
2013-10-12 20:11:43 -04:00
|
|
|
SYMBOL: language
|
|
|
|
"en" language set-global
|
|
|
|
|
|
|
|
: with-language ( str quot -- )
|
|
|
|
language swap with-variable ; inline
|
|
|
|
|
2013-07-29 10:56:14 -04:00
|
|
|
<PRIVATE
|
|
|
|
|
2013-10-12 20:11:43 -04:00
|
|
|
: wikipedia-url ( path -- url )
|
|
|
|
language get swap "http://%s.wikipedia.org/wiki/%s" sprintf >url ;
|
|
|
|
|
2013-07-29 10:56:14 -04:00
|
|
|
: header. ( string -- )
|
|
|
|
H{ { font-size 20 } { font-style bold } } format nl ;
|
|
|
|
|
|
|
|
: link ( tag -- tag/f )
|
|
|
|
"a" assure-name over tag-named? [ "a" deep-tag-named ] unless ;
|
|
|
|
|
|
|
|
: link. ( tag -- )
|
|
|
|
[ deep-children>string ] [ attrs>> "href" of ] bi
|
2013-10-12 20:11:43 -04:00
|
|
|
wikipedia-url H{
|
2013-07-29 10:56:14 -04:00
|
|
|
{ font-name "monospace" }
|
|
|
|
{ foreground COLOR: blue }
|
|
|
|
} [ write-object ] with-style ;
|
|
|
|
|
|
|
|
: item. ( tag -- )
|
|
|
|
children>> [
|
|
|
|
dup tag? [
|
|
|
|
dup link [ link. drop ] [ children>string write ] if*
|
|
|
|
] [ [ write ] unless-empty ] if
|
|
|
|
] each nl ;
|
|
|
|
|
|
|
|
: items. ( seq -- )
|
|
|
|
children-tags [ item. ] each nl ;
|
|
|
|
|
|
|
|
: historical-url ( timestamp -- url )
|
2017-01-03 09:37:53 -05:00
|
|
|
"%B_%d" strftime wikipedia-url ;
|
2013-07-29 10:56:14 -04:00
|
|
|
|
|
|
|
: (historical-events) ( timestamp -- seq )
|
2016-11-22 12:18:04 -05:00
|
|
|
historical-url http-get nip string>xml "ul" deep-tags-named ;
|
2013-07-29 10:56:14 -04:00
|
|
|
|
|
|
|
: items>sequence ( tag -- seq )
|
|
|
|
children-tags [ deep-children>string ] map ;
|
|
|
|
|
|
|
|
PRIVATE>
|
|
|
|
|
|
|
|
: historical-events ( timestamp -- events )
|
|
|
|
(historical-events) second items>sequence ;
|
|
|
|
|
|
|
|
: historical-events. ( timestamp -- )
|
|
|
|
(historical-events) "Events" header. second items. ;
|
|
|
|
|
|
|
|
: historical-births ( timestamp -- births )
|
|
|
|
(historical-events) third items>sequence ;
|
|
|
|
|
|
|
|
: historical-births. ( timestamp -- )
|
|
|
|
(historical-events) "Births" header. third items. ;
|
|
|
|
|
|
|
|
: historical-deaths ( timestamp -- births )
|
|
|
|
(historical-events) fourth items>sequence ;
|
|
|
|
|
|
|
|
: historical-deaths. ( timestamp -- )
|
|
|
|
(historical-events) "Deaths" header. fourth items. ;
|
2013-10-11 14:52:53 -04:00
|
|
|
|
|
|
|
: article. ( name -- )
|
2014-03-12 22:29:35 -04:00
|
|
|
wikipedia-url http-get nip parse-html
|
2013-10-12 20:11:43 -04:00
|
|
|
"content" find-by-id-between
|
2015-04-20 12:31:40 -04:00
|
|
|
html-text string-lines
|
2013-10-11 14:52:53 -04:00
|
|
|
[ [ blank? ] trim ] map harvest [
|
2014-04-22 20:50:11 -04:00
|
|
|
html-unescape 72 wrap-string print nl
|
2013-10-11 14:52:53 -04:00
|
|
|
] each ;
|