factor/extra/rss/rss.factor

121 lines
3.5 KiB
Factor
Raw Normal View History

2007-11-21 04:19:32 -05:00
! Copyright (C) 2006 Chris Double, Daniel Ehrenberg.
2007-09-20 18:09:08 -04:00
! See http://factorcode.org/license.txt for BSD license.
2008-04-26 12:03:41 -04:00
USING: xml.utilities kernel assocs xml.generator math.order
2007-11-21 04:19:32 -05:00
strings sequences xml.data xml.writer
2007-09-20 18:09:08 -04:00
io.streams.string combinators xml xml.entities io.files io
2008-04-17 05:15:37 -04:00
http.client namespaces xml.generator hashtables
calendar.format accessors continuations ;
2008-04-26 12:03:41 -04:00
IN: rss
2007-09-20 18:09:08 -04:00
: any-tag-named ( tag names -- tag-inside )
2008-01-09 17:36:30 -05:00
f -rot [ tag-named nip dup ] with find 2drop ;
2007-09-20 18:09:08 -04:00
TUPLE: feed title link entries ;
C: <feed> feed
TUPLE: entry title link description pub-date ;
C: <entry> entry
2008-05-26 01:48:28 -04:00
: try-parsing-timestamp ( string -- timestamp )
[ rfc822>timestamp ] [ drop rfc3339>timestamp ] recover ;
: rss1.0-entry ( tag -- entry )
2008-05-26 01:48:28 -04:00
{
[ "title" tag-named children>string ]
[ "link" tag-named children>string ]
[ "description" tag-named children>string ]
[
f "date" "http://purl.org/dc/elements/1.1/" <name>
tag-named dup [ children>string try-parsing-timestamp ] when
]
} cleave <entry> ;
2007-09-20 18:09:08 -04:00
: rss1.0 ( xml -- feed )
[
"channel" tag-named
2008-05-26 01:48:28 -04:00
[ "title" tag-named children>string ]
[ "link" tag-named children>string ] bi
] [ "item" tags-named [ rss1.0-entry ] map ] bi
<feed> ;
: rss2.0-entry ( tag -- entry )
2008-05-26 01:48:28 -04:00
{
[ "title" tag-named children>string ]
[ { "link" "guid" } any-tag-named children>string ]
[ "description" tag-named children>string ]
[
{ "date" "pubDate" } any-tag-named
children>string try-parsing-timestamp
]
} cleave <entry> ;
2007-09-20 18:09:08 -04:00
: rss2.0 ( xml -- feed )
"channel" tag-named
2008-05-26 01:48:28 -04:00
[ "title" tag-named children>string ]
[ "link" tag-named children>string ]
[ "item" tags-named [ rss2.0-entry ] map ]
tri <feed> ;
: atom1.0-entry ( tag -- entry )
2008-05-26 01:48:28 -04:00
{
[ "title" tag-named children>string ]
[ "link" tag-named "href" swap at ]
[
{ "content" "summary" } any-tag-named
dup tag-children [ string? not ] contains?
[ tag-children [ write-chunk ] with-string-writer ]
[ children>string ] if
]
[
{ "published" "updated" "issued" "modified" }
any-tag-named children>string try-parsing-timestamp
]
} cleave <entry> ;
2007-09-20 18:09:08 -04:00
: atom1.0 ( xml -- feed )
2008-05-26 01:48:28 -04:00
[ "title" tag-named children>string ]
[ "link" tag-named "href" swap at ]
[ "entry" tags-named [ atom1.0-entry ] map ]
tri <feed> ;
2007-09-20 18:09:08 -04:00
2007-11-28 22:52:22 -05:00
: xml>feed ( xml -- feed )
2007-09-20 18:09:08 -04:00
dup name-tag {
{ "RDF" [ rss1.0 ] }
{ "rss" [ rss2.0 ] }
{ "feed" [ atom1.0 ] }
} case ;
2008-04-22 21:23:54 -04:00
: read-feed ( string -- feed )
[ string>xml xml>feed ] with-html-entities ;
2007-09-20 18:09:08 -04:00
2007-11-28 22:52:22 -05:00
: download-feed ( url -- feed )
2007-09-20 18:09:08 -04:00
#! Retrieve an news syndication file, return as a feed tuple.
2008-04-22 21:23:54 -04:00
http-get read-feed ;
2007-11-21 04:19:32 -05:00
! Atom generation
: simple-tag, ( content name -- )
[ , ] tag, ;
: simple-tag*, ( content name attrs -- )
[ , ] tag*, ;
2007-11-28 22:52:22 -05:00
: entry, ( entry -- )
"entry" [
dup entry-title "title" { { "type" "html" } } simple-tag*,
"link" over entry-link "href" associate contained*,
2008-04-17 05:15:37 -04:00
dup entry-pub-date timestamp>rfc3339 "published" simple-tag,
entry-description [ "content" { { "type" "html" } } simple-tag*, ] when*
] tag, ;
2007-11-21 04:19:32 -05:00
2007-11-28 22:52:22 -05:00
: feed>xml ( feed -- xml )
"feed" { { "xmlns" "http://www.w3.org/2005/Atom" } } [
dup feed-title "title" simple-tag,
"link" over feed-link "href" associate contained*,
feed-entries [ entry, ] each
] make-xml* ;
2007-11-28 22:52:22 -05:00
2007-12-04 21:31:11 -05:00
: write-feed ( feed -- )
2007-11-28 22:52:22 -05:00
feed>xml write-xml ;