factor/extra/rss/rss.factor

107 lines
3.1 KiB
Factor
Raw Normal View History

2007-11-21 04:19:32 -05:00
! Copyright (C) 2006 Chris Double, Daniel Ehrenberg.
2007-09-20 18:09:08 -04:00
! See http://factorcode.org/license.txt for BSD license.
IN: rss
USING: xml.utilities kernel assocs xml.generator
2007-11-21 04:19:32 -05:00
strings sequences xml.data xml.writer
2007-09-20 18:09:08 -04:00
io.streams.string combinators xml xml.entities io.files io
2007-11-21 04:19:32 -05:00
http.client namespaces xml.generator hashtables ;
2007-09-20 18:09:08 -04:00
: ?children>string ( tag/f -- string/f )
[ children>string ] [ f ] if* ;
: any-tag-named ( tag names -- tag-inside )
f -rot [ tag-named nip dup ] curry* find 2drop ;
2007-09-20 18:09:08 -04:00
TUPLE: feed title link entries ;
C: <feed> feed
TUPLE: entry title link description pub-date ;
C: <entry> entry
: rss1.0-entry ( tag -- entry )
[ "title" tag-named children>string ] keep
[ "link" tag-named children>string ] keep
[ "description" tag-named children>string ] keep
f "date" "http://purl.org/dc/elements/1.1/" <name>
tag-named ?children>string
<entry> ;
2007-09-20 18:09:08 -04:00
: rss1.0 ( xml -- feed )
[
"channel" tag-named
[ "title" tag-named children>string ] keep
"link" tag-named children>string
] keep
"item" tags-named [ rss1.0-entry ] map <feed> ;
: rss2.0-entry ( tag -- entry )
[ "title" tag-named children>string ] keep
[ "link" tag-named ] keep
[ "guid" tag-named dupd ? children>string ] keep
[ "description" tag-named children>string ] keep
"pubDate" tag-named children>string <entry> ;
2007-09-20 18:09:08 -04:00
: rss2.0 ( xml -- feed )
"channel" tag-named
[ "title" tag-named children>string ] keep
[ "link" tag-named children>string ] keep
"item" tags-named [ rss2.0-entry ] map <feed> ;
: atom1.0-entry ( tag -- entry )
[ "title" tag-named children>string ] keep
[ "link" tag-named "href" swap at ] keep
[
{ "content" "summary" } any-tag-named
dup tag-children [ string? not ] contains?
[ tag-children [ write-chunk ] string-out ]
[ children>string ] if
] keep
{ "published" "updated" "issued" "modified" } any-tag-named
children>string <entry> ;
2007-09-20 18:09:08 -04:00
: atom1.0 ( xml -- feed )
[ "title" tag-named children>string ] keep
[ "link" tag-named "href" swap at ] keep
"entry" tags-named [ atom1.0-entry ] map <feed> ;
2007-09-20 18:09:08 -04:00
2007-11-28 22:52:22 -05:00
: xml>feed ( xml -- feed )
2007-09-20 18:09:08 -04:00
dup name-tag {
{ "RDF" [ rss1.0 ] }
{ "rss" [ rss2.0 ] }
{ "feed" [ atom1.0 ] }
} case ;
2007-11-28 22:52:22 -05:00
: read-feed ( stream -- feed )
[ read-xml ] with-html-entities xml>feed ;
2007-09-20 18:09:08 -04:00
2007-11-28 22:52:22 -05:00
: download-feed ( url -- feed )
2007-09-20 18:09:08 -04:00
#! Retrieve an news syndication file, return as a feed tuple.
http-get-stream rot 200 = [
2007-09-20 18:09:08 -04:00
nip read-feed
] [
2drop "Error retrieving newsfeed file" throw
] if ;
2007-11-21 04:19:32 -05:00
! Atom generation
2007-11-28 22:52:22 -05:00
: entry, ( entry -- )
<< entry >> [
<< title >> [ dup entry-title , ]
<< link [ dup entry-link ] == href // >>
<< published >> [ dup entry-pub-date , ]
<< content >> [ entry-description , ]
] ;
2007-11-21 04:19:32 -05:00
2007-11-28 22:52:22 -05:00
: feed>xml ( feed -- xml )
<XML
<< feed [ "http://www.w3.org/2005/Atom" ] == xmlns >> [
<< title >> [ dup feed-title , ]
<< link [ dup feed-link ] == href // >>
feed-entries [ entry, ] each
]
XML> ;
2007-11-28 22:52:22 -05:00
2007-12-04 21:31:11 -05:00
: write-feed ( feed -- )
2007-11-28 22:52:22 -05:00
feed>xml write-xml ;