factor/basis/syndication/syndication.factor

148 lines
4.1 KiB
Factor
Raw Normal View History

2007-11-21 04:19:32 -05:00
! Copyright (C) 2006 Chris Double, Daniel Ehrenberg.
2009-03-30 21:45:55 -04:00
! Portions copyright (C) 2008, 2009 Slava Pestov.
2007-09-20 18:09:08 -04:00
! See http://factorcode.org/license.txt for BSD license.
2009-03-30 21:45:55 -04:00
USING: xml.traversal kernel assocs math.order strings sequences
xml.data xml.writer io.streams.string combinators xml
xml.entities.html io.files io http.client namespaces make
xml.syntax hashtables calendar.format accessors continuations
urls present byte-arrays ;
2008-06-05 02:56:06 -04:00
IN: syndication
2007-09-20 18:09:08 -04:00
: any-tag-named ( tag names -- tag-inside )
2009-01-23 19:20:47 -05:00
[ f ] 2dip [ tag-named nip dup ] with find 2drop ;
2008-06-05 02:12:22 -04:00
TUPLE: feed title url entries ;
2007-09-20 18:09:08 -04:00
2008-06-05 02:12:22 -04:00
: <feed> ( -- feed ) feed new ;
2007-09-20 18:09:08 -04:00
2008-06-05 02:48:31 -04:00
TUPLE: entry title url description date ;
2007-09-20 18:09:08 -04:00
2008-06-05 02:12:22 -04:00
: set-entries ( feed entries -- feed )
[ dup url>> ] dip
[ [ derive-url ] change-url ] with map
>>entries ;
: <entry> ( -- entry ) entry new ;
2007-09-20 18:09:08 -04:00
2008-05-26 01:48:28 -04:00
: try-parsing-timestamp ( string -- timestamp )
[ rfc822>timestamp ] [ drop rfc3339>timestamp ] recover ;
: rss1.0-entry ( tag -- entry )
2008-06-05 02:12:22 -04:00
entry new
swap {
[ "title" tag-named children>string >>title ]
[ "link" tag-named children>string >url >>url ]
[ "description" tag-named children>string >>description ]
2008-05-26 01:48:28 -04:00
[
f "date" "http://purl.org/dc/elements/1.1/" <name>
tag-named dup [ children>string try-parsing-timestamp ] when
2008-06-05 02:48:31 -04:00
>>date
2008-05-26 01:48:28 -04:00
]
2008-06-05 02:12:22 -04:00
} cleave ;
2007-09-20 18:09:08 -04:00
: rss1.0 ( xml -- feed )
2008-06-05 02:12:22 -04:00
feed new
swap [
2007-09-20 18:09:08 -04:00
"channel" tag-named
2008-06-05 02:12:22 -04:00
[ "title" tag-named children>string >>title ]
[ "link" tag-named children>string >url >>url ] bi
] [ "item" tags-named [ rss1.0-entry ] map set-entries ] bi ;
: rss2.0-entry ( tag -- entry )
2008-06-05 02:12:22 -04:00
entry new
swap {
[ "title" tag-named children>string >>title ]
[ { "link" "guid" } any-tag-named children>string >url >>url ]
[ { "description" "encoded" } any-tag-named children>string >>description ]
2008-05-26 01:48:28 -04:00
[
{ "date" "pubDate" } any-tag-named
2008-06-05 02:48:31 -04:00
children>string try-parsing-timestamp >>date
2008-05-26 01:48:28 -04:00
]
2008-06-05 02:12:22 -04:00
} cleave ;
2007-09-20 18:09:08 -04:00
: rss2.0 ( xml -- feed )
2008-06-05 02:12:22 -04:00
feed new
swap
2007-09-20 18:09:08 -04:00
"channel" tag-named
2008-06-05 02:12:22 -04:00
[ "title" tag-named children>string >>title ]
2008-06-05 02:48:31 -04:00
[ "link" tag-named children>string >url >>url ]
2008-06-05 02:12:22 -04:00
[ "item" tags-named [ rss2.0-entry ] map set-entries ]
tri ;
: atom-entry-link ( tag -- url/f )
"link" tags-named [ "rel" attr "alternate" = ] find nip
dup [ "href" attr >url ] when ;
: atom1.0-entry ( tag -- entry )
2008-06-05 02:12:22 -04:00
entry new
swap {
[ "title" tag-named children>string >>title ]
[ atom-entry-link >>url ]
2008-05-26 01:48:28 -04:00
[
{ "content" "summary" } any-tag-named
dup children>> [ string? not ] any?
2009-01-29 14:33:04 -05:00
[ children>> xml>string ]
2008-06-05 02:12:22 -04:00
[ children>string ] if >>description
2008-05-26 01:48:28 -04:00
]
[
{ "published" "updated" "issued" "modified" }
any-tag-named children>string try-parsing-timestamp
2008-06-05 02:48:31 -04:00
>>date
2008-05-26 01:48:28 -04:00
]
2008-06-05 02:12:22 -04:00
} cleave ;
2007-09-20 18:09:08 -04:00
: atom1.0 ( xml -- feed )
2008-06-05 02:12:22 -04:00
feed new
swap
[ "title" tag-named children>string >>title ]
[ "link" tag-named "href" attr >url >>url ]
2008-06-05 02:12:22 -04:00
[ "entry" tags-named [ atom1.0-entry ] map set-entries ]
tri ;
2007-09-20 18:09:08 -04:00
2007-11-28 22:52:22 -05:00
: xml>feed ( xml -- feed )
2008-08-28 23:28:01 -04:00
dup main>> {
2007-09-20 18:09:08 -04:00
{ "RDF" [ rss1.0 ] }
{ "rss" [ rss2.0 ] }
{ "feed" [ atom1.0 ] }
} case ;
2009-03-31 05:43:26 -04:00
GENERIC: parse-feed ( seq -- feed )
2009-03-30 21:45:55 -04:00
M: string parse-feed [ string>xml xml>feed ] with-html-entities ;
M: byte-array parse-feed [ bytes>xml xml>feed ] with-html-entities ;
2007-09-20 18:09:08 -04:00
2007-11-28 22:52:22 -05:00
: download-feed ( url -- feed )
2007-09-20 18:09:08 -04:00
#! Retrieve an news syndication file, return as a feed tuple.
2009-03-30 21:45:55 -04:00
http-get nip parse-feed ;
2007-11-21 04:19:32 -05:00
! Atom generation
: entry>xml ( entry -- xml )
{
[ title>> ]
[ url>> present ]
[ date>> timestamp>rfc3339 ]
[ description>> ]
} cleave
[XML
<entry>
<title type="html"><-></title>
<link href=<-> />
<published><-></published>
<content type="html"><-></content>
</entry>
XML] ;
2007-11-21 04:19:32 -05:00
2007-11-28 22:52:22 -05:00
: feed>xml ( feed -- xml )
2009-01-26 01:05:13 -05:00
[ title>> ]
[ url>> present ]
[ entries>> [ entry>xml ] map ] tri
<XML
<feed xmlns="http://www.w3.org/2005/Atom">
<title><-></title>
<link href=<-> />
<->
</feed>
XML> ;