Clean up RSS library

db4
Slava Pestov 2008-06-05 01:12:22 -05:00
parent 99b23348a8
commit 19044920dc
2 changed files with 62 additions and 45 deletions

View File

@ -1,5 +1,5 @@
USING: rss io kernel io.files tools.test io.encodings.utf8 USING: rss io kernel io.files tools.test io.encodings.utf8
calendar ; calendar urls ;
IN: rss.tests IN: rss.tests
: load-news-file ( filename -- feed ) : load-news-file ( filename -- feed )
@ -11,13 +11,13 @@ IN: rss.tests
feed feed
f f
"Meerkat" "Meerkat"
"http://meerkat.oreillynet.com" URL" http://meerkat.oreillynet.com"
{ {
T{ T{
entry entry
f f
"XML: A Disruptive Technology" "XML: A Disruptive Technology"
"http://c.moreover.com/click/here.pl?r123" URL" http://c.moreover.com/click/here.pl?r123"
"\n XML is placing increasingly heavy loads on the existing technical\n infrastructure of the Internet.\n " "\n XML is placing increasingly heavy loads on the existing technical\n infrastructure of the Internet.\n "
f f
} }
@ -27,13 +27,13 @@ IN: rss.tests
feed feed
f f
"dive into mark" "dive into mark"
"http://example.org/" URL" http://example.org/"
{ {
T{ T{
entry entry
f f
"Atom draft-07 snapshot" "Atom draft-07 snapshot"
"http://example.org/2005/04/02/atom" URL" http://example.org/2005/04/02/atom"
"\n <div xmlns=\"http://www.w3.org/1999/xhtml\">\n <p><i>[Update: The Atom draft is finished.]</i></p>\n </div>\n " "\n <div xmlns=\"http://www.w3.org/1999/xhtml\">\n <p><i>[Update: The Atom draft is finished.]</i></p>\n </div>\n "
T{ timestamp f 2003 12 13 8 29 29 T{ duration f 0 0 0 -4 0 0 } } T{ timestamp f 2003 12 13 8 29 29 T{ duration f 0 0 0 -4 0 0 } }

View File

@ -10,75 +10,89 @@ IN: rss
: any-tag-named ( tag names -- tag-inside ) : any-tag-named ( tag names -- tag-inside )
f -rot [ tag-named nip dup ] with find 2drop ; f -rot [ tag-named nip dup ] with find 2drop ;
TUPLE: feed title link entries ; TUPLE: feed title url entries ;
C: <feed> feed : <feed> ( -- feed ) feed new ;
TUPLE: entry title link description pub-date ; TUPLE: entry title url description pub-date ;
C: <entry> entry : set-entries ( feed entries -- feed )
[ dup url>> ] dip
[ [ derive-url ] change-url ] with map
>>entries ;
: <entry> ( -- entry ) entry new ;
: try-parsing-timestamp ( string -- timestamp ) : try-parsing-timestamp ( string -- timestamp )
[ rfc822>timestamp ] [ drop rfc3339>timestamp ] recover ; [ rfc822>timestamp ] [ drop rfc3339>timestamp ] recover ;
: rss1.0-entry ( tag -- entry ) : rss1.0-entry ( tag -- entry )
{ entry new
[ "title" tag-named children>string ] swap {
[ "link" tag-named children>string ] [ "title" tag-named children>string >>title ]
[ "description" tag-named children>string ] [ "link" tag-named children>string >url >>url ]
[ "description" tag-named children>string >>description ]
[ [
f "date" "http://purl.org/dc/elements/1.1/" <name> f "date" "http://purl.org/dc/elements/1.1/" <name>
tag-named dup [ children>string try-parsing-timestamp ] when tag-named dup [ children>string try-parsing-timestamp ] when
>>pub-date
] ]
} cleave <entry> ; } cleave ;
: rss1.0 ( xml -- feed ) : rss1.0 ( xml -- feed )
[ feed new
swap [
"channel" tag-named "channel" tag-named
[ "title" tag-named children>string ] [ "title" tag-named children>string >>title ]
[ "link" tag-named children>string ] bi [ "link" tag-named children>string >url >>url ] bi
] [ "item" tags-named [ rss1.0-entry ] map ] bi ] [ "item" tags-named [ rss1.0-entry ] map set-entries ] bi ;
<feed> ;
: rss2.0-entry ( tag -- entry ) : rss2.0-entry ( tag -- entry )
{ entry new
[ "title" tag-named children>string ] swap {
[ { "link" "guid" } any-tag-named children>string ] [ "title" tag-named children>string >>title ]
[ "description" tag-named children>string ] [ { "link" "guid" } any-tag-named children>string >url >>url ]
[ "description" tag-named children>string >>description ]
[ [
{ "date" "pubDate" } any-tag-named { "date" "pubDate" } any-tag-named
children>string try-parsing-timestamp children>string try-parsing-timestamp >>pub-date
] ]
} cleave <entry> ; } cleave ;
: rss2.0 ( xml -- feed ) : rss2.0 ( xml -- feed )
feed new
swap
"channel" tag-named "channel" tag-named
[ "title" tag-named children>string ] [ "title" tag-named children>string >>title ]
[ "link" tag-named children>string ] [ "link" tag-named children>string >>link ]
[ "item" tags-named [ rss2.0-entry ] map ] [ "item" tags-named [ rss2.0-entry ] map set-entries ]
tri <feed> ; tri ;
: atom1.0-entry ( tag -- entry ) : atom1.0-entry ( tag -- entry )
{ entry new
[ "title" tag-named children>string ] swap {
[ "link" tag-named "href" swap at ] [ "title" tag-named children>string >>title ]
[ "link" tag-named "href" swap at >url >>url ]
[ [
{ "content" "summary" } any-tag-named { "content" "summary" } any-tag-named
dup tag-children [ string? not ] contains? dup tag-children [ string? not ] contains?
[ tag-children [ write-chunk ] with-string-writer ] [ tag-children [ write-chunk ] with-string-writer ]
[ children>string ] if [ children>string ] if >>description
] ]
[ [
{ "published" "updated" "issued" "modified" } { "published" "updated" "issued" "modified" }
any-tag-named children>string try-parsing-timestamp any-tag-named children>string try-parsing-timestamp
>>pub-date
] ]
} cleave <entry> ; } cleave ;
: atom1.0 ( xml -- feed ) : atom1.0 ( xml -- feed )
[ "title" tag-named children>string ] feed new
[ "link" tag-named "href" swap at ] swap
[ "entry" tags-named [ atom1.0-entry ] map ] [ "title" tag-named children>string >>title ]
tri <feed> ; [ "link" tag-named "href" swap at >url >>url ]
[ "entry" tags-named [ atom1.0-entry ] map set-entries ]
tri ;
: xml>feed ( xml -- feed ) : xml>feed ( xml -- feed )
dup name-tag { dup name-tag {
@ -103,15 +117,18 @@ C: <entry> entry
: entry, ( entry -- ) : entry, ( entry -- )
"entry" [ "entry" [
dup title>> "title" { { "type" "html" } } simple-tag*, {
"link" over link>> dup url? [ present ] when "href" associate contained*, [ title>> "title" { { "type" "html" } } simple-tag*, ]
dup pub-date>> timestamp>rfc3339 "published" simple-tag, [ url>> present "href" associate "link" swap contained*, ]
description>> [ "content" { { "type" "html" } } simple-tag*, ] when* [ pub-date>> timestamp>rfc3339 "published" simple-tag, ]
[ description>> [ "content" { { "type" "html" } } simple-tag*, ] when* ]
} cleave
] tag, ; ] tag, ;
: feed>xml ( feed -- xml ) : feed>xml ( feed -- xml )
"feed" { { "xmlns" "http://www.w3.org/2005/Atom" } } [ "feed" { { "xmlns" "http://www.w3.org/2005/Atom" } } [
dup title>> "title" simple-tag, [ title>> "title" simple-tag, ]
"link" over link>> dup url? [ present ] when "href" associate contained*, [ url>> present "href" associate "link" swap contained*, ]
entries>> [ entry, ] each [ entries>> [ entry, ] each ]
tri
] make-xml* ; ] make-xml* ;