From 19044920dc71550cecac50c5ea01eb38c8645b95 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Thu, 5 Jun 2008 01:12:22 -0500 Subject: [PATCH] Clean up RSS library --- extra/rss/rss-tests.factor | 10 ++-- extra/rss/rss.factor | 97 ++++++++++++++++++++++---------------- 2 files changed, 62 insertions(+), 45 deletions(-) diff --git a/extra/rss/rss-tests.factor b/extra/rss/rss-tests.factor index 0e6bb0b9c1..4ecb7fc965 100755 --- a/extra/rss/rss-tests.factor +++ b/extra/rss/rss-tests.factor @@ -1,5 +1,5 @@ USING: rss io kernel io.files tools.test io.encodings.utf8 -calendar ; +calendar urls ; IN: rss.tests : load-news-file ( filename -- feed ) @@ -11,13 +11,13 @@ IN: rss.tests feed f "Meerkat" - "http://meerkat.oreillynet.com" + URL" http://meerkat.oreillynet.com" { T{ entry f "XML: A Disruptive Technology" - "http://c.moreover.com/click/here.pl?r123" + URL" http://c.moreover.com/click/here.pl?r123" "\n XML is placing increasingly heavy loads on the existing technical\n infrastructure of the Internet.\n " f } @@ -27,13 +27,13 @@ IN: rss.tests feed f "dive into mark" - "http://example.org/" + URL" http://example.org/" { T{ entry f "Atom draft-07 snapshot" - "http://example.org/2005/04/02/atom" + URL" http://example.org/2005/04/02/atom" "\n
\n

[Update: The Atom draft is finished.]

\n
\n " T{ timestamp f 2003 12 13 8 29 29 T{ duration f 0 0 0 -4 0 0 } } diff --git a/extra/rss/rss.factor b/extra/rss/rss.factor index 1dd66ff5d4..4aa92abc67 100644 --- a/extra/rss/rss.factor +++ b/extra/rss/rss.factor @@ -10,75 +10,89 @@ IN: rss : any-tag-named ( tag names -- tag-inside ) f -rot [ tag-named nip dup ] with find 2drop ; -TUPLE: feed title link entries ; +TUPLE: feed title url entries ; -C: feed +: ( -- feed ) feed new ; -TUPLE: entry title link description pub-date ; +TUPLE: entry title url description pub-date ; -C: entry +: set-entries ( feed entries -- feed ) + [ dup url>> ] dip + [ [ derive-url ] change-url ] with map + >>entries ; + +: ( -- entry ) entry new ; : try-parsing-timestamp ( string -- timestamp ) [ rfc822>timestamp ] [ drop rfc3339>timestamp ] recover ; : rss1.0-entry ( tag -- entry ) - { - [ "title" tag-named children>string ] - [ "link" tag-named children>string ] - [ "description" tag-named children>string ] + entry new + swap { + [ "title" tag-named children>string >>title ] + [ "link" tag-named children>string >url >>url ] + [ "description" tag-named children>string >>description ] [ f "date" "http://purl.org/dc/elements/1.1/" tag-named dup [ children>string try-parsing-timestamp ] when + >>pub-date ] - } cleave ; + } cleave ; : rss1.0 ( xml -- feed ) - [ + feed new + swap [ "channel" tag-named - [ "title" tag-named children>string ] - [ "link" tag-named children>string ] bi - ] [ "item" tags-named [ rss1.0-entry ] map ] bi - ; + [ "title" tag-named children>string >>title ] + [ "link" tag-named children>string >url >>url ] bi + ] [ "item" tags-named [ rss1.0-entry ] map set-entries ] bi ; : rss2.0-entry ( tag -- entry ) - { - [ "title" tag-named children>string ] - [ { "link" "guid" } any-tag-named children>string ] - [ "description" tag-named children>string ] + entry new + swap { + [ "title" tag-named children>string >>title ] + [ { "link" "guid" } any-tag-named children>string >url >>url ] + [ "description" tag-named children>string >>description ] [ { "date" "pubDate" } any-tag-named - children>string try-parsing-timestamp + children>string try-parsing-timestamp >>pub-date ] - } cleave ; + } cleave ; : rss2.0 ( xml -- feed ) + feed new + swap "channel" tag-named - [ "title" tag-named children>string ] - [ "link" tag-named children>string ] - [ "item" tags-named [ rss2.0-entry ] map ] - tri ; + [ "title" tag-named children>string >>title ] + [ "link" tag-named children>string >>link ] + [ "item" tags-named [ rss2.0-entry ] map set-entries ] + tri ; : atom1.0-entry ( tag -- entry ) - { - [ "title" tag-named children>string ] - [ "link" tag-named "href" swap at ] + entry new + swap { + [ "title" tag-named children>string >>title ] + [ "link" tag-named "href" swap at >url >>url ] [ { "content" "summary" } any-tag-named dup tag-children [ string? not ] contains? [ tag-children [ write-chunk ] with-string-writer ] - [ children>string ] if + [ children>string ] if >>description ] [ { "published" "updated" "issued" "modified" } any-tag-named children>string try-parsing-timestamp + >>pub-date ] - } cleave ; + } cleave ; : atom1.0 ( xml -- feed ) - [ "title" tag-named children>string ] - [ "link" tag-named "href" swap at ] - [ "entry" tags-named [ atom1.0-entry ] map ] - tri ; + feed new + swap + [ "title" tag-named children>string >>title ] + [ "link" tag-named "href" swap at >url >>url ] + [ "entry" tags-named [ atom1.0-entry ] map set-entries ] + tri ; : xml>feed ( xml -- feed ) dup name-tag { @@ -103,15 +117,18 @@ C: entry : entry, ( entry -- ) "entry" [ - dup title>> "title" { { "type" "html" } } simple-tag*, - "link" over link>> dup url? [ present ] when "href" associate contained*, - dup pub-date>> timestamp>rfc3339 "published" simple-tag, - description>> [ "content" { { "type" "html" } } simple-tag*, ] when* + { + [ title>> "title" { { "type" "html" } } simple-tag*, ] + [ url>> present "href" associate "link" swap contained*, ] + [ pub-date>> timestamp>rfc3339 "published" simple-tag, ] + [ description>> [ "content" { { "type" "html" } } simple-tag*, ] when* ] + } cleave ] tag, ; : feed>xml ( feed -- xml ) "feed" { { "xmlns" "http://www.w3.org/2005/Atom" } } [ - dup title>> "title" simple-tag, - "link" over link>> dup url? [ present ] when "href" associate contained*, - entries>> [ entry, ] each + [ title>> "title" simple-tag, ] + [ url>> present "href" associate "link" swap contained*, ] + [ entries>> [ entry, ] each ] + tri ] make-xml* ;