From c2e2fb2304e7f10baaf1a3044a01c98b9ae5e161 Mon Sep 17 00:00:00 2001 From: Daniel Ehrenberg Date: Wed, 5 Dec 2007 21:00:52 -0500 Subject: [PATCH] extra/rss now works with some Atom 0.3 feeds --- extra/rss/rss.factor | 64 +++++++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 30 deletions(-) diff --git a/extra/rss/rss.factor b/extra/rss/rss.factor index da810ee377..40395e720f 100644 --- a/extra/rss/rss.factor +++ b/extra/rss/rss.factor @@ -9,6 +9,9 @@ USING: xml.utilities kernel assocs : ?children>string ( tag/f -- string/f ) [ children>string ] [ f ] if* ; +: any-tag-named ( tag names -- tag-inside ) + f -rot [ tag-named nip dup ] curry* find 2drop ; + TUPLE: feed title link entries ; C: feed @@ -17,50 +20,51 @@ TUPLE: entry title link description pub-date ; C: entry +: rss1.0-entry ( tag -- entry ) + [ "title" tag-named children>string ] keep + [ "link" tag-named children>string ] keep + [ "description" tag-named children>string ] keep + f "date" "http://purl.org/dc/elements/1.1/" + tag-named ?children>string + ; + : rss1.0 ( xml -- feed ) [ "channel" tag-named [ "title" tag-named children>string ] keep "link" tag-named children>string ] keep - "item" tags-named [ - [ "title" tag-named children>string ] keep - [ "link" tag-named children>string ] keep - [ "description" tag-named children>string ] keep - f "date" "http://purl.org/dc/elements/1.1/" - tag-named ?children>string - - ] map ; + "item" tags-named [ rss1.0-entry ] map ; + +: rss2.0-entry ( tag -- entry ) + [ "title" tag-named children>string ] keep + [ "link" tag-named ] keep + [ "guid" tag-named dupd ? children>string ] keep + [ "description" tag-named children>string ] keep + "pubDate" tag-named children>string ; : rss2.0 ( xml -- feed ) "channel" tag-named [ "title" tag-named children>string ] keep [ "link" tag-named children>string ] keep - "item" tags-named [ - [ "title" tag-named children>string ] keep - [ "link" tag-named ] keep - [ "guid" tag-named dupd ? children>string ] keep - [ "description" tag-named children>string ] keep - "pubDate" tag-named children>string - ] map ; + "item" tags-named [ rss2.0-entry ] map ; + +: atom1.0-entry ( tag -- entry ) + [ "title" tag-named children>string ] keep + [ "link" tag-named "href" swap at ] keep + [ + { "content" "summary" } any-tag-named + dup tag-children [ string? not ] contains? + [ tag-children [ write-chunk ] string-out ] + [ children>string ] if + ] keep + { "published" "updated" "issued" "modified" } any-tag-named + children>string ; : atom1.0 ( xml -- feed ) [ "title" tag-named children>string ] keep [ "link" tag-named "href" swap at ] keep - "entry" tags-named [ - [ "title" tag-named children>string ] keep - [ "link" tag-named "href" swap at ] keep - [ - dup "content" tag-named - [ nip ] [ "summary" tag-named ] if* - dup tag-children [ tag? ] contains? - [ tag-children [ write-chunk ] string-out ] - [ children>string ] if - ] keep - dup "published" tag-named - [ nip ] [ "updated" tag-named ] if* - children>string - ] map ; + "entry" tags-named [ atom1.0-entry ] map ; : xml>feed ( xml -- feed ) dup name-tag { @@ -74,7 +78,7 @@ C: entry : download-feed ( url -- feed ) #! Retrieve an news syndication file, return as a feed tuple. - http-get rot 200 = [ + http-get-stream rot 200 = [ nip read-feed ] [ 2drop "Error retrieving newsfeed file" throw