! Copyright (C) 2006 Chris Double. ! See http://factorcode.org/license.txt for BSD license. IN: rss ! USING: kernel http-client xml xml-utils xml-data errors io strings ! sequences xml-writer parser-combinators lazy-lists entities ; USING: xml.utilities kernel promises parser-combinators assocs parser-combinators.replace strings sequences xml.data xml.writer io.streams.string combinators xml xml.entities io.files io http.client ; : ?children>string ( tag/f -- string/f ) [ children>string ] [ f ] if* ; LAZY: '&' ( -- parser ) "&" token [ blank? ] satisfy &> [ "&" swap add ] <@ ; : &>& ( string -- string ) '&' replace ; TUPLE: feed title link entries ; C: feed TUPLE: entry title link description pub-date ; C: entry : rss1.0 ( xml -- feed ) [ "channel" tag-named [ "title" tag-named children>string ] keep "link" tag-named children>string ] keep "item" tags-named [ [ "title" tag-named children>string ] keep [ "link" tag-named children>string ] keep [ "description" tag-named children>string ] keep f "date" "http://purl.org/dc/elements/1.1/" tag-named ?children>string ] map ; : rss2.0 ( xml -- feed ) "channel" tag-named [ "title" tag-named children>string ] keep [ "link" tag-named children>string ] keep "item" tags-named [ [ "title" tag-named children>string ] keep [ "link" tag-named ] keep [ "guid" tag-named dupd ? children>string ] keep [ "description" tag-named children>string ] keep "pubDate" tag-named children>string ] map ; : atom1.0 ( xml -- feed ) [ "title" tag-named children>string ] keep [ "link" tag-named "href" swap at ] keep "entry" tags-named [ [ "title" tag-named children>string ] keep [ "link" tag-named "href" swap at ] keep [ dup "content" tag-named [ nip ] [ "summary" tag-named ] if* dup tag-children [ tag? ] contains? [ tag-children [ write-chunk ] string-out ] [ children>string ] if ] keep dup "published" tag-named [ nip ] [ "updated" tag-named ] if* children>string ] map ; : feed ( xml -- feed ) dup name-tag { { "RDF" [ rss1.0 ] } { "rss" [ rss2.0 ] } { "feed" [ atom1.0 ] } } case ; : read-feed ( string -- feed ) ! &>& ! this will be uncommented when parser-combinators are fixed [ string>xml ] with-html-entities feed ; : load-news-file ( filename -- feed ) #! Load an news syndication file and process it, returning #! it as an feed tuple. [ contents read-feed ] keep stream-close ; : news-get ( url -- feed ) #! Retrieve an news syndication file, return as a feed tuple. http-get rot 200 = [ nip read-feed ] [ 2drop "Error retrieving newsfeed file" throw ] if ;