Clean up RSS library
							parent
							
								
									99b23348a8
								
							
						
					
					
						commit
						19044920dc
					
				| 
						 | 
				
			
			@ -1,5 +1,5 @@
 | 
			
		|||
USING: rss io kernel io.files tools.test io.encodings.utf8
 | 
			
		||||
calendar ;
 | 
			
		||||
calendar urls ;
 | 
			
		||||
IN: rss.tests
 | 
			
		||||
 | 
			
		||||
: load-news-file ( filename -- feed )
 | 
			
		||||
| 
						 | 
				
			
			@ -11,13 +11,13 @@ IN: rss.tests
 | 
			
		|||
    feed
 | 
			
		||||
    f
 | 
			
		||||
    "Meerkat"
 | 
			
		||||
    "http://meerkat.oreillynet.com"
 | 
			
		||||
    URL" http://meerkat.oreillynet.com"
 | 
			
		||||
    {
 | 
			
		||||
        T{
 | 
			
		||||
            entry
 | 
			
		||||
            f
 | 
			
		||||
            "XML: A Disruptive Technology"
 | 
			
		||||
            "http://c.moreover.com/click/here.pl?r123"
 | 
			
		||||
            URL" http://c.moreover.com/click/here.pl?r123"
 | 
			
		||||
            "\n      XML is placing increasingly heavy loads on the existing technical\n      infrastructure of the Internet.\n    "
 | 
			
		||||
            f
 | 
			
		||||
        }
 | 
			
		||||
| 
						 | 
				
			
			@ -27,13 +27,13 @@ IN: rss.tests
 | 
			
		|||
    feed
 | 
			
		||||
    f
 | 
			
		||||
    "dive into mark"
 | 
			
		||||
    "http://example.org/"
 | 
			
		||||
    URL" http://example.org/"
 | 
			
		||||
    {
 | 
			
		||||
        T{
 | 
			
		||||
            entry
 | 
			
		||||
            f
 | 
			
		||||
            "Atom draft-07 snapshot"
 | 
			
		||||
            "http://example.org/2005/04/02/atom"
 | 
			
		||||
            URL" http://example.org/2005/04/02/atom"
 | 
			
		||||
            "\n         <div xmlns=\"http://www.w3.org/1999/xhtml\">\n           <p><i>[Update: The Atom draft is finished.]</i></p>\n         </div>\n       "
 | 
			
		||||
 | 
			
		||||
            T{ timestamp f 2003 12 13 8 29 29 T{ duration f 0 0 0 -4 0 0 } }
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -10,75 +10,89 @@ IN: rss
 | 
			
		|||
: any-tag-named ( tag names -- tag-inside )
 | 
			
		||||
    f -rot [ tag-named nip dup ] with find 2drop ;
 | 
			
		||||
 | 
			
		||||
TUPLE: feed title link entries ;
 | 
			
		||||
TUPLE: feed title url entries ;
 | 
			
		||||
 | 
			
		||||
C: <feed> feed
 | 
			
		||||
: <feed> ( -- feed ) feed new ;
 | 
			
		||||
 | 
			
		||||
TUPLE: entry title link description pub-date ;
 | 
			
		||||
TUPLE: entry title url description pub-date ;
 | 
			
		||||
 | 
			
		||||
C: <entry> entry
 | 
			
		||||
: set-entries ( feed entries -- feed )
 | 
			
		||||
    [ dup url>> ] dip
 | 
			
		||||
    [ [ derive-url ] change-url ] with map
 | 
			
		||||
    >>entries ;
 | 
			
		||||
 | 
			
		||||
: <entry> ( -- entry ) entry new ;
 | 
			
		||||
 | 
			
		||||
: try-parsing-timestamp ( string -- timestamp )
 | 
			
		||||
    [ rfc822>timestamp ] [ drop rfc3339>timestamp ] recover ;
 | 
			
		||||
 | 
			
		||||
: rss1.0-entry ( tag -- entry )
 | 
			
		||||
    {
 | 
			
		||||
        [ "title" tag-named children>string ]
 | 
			
		||||
        [ "link" tag-named children>string ]
 | 
			
		||||
        [ "description" tag-named children>string ]
 | 
			
		||||
    entry new
 | 
			
		||||
    swap {
 | 
			
		||||
        [ "title" tag-named children>string >>title ]
 | 
			
		||||
        [ "link" tag-named children>string >url >>url ]
 | 
			
		||||
        [ "description" tag-named children>string >>description ]
 | 
			
		||||
        [
 | 
			
		||||
            f "date" "http://purl.org/dc/elements/1.1/" <name>
 | 
			
		||||
            tag-named dup [ children>string try-parsing-timestamp ] when
 | 
			
		||||
            >>pub-date
 | 
			
		||||
        ]
 | 
			
		||||
    } cleave <entry> ;
 | 
			
		||||
    } cleave ;
 | 
			
		||||
 | 
			
		||||
: rss1.0 ( xml -- feed )
 | 
			
		||||
    [
 | 
			
		||||
    feed new
 | 
			
		||||
    swap [
 | 
			
		||||
        "channel" tag-named
 | 
			
		||||
        [ "title" tag-named children>string ]
 | 
			
		||||
        [ "link" tag-named children>string ] bi
 | 
			
		||||
    ] [ "item" tags-named [ rss1.0-entry ] map ] bi
 | 
			
		||||
    <feed> ;
 | 
			
		||||
        [ "title" tag-named children>string >>title ]
 | 
			
		||||
        [ "link" tag-named children>string >url >>url ] bi
 | 
			
		||||
    ] [ "item" tags-named [ rss1.0-entry ] map set-entries ] bi ;
 | 
			
		||||
 | 
			
		||||
: rss2.0-entry ( tag -- entry )
 | 
			
		||||
    {
 | 
			
		||||
        [ "title" tag-named children>string ]
 | 
			
		||||
        [ { "link" "guid" } any-tag-named children>string ]
 | 
			
		||||
        [ "description" tag-named children>string ]
 | 
			
		||||
    entry new
 | 
			
		||||
    swap {
 | 
			
		||||
        [ "title" tag-named children>string >>title ]
 | 
			
		||||
        [ { "link" "guid" } any-tag-named children>string >url >>url ]
 | 
			
		||||
        [ "description" tag-named children>string >>description ]
 | 
			
		||||
        [
 | 
			
		||||
            { "date" "pubDate" } any-tag-named
 | 
			
		||||
            children>string try-parsing-timestamp
 | 
			
		||||
            children>string try-parsing-timestamp >>pub-date
 | 
			
		||||
        ]
 | 
			
		||||
    } cleave <entry> ;
 | 
			
		||||
    } cleave ;
 | 
			
		||||
 | 
			
		||||
: rss2.0 ( xml -- feed )
 | 
			
		||||
    feed new
 | 
			
		||||
    swap
 | 
			
		||||
    "channel" tag-named 
 | 
			
		||||
    [ "title" tag-named children>string ]
 | 
			
		||||
    [ "link" tag-named children>string ]
 | 
			
		||||
    [ "item" tags-named [ rss2.0-entry ] map ]
 | 
			
		||||
    tri <feed> ;
 | 
			
		||||
    [ "title" tag-named children>string >>title ]
 | 
			
		||||
    [ "link" tag-named children>string >>link ]
 | 
			
		||||
    [ "item" tags-named [ rss2.0-entry ] map set-entries ]
 | 
			
		||||
    tri ;
 | 
			
		||||
 | 
			
		||||
: atom1.0-entry ( tag -- entry )
 | 
			
		||||
    {
 | 
			
		||||
        [ "title" tag-named children>string ]
 | 
			
		||||
        [ "link" tag-named "href" swap at ]
 | 
			
		||||
    entry new
 | 
			
		||||
    swap {
 | 
			
		||||
        [ "title" tag-named children>string >>title ]
 | 
			
		||||
        [ "link" tag-named "href" swap at >url >>url ]
 | 
			
		||||
        [
 | 
			
		||||
            { "content" "summary" } any-tag-named
 | 
			
		||||
            dup tag-children [ string? not ] contains?
 | 
			
		||||
            [ tag-children [ write-chunk ] with-string-writer ]
 | 
			
		||||
            [ children>string ] if
 | 
			
		||||
            [ children>string ] if >>description
 | 
			
		||||
        ]
 | 
			
		||||
        [
 | 
			
		||||
            { "published" "updated" "issued" "modified" } 
 | 
			
		||||
            any-tag-named children>string try-parsing-timestamp
 | 
			
		||||
            >>pub-date
 | 
			
		||||
        ]
 | 
			
		||||
    } cleave <entry> ;
 | 
			
		||||
    } cleave ;
 | 
			
		||||
 | 
			
		||||
: atom1.0 ( xml -- feed )
 | 
			
		||||
    [ "title" tag-named children>string ]
 | 
			
		||||
    [ "link" tag-named "href" swap at ]
 | 
			
		||||
    [ "entry" tags-named [ atom1.0-entry ] map ]
 | 
			
		||||
    tri <feed> ;
 | 
			
		||||
    feed new
 | 
			
		||||
    swap
 | 
			
		||||
    [ "title" tag-named children>string >>title ]
 | 
			
		||||
    [ "link" tag-named "href" swap at >url >>url ]
 | 
			
		||||
    [ "entry" tags-named [ atom1.0-entry ] map set-entries ]
 | 
			
		||||
    tri ;
 | 
			
		||||
 | 
			
		||||
: xml>feed ( xml -- feed )
 | 
			
		||||
    dup name-tag {
 | 
			
		||||
| 
						 | 
				
			
			@ -103,15 +117,18 @@ C: <entry> entry
 | 
			
		|||
 | 
			
		||||
: entry, ( entry -- )
 | 
			
		||||
    "entry" [
 | 
			
		||||
        dup title>> "title" { { "type" "html" } } simple-tag*,
 | 
			
		||||
        "link" over link>> dup url? [ present ] when "href" associate contained*,
 | 
			
		||||
        dup pub-date>> timestamp>rfc3339 "published" simple-tag,
 | 
			
		||||
        description>> [ "content" { { "type" "html" } } simple-tag*, ] when*
 | 
			
		||||
        {
 | 
			
		||||
            [ title>> "title" { { "type" "html" } } simple-tag*, ]
 | 
			
		||||
            [ url>> present "href" associate "link" swap contained*, ]
 | 
			
		||||
            [ pub-date>> timestamp>rfc3339 "published" simple-tag, ]
 | 
			
		||||
            [ description>> [ "content" { { "type" "html" } } simple-tag*, ] when* ]
 | 
			
		||||
        } cleave
 | 
			
		||||
    ] tag, ;
 | 
			
		||||
 | 
			
		||||
: feed>xml ( feed -- xml )
 | 
			
		||||
    "feed" { { "xmlns" "http://www.w3.org/2005/Atom" } } [
 | 
			
		||||
        dup title>> "title" simple-tag,
 | 
			
		||||
        "link" over link>> dup url? [ present ] when "href" associate contained*,
 | 
			
		||||
        entries>> [ entry, ] each
 | 
			
		||||
        [ title>> "title" simple-tag, ]
 | 
			
		||||
        [ url>> present "href" associate "link" swap contained*, ]
 | 
			
		||||
        [ entries>> [ entry, ] each ]
 | 
			
		||||
        tri
 | 
			
		||||
    ] make-xml* ;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue