140 lines
		
	
	
		
			4.2 KiB
		
	
	
	
		
			Factor
		
	
	
			
		
		
	
	
			140 lines
		
	
	
		
			4.2 KiB
		
	
	
	
		
			Factor
		
	
	
! Copyright (C) 2006 Chris Double, Daniel Ehrenberg.
 | 
						|
! Portions copyright (C) 2008 Slava Pestov.
 | 
						|
! See http://factorcode.org/license.txt for BSD license.
 | 
						|
USING: xml.utilities kernel assocs xml.generator math.order
 | 
						|
    strings sequences xml.data xml.writer
 | 
						|
    io.streams.string combinators xml xml.entities io.files io
 | 
						|
    http.client namespaces make xml.generator hashtables
 | 
						|
    calendar.format accessors continuations urls present ;
 | 
						|
IN: syndication
 | 
						|
 | 
						|
: any-tag-named ( tag names -- tag-inside )
 | 
						|
    f -rot [ tag-named nip dup ] with find 2drop ;
 | 
						|
 | 
						|
TUPLE: feed title url entries ;
 | 
						|
 | 
						|
: <feed> ( -- feed ) feed new ;
 | 
						|
 | 
						|
TUPLE: entry title url description date ;
 | 
						|
 | 
						|
: set-entries ( feed entries -- feed )
 | 
						|
    [ dup url>> ] dip
 | 
						|
    [ [ derive-url ] change-url ] with map
 | 
						|
    >>entries ;
 | 
						|
 | 
						|
: <entry> ( -- entry ) entry new ;
 | 
						|
 | 
						|
: try-parsing-timestamp ( string -- timestamp )
 | 
						|
    [ rfc822>timestamp ] [ drop rfc3339>timestamp ] recover ;
 | 
						|
 | 
						|
: rss1.0-entry ( tag -- entry )
 | 
						|
    entry new
 | 
						|
    swap {
 | 
						|
        [ "title" tag-named children>string >>title ]
 | 
						|
        [ "link" tag-named children>string >url >>url ]
 | 
						|
        [ "description" tag-named children>string >>description ]
 | 
						|
        [
 | 
						|
            f "date" "http://purl.org/dc/elements/1.1/" <name>
 | 
						|
            tag-named dup [ children>string try-parsing-timestamp ] when
 | 
						|
            >>date
 | 
						|
        ]
 | 
						|
    } cleave ;
 | 
						|
 | 
						|
: rss1.0 ( xml -- feed )
 | 
						|
    feed new
 | 
						|
    swap [
 | 
						|
        "channel" tag-named
 | 
						|
        [ "title" tag-named children>string >>title ]
 | 
						|
        [ "link" tag-named children>string >url >>url ] bi
 | 
						|
    ] [ "item" tags-named [ rss1.0-entry ] map set-entries ] bi ;
 | 
						|
 | 
						|
: rss2.0-entry ( tag -- entry )
 | 
						|
    entry new
 | 
						|
    swap {
 | 
						|
        [ "title" tag-named children>string >>title ]
 | 
						|
        [ { "link" "guid" } any-tag-named children>string >url >>url ]
 | 
						|
        [ { "description" "encoded" } any-tag-named children>string >>description ]
 | 
						|
        [
 | 
						|
            { "date" "pubDate" } any-tag-named
 | 
						|
            children>string try-parsing-timestamp >>date
 | 
						|
        ]
 | 
						|
    } cleave ;
 | 
						|
 | 
						|
: rss2.0 ( xml -- feed )
 | 
						|
    feed new
 | 
						|
    swap
 | 
						|
    "channel" tag-named 
 | 
						|
    [ "title" tag-named children>string >>title ]
 | 
						|
    [ "link" tag-named children>string >url >>url ]
 | 
						|
    [ "item" tags-named [ rss2.0-entry ] map set-entries ]
 | 
						|
    tri ;
 | 
						|
 | 
						|
: atom-entry-link ( tag -- url/f )
 | 
						|
    "link" tags-named [ "rel" swap at "alternate" = ] find nip
 | 
						|
    dup [ "href" swap at >url ] when ;
 | 
						|
 | 
						|
: atom1.0-entry ( tag -- entry )
 | 
						|
    entry new
 | 
						|
    swap {
 | 
						|
        [ "title" tag-named children>string >>title ]
 | 
						|
        [ atom-entry-link >>url ]
 | 
						|
        [
 | 
						|
            { "content" "summary" } any-tag-named
 | 
						|
            dup children>> [ string? not ] contains?
 | 
						|
            [ children>> [ write-chunk ] with-string-writer ]
 | 
						|
            [ children>string ] if >>description
 | 
						|
        ]
 | 
						|
        [
 | 
						|
            { "published" "updated" "issued" "modified" } 
 | 
						|
            any-tag-named children>string try-parsing-timestamp
 | 
						|
            >>date
 | 
						|
        ]
 | 
						|
    } cleave ;
 | 
						|
 | 
						|
: atom1.0 ( xml -- feed )
 | 
						|
    feed new
 | 
						|
    swap
 | 
						|
    [ "title" tag-named children>string >>title ]
 | 
						|
    [ "link" tag-named "href" swap at >url >>url ]
 | 
						|
    [ "entry" tags-named [ atom1.0-entry ] map set-entries ]
 | 
						|
    tri ;
 | 
						|
 | 
						|
: xml>feed ( xml -- feed )
 | 
						|
    dup main>> {
 | 
						|
        { "RDF" [ rss1.0 ] }
 | 
						|
        { "rss" [ rss2.0 ] }
 | 
						|
        { "feed" [ atom1.0 ] }
 | 
						|
    } case ;
 | 
						|
 | 
						|
: string>feed ( string -- feed )
 | 
						|
    [ string>xml xml>feed ] with-html-entities ;
 | 
						|
 | 
						|
: download-feed ( url -- feed )
 | 
						|
    #! Retrieve an news syndication file, return as a feed tuple.
 | 
						|
    http-get nip string>feed ;
 | 
						|
 | 
						|
! Atom generation
 | 
						|
: simple-tag, ( content name -- )
 | 
						|
    [ , ] tag, ;
 | 
						|
 | 
						|
: simple-tag*, ( content name attrs -- )
 | 
						|
    [ , ] tag*, ;
 | 
						|
 | 
						|
: entry, ( entry -- )
 | 
						|
    "entry" [
 | 
						|
        {
 | 
						|
            [ title>> "title" { { "type" "html" } } simple-tag*, ]
 | 
						|
            [ url>> present "href" associate "link" swap contained*, ]
 | 
						|
            [ date>> timestamp>rfc3339 "published" simple-tag, ]
 | 
						|
            [ description>> [ "content" { { "type" "html" } } simple-tag*, ] when* ]
 | 
						|
        } cleave
 | 
						|
    ] tag, ;
 | 
						|
 | 
						|
: feed>xml ( feed -- xml )
 | 
						|
    "feed" { { "xmlns" "http://www.w3.org/2005/Atom" } } [
 | 
						|
        [ title>> "title" simple-tag, ]
 | 
						|
        [ url>> present "href" associate "link" swap contained*, ]
 | 
						|
        [ entries>> [ entry, ] each ]
 | 
						|
        tri
 | 
						|
    ] make-xml* ;
 |