We can now parse reddit's RSS feed

db4
Slava Pestov 2008-05-26 00:48:28 -05:00
parent b91a314f0e
commit 9d04629d4c
3 changed files with 65 additions and 33 deletions

View File

@ -50,3 +50,15 @@ IN: calendar.format.tests
"Sun May 04 07:00:00 2008 GMT" cookie-string>timestamp "Sun May 04 07:00:00 2008 GMT" cookie-string>timestamp
timestamp>string timestamp>string
] unit-test ] unit-test
! RFC 3339 timestamp with fractional seconds and a numeric UTC offset.
! The fractional digits are parsed with exact integer arithmetic
! (12345 / 10^5 = 2469/20000), so the expected seconds value is the
! exact ratio 42+2469/20000. A float literal such as 42.12345 is a
! different number type and does not compare equal under strict `=`.
[
    T{ timestamp f
        2008
        5
        26
        0
        37
        42+2469/20000
        T{ duration f 0 0 0 -5 0 0 }
    }
] [ "2008-05-26T00:37:42.12345-05:00" rfc3339>timestamp ] unit-test

View File

@ -1,4 +1,4 @@
USING: math math.order math.parser kernel sequences io USING: math math.order math.parser math.functions kernel sequences io
accessors arrays io.streams.string splitting accessors arrays io.streams.string splitting
combinators accessors debugger combinators accessors debugger
calendar calendar.format.macros ; calendar calendar.format.macros ;
@ -151,11 +151,15 @@ M: timestamp year. ( timestamp -- )
: read-hms ( -- h m s ) : read-hms ( -- h m s )
read-00 ":" expect read-00 ":" expect read-00 ; read-00 ":" expect read-00 ":" expect read-00 ;
: read-rfc3339-seconds ( s -- s' ch )
    ! Called after the "." that follows the whole seconds has been
    ! consumed. Reads the fractional digits from the current input
    ! stream up to the first "+", "-" or "Z", adds
    ! digits / 10^(number of digits) to s, and leaves the terminating
    ! character on top of the stack for the GMT-offset parser.
    "+-Z" read-until >r
    ! Stack: s digits. Convert the digits first, then scale by their count.
    dup string>number swap length 10 swap ^ / +
    r> ;
: (rfc3339>timestamp) ( -- timestamp ) : (rfc3339>timestamp) ( -- timestamp )
read-ymd read-ymd
"Tt" expect "Tt" expect
read-hms read-hms
read1 { { CHAR: . [ read-000 1000 / + read1 ] } [ ] } case read1 { { CHAR: . [ read-rfc3339-seconds ] } [ ] } case
read-rfc3339-gmt-offset read-rfc3339-gmt-offset
<timestamp> ; <timestamp> ;

View File

@ -18,51 +18,67 @@ TUPLE: entry title link description pub-date ;
C: <entry> entry C: <entry> entry
! Parse a feed date string that may be in either of two formats.
! rfc822>timestamp is tried first; if it throws any error, the error
! object is dropped and the same string is parsed with
! rfc3339>timestamp instead. An error thrown by the second parser
! propagates to the caller.
: try-parsing-timestamp ( string -- timestamp )
[ rfc822>timestamp ] [ drop rfc3339>timestamp ] recover ;
: rss1.0-entry ( tag -- entry ) : rss1.0-entry ( tag -- entry )
[ "title" tag-named children>string ] keep {
[ "link" tag-named children>string ] keep [ "title" tag-named children>string ]
[ "description" tag-named children>string ] keep [ "link" tag-named children>string ]
[ "description" tag-named children>string ]
[
f "date" "http://purl.org/dc/elements/1.1/" <name> f "date" "http://purl.org/dc/elements/1.1/" <name>
tag-named dup [ children>string rfc822>timestamp ] when tag-named dup [ children>string try-parsing-timestamp ] when
<entry> ; ]
} cleave <entry> ;
: rss1.0 ( xml -- feed ) : rss1.0 ( xml -- feed )
[ [
"channel" tag-named "channel" tag-named
[ "title" tag-named children>string ] keep [ "title" tag-named children>string ]
"link" tag-named children>string [ "link" tag-named children>string ] bi
] keep ] [ "item" tags-named [ rss1.0-entry ] map ] bi
"item" tags-named [ rss1.0-entry ] map <feed> ; <feed> ;
: rss2.0-entry ( tag -- entry ) : rss2.0-entry ( tag -- entry )
[ "title" tag-named children>string ] keep {
[ "link" tag-named ] keep [ "title" tag-named children>string ]
[ "guid" tag-named dupd ? children>string ] keep [ { "link" "guid" } any-tag-named children>string ]
[ "description" tag-named children>string ] keep [ "description" tag-named children>string ]
"pubDate" tag-named children>string rfc822>timestamp <entry> ; [
{ "date" "pubDate" } any-tag-named
children>string try-parsing-timestamp
]
} cleave <entry> ;
: rss2.0 ( xml -- feed ) : rss2.0 ( xml -- feed )
"channel" tag-named "channel" tag-named
[ "title" tag-named children>string ] keep [ "title" tag-named children>string ]
[ "link" tag-named children>string ] keep [ "link" tag-named children>string ]
"item" tags-named [ rss2.0-entry ] map <feed> ; [ "item" tags-named [ rss2.0-entry ] map ]
tri <feed> ;
: atom1.0-entry ( tag -- entry ) : atom1.0-entry ( tag -- entry )
[ "title" tag-named children>string ] keep {
[ "link" tag-named "href" swap at ] keep [ "title" tag-named children>string ]
[ "link" tag-named "href" swap at ]
[ [
{ "content" "summary" } any-tag-named { "content" "summary" } any-tag-named
dup tag-children [ string? not ] contains? dup tag-children [ string? not ] contains?
[ tag-children [ write-chunk ] with-string-writer ] [ tag-children [ write-chunk ] with-string-writer ]
[ children>string ] if [ children>string ] if
] keep ]
{ "published" "updated" "issued" "modified" } any-tag-named [
children>string rfc3339>timestamp <entry> ; { "published" "updated" "issued" "modified" }
any-tag-named children>string try-parsing-timestamp
]
} cleave <entry> ;
: atom1.0 ( xml -- feed ) : atom1.0 ( xml -- feed )
[ "title" tag-named children>string ] keep [ "title" tag-named children>string ]
[ "link" tag-named "href" swap at ] keep [ "link" tag-named "href" swap at ]
"entry" tags-named [ atom1.0-entry ] map <feed> ; [ "entry" tags-named [ atom1.0-entry ] map ]
tri <feed> ;
: xml>feed ( xml -- feed ) : xml>feed ( xml -- feed )
dup name-tag { dup name-tag {