We can now parse reddit's RSS feed

db4
Slava Pestov 2008-05-26 00:48:28 -05:00
parent b91a314f0e
commit 9d04629d4c
3 changed files with 65 additions and 33 deletions

View File

@ -50,3 +50,15 @@ IN: calendar.format.tests
"Sun May 04 07:00:00 2008 GMT" cookie-string>timestamp
timestamp>string
] unit-test
! rfc3339>timestamp with fractional seconds and a negative GMT offset.
! The fraction is accumulated exactly (digit string as an integer divided by
! a power of ten), so the second slot is the ratio
! 42 + 12345/100000 = 842469/20000 rather than the float 42.12345;
! a ratio and a float never compare equal under =.
[
T{ timestamp f
2008
5
26
0
37
842469/20000
T{ duration f 0 0 0 -5 0 0 }
}
] [ "2008-05-26T00:37:42.12345-05:00" rfc3339>timestamp ] unit-test

View File

@ -1,4 +1,4 @@
USING: math math.order math.parser kernel sequences io
USING: math math.order math.parser math.functions kernel sequences io
accessors arrays io.streams.string splitting
combinators accessors debugger
calendar calendar.format.macros ;
@ -151,11 +151,15 @@ M: timestamp year. ( timestamp -- )
! Reads an "h:m:s" time of day from the current input stream: three fields
! obtained with read-00, each pair separated by a ":" that must be present
! (checked with expect). Leaves hour, minute and second on the stack.
! NOTE(review): read-00 is defined outside this view; presumably it reads a
! two-digit number -- confirm.
: read-hms ( -- h m s )
read-00 ":" expect read-00 ":" expect read-00 ;
! Reads the fractional-seconds digits of an RFC 3339 time, up to the first
! "+", "-" or "Z", and adds them to the seconds already on the stack as
! digits / 10^(number of digits).
! Leaves the updated seconds and the separator character that ended the read.
: read-rfc3339-seconds ( s -- s' ch )
    "+-Z" read-until >r
    dup string>number
    swap length 10 swap ^
    / + r> ;
! Parses an RFC 3339 timestamp from the current input stream: the date
! (read-ymd), a "T" or "t" separator, the time (read-hms), an optional
! fraction introduced by ".", then the GMT offset, and builds the result
! with <timestamp>. When the character after the seconds is not ".", the
! default case branch leaves it on the stack untouched.
! NOTE(review): two `read1 { ... } case` lines appear below; they look like
! the before/after versions of a single line from the diff (read-000 with a
! fixed /1000 scale vs. read-rfc3339-seconds) -- confirm which one the file
! actually contains.
: (rfc3339>timestamp) ( -- timestamp )
read-ymd
"Tt" expect
read-hms
read1 { { CHAR: . [ read-000 1000 / + read1 ] } [ ] } case
read1 { { CHAR: . [ read-rfc3339-seconds ] } [ ] } case
read-rfc3339-gmt-offset
<timestamp> ;

View File

@ -18,51 +18,67 @@ TUPLE: entry title link description pub-date ;
C: <entry> entry
! Parses a date string by trying rfc822>timestamp first; if that throws,
! the error object is dropped and the same string is parsed with
! rfc3339>timestamp instead. An error from the second parser propagates.
: try-parsing-timestamp ( string -- timestamp )
[ rfc822>timestamp ] [ drop rfc3339>timestamp ] recover ;
! Builds an entry from an RSS 1.0 item tag: the text of its "title", "link"
! and "description" children plus the Dublin Core "date" element
! (namespace http://purl.org/dc/elements/1.1/), which is parsed only when
! the tag is present; otherwise the lookup result is passed through as is.
! NOTE(review): this span appears to hold two bodies for the same word (a
! `keep` chain calling rfc822>timestamp, then a `cleave` form calling
! try-parsing-timestamp), most likely the old and new sides of the diff --
! confirm which one is current.
: rss1.0-entry ( tag -- entry )
[ "title" tag-named children>string ] keep
[ "link" tag-named children>string ] keep
[ "description" tag-named children>string ] keep
f "date" "http://purl.org/dc/elements/1.1/" <name>
tag-named dup [ children>string rfc822>timestamp ] when
<entry> ;
{
[ "title" tag-named children>string ]
[ "link" tag-named children>string ]
[ "description" tag-named children>string ]
[
f "date" "http://purl.org/dc/elements/1.1/" <name>
tag-named dup [ children>string try-parsing-timestamp ] when
]
} cleave <entry> ;
! Builds a feed from an RSS 1.0 document: title and link text are taken
! from the "channel" tag, and every "item" tag of the document is converted
! with rss1.0-entry before <feed> is called.
! NOTE(review): lines from two versions of the body (a `keep`-based one and
! a `bi`-based one) appear interleaved here, most likely the old and new
! sides of the diff -- confirm which one is current.
: rss1.0 ( xml -- feed )
[
"channel" tag-named
[ "title" tag-named children>string ] keep
"link" tag-named children>string
] keep
"item" tags-named [ rss1.0-entry ] map <feed> ;
[ "title" tag-named children>string ]
[ "link" tag-named children>string ] bi
] [ "item" tags-named [ rss1.0-entry ] map ] bi
<feed> ;
! Builds an entry from an RSS 2.0 item tag: title text, link text (taken
! from a "link" or "guid" tag), description text and the publication date.
! NOTE(review): this span appears to hold two bodies for the same word. The
! first uses the "link" tag when present and "guid" otherwise, and parses
! "pubDate" with rfc822>timestamp; the second looks up
! { "link" "guid" } and { "date" "pubDate" } with any-tag-named and parses
! with try-parsing-timestamp. They are most likely the old and new sides of
! the diff -- confirm which one is current.
: rss2.0-entry ( tag -- entry )
[ "title" tag-named children>string ] keep
[ "link" tag-named ] keep
[ "guid" tag-named dupd ? children>string ] keep
[ "description" tag-named children>string ] keep
"pubDate" tag-named children>string rfc822>timestamp <entry> ;
{
[ "title" tag-named children>string ]
[ { "link" "guid" } any-tag-named children>string ]
[ "description" tag-named children>string ]
[
{ "date" "pubDate" } any-tag-named
children>string try-parsing-timestamp
]
} cleave <entry> ;
! Builds a feed from an RSS 2.0 document: inside the "channel" tag it takes
! the title text, the link text and every "item" tag (converted with
! rss2.0-entry), then calls <feed>.
! NOTE(review): both a `keep`-based and a `tri`-based body appear below,
! most likely the old and new sides of the diff -- confirm which one is
! current.
: rss2.0 ( xml -- feed )
"channel" tag-named
[ "title" tag-named children>string ] keep
[ "link" tag-named children>string ] keep
"item" tags-named [ rss2.0-entry ] map <feed> ;
[ "title" tag-named children>string ]
[ "link" tag-named children>string ]
[ "item" tags-named [ rss2.0-entry ] map ]
tri <feed> ;
! Builds an entry from an Atom 1.0 entry tag: title text, the "href"
! attribute of the "link" tag, the body taken from a "content" or "summary"
! tag, and a date taken from a "published", "updated", "issued" or
! "modified" tag.
! If any child of the body tag is not a string, the children are written
! out with write-chunk into a string; otherwise children>string is used.
! NOTE(review): this span appears to hold two bodies for the same word (a
! `keep` chain parsing the date with rfc3339>timestamp, then a `cleave`
! form using try-parsing-timestamp), most likely the old and new sides of
! the diff -- confirm which one is current.
: atom1.0-entry ( tag -- entry )
[ "title" tag-named children>string ] keep
[ "link" tag-named "href" swap at ] keep
[
{ "content" "summary" } any-tag-named
dup tag-children [ string? not ] contains?
[ tag-children [ write-chunk ] with-string-writer ]
[ children>string ] if
] keep
{ "published" "updated" "issued" "modified" } any-tag-named
children>string rfc3339>timestamp <entry> ;
{
[ "title" tag-named children>string ]
[ "link" tag-named "href" swap at ]
[
{ "content" "summary" } any-tag-named
dup tag-children [ string? not ] contains?
[ tag-children [ write-chunk ] with-string-writer ]
[ children>string ] if
]
[
{ "published" "updated" "issued" "modified" }
any-tag-named children>string try-parsing-timestamp
]
} cleave <entry> ;
! Builds a feed from an Atom 1.0 document: title text, the "href" attribute
! of the "link" tag, and every "entry" tag converted with atom1.0-entry,
! passed to <feed>.
! NOTE(review): both a `keep`-based and a `tri`-based body appear below,
! most likely the old and new sides of the diff -- confirm which one is
! current.
: atom1.0 ( xml -- feed )
[ "title" tag-named children>string ] keep
[ "link" tag-named "href" swap at ] keep
"entry" tags-named [ atom1.0-entry ] map <feed> ;
[ "title" tag-named children>string ]
[ "link" tag-named "href" swap at ]
[ "entry" tags-named [ atom1.0-entry ] map ]
tri <feed> ;
: xml>feed ( xml -- feed )
dup name-tag {