We can now parse reddit's RSS feed
parent
b91a314f0e
commit
9d04629d4c
|
@ -50,3 +50,15 @@ IN: calendar.format.tests
|
||||||
"Sun May 04 07:00:00 2008 GMT" cookie-string>timestamp
|
"Sun May 04 07:00:00 2008 GMT" cookie-string>timestamp
|
||||||
timestamp>string
|
timestamp>string
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
||||||
|
[
|
||||||
|
T{ timestamp f
|
||||||
|
2008
|
||||||
|
5
|
||||||
|
26
|
||||||
|
0
|
||||||
|
37
|
||||||
|
42.12345
|
||||||
|
T{ duration f 0 0 0 -5 0 0 }
|
||||||
|
}
|
||||||
|
] [ "2008-05-26T00:37:42.12345-05:00" rfc3339>timestamp ] unit-test
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
USING: math math.order math.parser kernel sequences io
|
USING: math math.order math.parser math.functions kernel sequences io
|
||||||
accessors arrays io.streams.string splitting
|
accessors arrays io.streams.string splitting
|
||||||
combinators accessors debugger
|
combinators accessors debugger
|
||||||
calendar calendar.format.macros ;
|
calendar calendar.format.macros ;
|
||||||
|
@ -151,11 +151,15 @@ M: timestamp year. ( timestamp -- )
|
||||||
: read-hms ( -- h m s )
|
: read-hms ( -- h m s )
|
||||||
read-00 ":" expect read-00 ":" expect read-00 ;
|
read-00 ":" expect read-00 ":" expect read-00 ;
|
||||||
|
|
||||||
|
: read-rfc3339-seconds ( s -- s' ch )
|
||||||
|
"+-Z" read-until >r
|
||||||
|
[ string>number ] [ length 10 swap ^ ] bi / + r> ;
|
||||||
|
|
||||||
: (rfc3339>timestamp) ( -- timestamp )
|
: (rfc3339>timestamp) ( -- timestamp )
|
||||||
read-ymd
|
read-ymd
|
||||||
"Tt" expect
|
"Tt" expect
|
||||||
read-hms
|
read-hms
|
||||||
read1 { { CHAR: . [ read-000 1000 / + read1 ] } [ ] } case
|
read1 { { CHAR: . [ read-rfc3339-seconds ] } [ ] } case
|
||||||
read-rfc3339-gmt-offset
|
read-rfc3339-gmt-offset
|
||||||
<timestamp> ;
|
<timestamp> ;
|
||||||
|
|
||||||
|
|
|
@ -18,51 +18,67 @@ TUPLE: entry title link description pub-date ;
|
||||||
|
|
||||||
C: <entry> entry
|
C: <entry> entry
|
||||||
|
|
||||||
|
: try-parsing-timestamp ( string -- timestamp )
|
||||||
|
[ rfc822>timestamp ] [ drop rfc3339>timestamp ] recover ;
|
||||||
|
|
||||||
: rss1.0-entry ( tag -- entry )
|
: rss1.0-entry ( tag -- entry )
|
||||||
[ "title" tag-named children>string ] keep
|
{
|
||||||
[ "link" tag-named children>string ] keep
|
[ "title" tag-named children>string ]
|
||||||
[ "description" tag-named children>string ] keep
|
[ "link" tag-named children>string ]
|
||||||
|
[ "description" tag-named children>string ]
|
||||||
|
[
|
||||||
f "date" "http://purl.org/dc/elements/1.1/" <name>
|
f "date" "http://purl.org/dc/elements/1.1/" <name>
|
||||||
tag-named dup [ children>string rfc822>timestamp ] when
|
tag-named dup [ children>string try-parsing-timestamp ] when
|
||||||
<entry> ;
|
]
|
||||||
|
} cleave <entry> ;
|
||||||
|
|
||||||
: rss1.0 ( xml -- feed )
|
: rss1.0 ( xml -- feed )
|
||||||
[
|
[
|
||||||
"channel" tag-named
|
"channel" tag-named
|
||||||
[ "title" tag-named children>string ] keep
|
[ "title" tag-named children>string ]
|
||||||
"link" tag-named children>string
|
[ "link" tag-named children>string ] bi
|
||||||
] keep
|
] [ "item" tags-named [ rss1.0-entry ] map ] bi
|
||||||
"item" tags-named [ rss1.0-entry ] map <feed> ;
|
<feed> ;
|
||||||
|
|
||||||
: rss2.0-entry ( tag -- entry )
|
: rss2.0-entry ( tag -- entry )
|
||||||
[ "title" tag-named children>string ] keep
|
{
|
||||||
[ "link" tag-named ] keep
|
[ "title" tag-named children>string ]
|
||||||
[ "guid" tag-named dupd ? children>string ] keep
|
[ { "link" "guid" } any-tag-named children>string ]
|
||||||
[ "description" tag-named children>string ] keep
|
[ "description" tag-named children>string ]
|
||||||
"pubDate" tag-named children>string rfc822>timestamp <entry> ;
|
[
|
||||||
|
{ "date" "pubDate" } any-tag-named
|
||||||
|
children>string try-parsing-timestamp
|
||||||
|
]
|
||||||
|
} cleave <entry> ;
|
||||||
|
|
||||||
: rss2.0 ( xml -- feed )
|
: rss2.0 ( xml -- feed )
|
||||||
"channel" tag-named
|
"channel" tag-named
|
||||||
[ "title" tag-named children>string ] keep
|
[ "title" tag-named children>string ]
|
||||||
[ "link" tag-named children>string ] keep
|
[ "link" tag-named children>string ]
|
||||||
"item" tags-named [ rss2.0-entry ] map <feed> ;
|
[ "item" tags-named [ rss2.0-entry ] map ]
|
||||||
|
tri <feed> ;
|
||||||
|
|
||||||
: atom1.0-entry ( tag -- entry )
|
: atom1.0-entry ( tag -- entry )
|
||||||
[ "title" tag-named children>string ] keep
|
{
|
||||||
[ "link" tag-named "href" swap at ] keep
|
[ "title" tag-named children>string ]
|
||||||
|
[ "link" tag-named "href" swap at ]
|
||||||
[
|
[
|
||||||
{ "content" "summary" } any-tag-named
|
{ "content" "summary" } any-tag-named
|
||||||
dup tag-children [ string? not ] contains?
|
dup tag-children [ string? not ] contains?
|
||||||
[ tag-children [ write-chunk ] with-string-writer ]
|
[ tag-children [ write-chunk ] with-string-writer ]
|
||||||
[ children>string ] if
|
[ children>string ] if
|
||||||
] keep
|
]
|
||||||
{ "published" "updated" "issued" "modified" } any-tag-named
|
[
|
||||||
children>string rfc3339>timestamp <entry> ;
|
{ "published" "updated" "issued" "modified" }
|
||||||
|
any-tag-named children>string try-parsing-timestamp
|
||||||
|
]
|
||||||
|
} cleave <entry> ;
|
||||||
|
|
||||||
: atom1.0 ( xml -- feed )
|
: atom1.0 ( xml -- feed )
|
||||||
[ "title" tag-named children>string ] keep
|
[ "title" tag-named children>string ]
|
||||||
[ "link" tag-named "href" swap at ] keep
|
[ "link" tag-named "href" swap at ]
|
||||||
"entry" tags-named [ atom1.0-entry ] map <feed> ;
|
[ "entry" tags-named [ atom1.0-entry ] map ]
|
||||||
|
tri <feed> ;
|
||||||
|
|
||||||
: xml>feed ( xml -- feed )
|
: xml>feed ( xml -- feed )
|
||||||
dup name-tag {
|
dup name-tag {
|
||||||
|
|
Loading…
Reference in New Issue