We can now parse reddit's RSS feed
parent
b91a314f0e
commit
9d04629d4c
|
@ -50,3 +50,15 @@ IN: calendar.format.tests
|
|||
"Sun May 04 07:00:00 2008 GMT" cookie-string>timestamp
|
||||
timestamp>string
|
||||
] unit-test
|
||||
|
||||
! Checks that rfc3339>timestamp keeps all five fractional-second digits
! (42.12345) and turns the "-05:00" suffix into a duration whose fourth
! slot is -5 (presumably the hour slot -- confirm against the duration tuple).
[
|
||||
T{ timestamp f
|
||||
2008
|
||||
5
|
||||
26
|
||||
0
|
||||
37
|
||||
42.12345
|
||||
T{ duration f 0 0 0 -5 0 0 }
|
||||
}
|
||||
] [ "2008-05-26T00:37:42.12345-05:00" rfc3339>timestamp ] unit-test
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
USING: math math.order math.parser kernel sequences io
|
||||
USING: math math.order math.parser math.functions kernel sequences io
|
||||
accessors arrays io.streams.string splitting
|
||||
combinators accessors debugger
|
||||
calendar calendar.format.macros ;
|
||||
|
@ -151,11 +151,15 @@ M: timestamp year. ( timestamp -- )
|
|||
! Reads hours, minutes and seconds as three read-00 fields,
! expecting a ":" between each pair; leaves h m s on the stack.
: read-hms ( -- h m s )
|
||||
read-00 ":" expect read-00 ":" expect read-00 ;
|
||||
|
||||
! Adds a fractional part to the whole-seconds value s.
! Reads up to the first "+", "-" or "Z", converts the text read to a number
! and divides it by 10 raised to the text's length, so the number of
! fractional digits is not fixed. The separator that ended the read is set
! aside with >r and handed back as ch after the addition.
! NOTE(review): with no digits before the separator, string>number would
! presumably yield f and the division would fail -- confirm.
! NOTE(review): only uppercase "Z" ends the read, while (rfc3339>timestamp)
! expects "Tt" (either case) for the date/time separator -- confirm whether
! a lowercase "z" suffix needs to be handled here as well.
: read-rfc3339-seconds ( s -- s' ch )
|
||||
"+-Z" read-until >r
|
||||
[ string>number ] [ length 10 swap ^ ] bi / + r> ;
|
||||
|
||||
! Reads an RFC 3339 timestamp piece by piece: read-ymd, a "T" or "t",
! read-hms, an optional "."-introduced fractional second, then
! read-rfc3339-gmt-offset, and passes the collected values to <timestamp>.
! When the character after the seconds is not ".", the empty default case
! leaves that character on the stack for the offset reader.
! NOTE(review): this is a diff view, so both versions of the fraction step
! appear below. Presumably the read-000 line (fixed three digits, divided by
! 1000) is the removed one and the read-rfc3339-seconds line replaces it.
: (rfc3339>timestamp) ( -- timestamp )
|
||||
read-ymd
|
||||
"Tt" expect
|
||||
read-hms
|
||||
read1 { { CHAR: . [ read-000 1000 / + read1 ] } [ ] } case
|
||||
read1 { { CHAR: . [ read-rfc3339-seconds ] } [ ] } case
|
||||
read-rfc3339-gmt-offset
|
||||
<timestamp> ;
|
||||
|
||||
|
|
|
@ -18,51 +18,67 @@ TUPLE: entry title link description pub-date ;
|
|||
|
||||
C: <entry> entry
|
||||
|
||||
! Converts a date string to a timestamp, trying rfc822>timestamp first and
! falling back to rfc3339>timestamp when the first attempt throws.
! The recovery quotation drops the error object before retrying, so the
! fallback parser is applied to the string rather than to the error.
! An error thrown by the fallback itself is not caught here.
: try-parsing-timestamp ( string -- timestamp )
|
||||
[ rfc822>timestamp ] [ drop rfc3339>timestamp ] recover ;
|
||||
|
||||
! Builds an entry from an RSS 1.0 item tag: the text of its "title", "link"
! and "description" children, plus a date taken from the "date" tag in the
! http://purl.org/dc/elements/1.1/ namespace.
! The date lookup result is only parsed when it is true (dup [ ... ] when);
! a false result is passed through unchanged as the last <entry> argument.
! NOTE(review): this is a diff view, so two bodies appear below. Presumably
! the keep-chain using rfc822>timestamp is the removed version and the
! cleave form using try-parsing-timestamp is its replacement.
: rss1.0-entry ( tag -- entry )
|
||||
[ "title" tag-named children>string ] keep
|
||||
[ "link" tag-named children>string ] keep
|
||||
[ "description" tag-named children>string ] keep
|
||||
f "date" "http://purl.org/dc/elements/1.1/" <name>
|
||||
tag-named dup [ children>string rfc822>timestamp ] when
|
||||
<entry> ;
|
||||
{
|
||||
[ "title" tag-named children>string ]
|
||||
[ "link" tag-named children>string ]
|
||||
[ "description" tag-named children>string ]
|
||||
[
|
||||
f "date" "http://purl.org/dc/elements/1.1/" <name>
|
||||
tag-named dup [ children>string try-parsing-timestamp ] when
|
||||
]
|
||||
} cleave <entry> ;
|
||||
|
||||
! Builds a feed from an RSS 1.0 document. The title and link text come from
! children of the "channel" tag, while the "item" tags are looked up on the
! xml value itself (not on the channel) and mapped through rss1.0-entry.
! NOTE(review): this is a diff view, so lines of two versions are mixed below.
! Presumably the keep-based lines are the removed ones and the bi-based
! lines replace them; the opening "[" and "channel" lines are shared.
: rss1.0 ( xml -- feed )
|
||||
[
|
||||
"channel" tag-named
|
||||
[ "title" tag-named children>string ] keep
|
||||
"link" tag-named children>string
|
||||
] keep
|
||||
"item" tags-named [ rss1.0-entry ] map <feed> ;
|
||||
[ "title" tag-named children>string ]
|
||||
[ "link" tag-named children>string ] bi
|
||||
] [ "item" tags-named [ rss1.0-entry ] map ] bi
|
||||
<feed> ;
|
||||
|
||||
! Builds an entry from an RSS 2.0 item tag: "title" text, the text of a
! "link" or "guid" tag, "description" text, and a parsed publication date.
! NOTE(review): this is a diff view, so two bodies appear below. Presumably
! the keep-chain (link-else-guid via dupd ?, "pubDate" only, rfc822 only) is
! the removed version and the cleave form (any-tag-named over
! { "link" "guid" } and { "date" "pubDate" }, try-parsing-timestamp) replaces it.
! NOTE(review): unlike rss1.0-entry, the date lookup is not guarded with
! dup [ ... ] when, so an item without a date tag presumably makes
! children>string fail -- confirm what any-tag-named returns when nothing matches.
: rss2.0-entry ( tag -- entry )
|
||||
[ "title" tag-named children>string ] keep
|
||||
[ "link" tag-named ] keep
|
||||
[ "guid" tag-named dupd ? children>string ] keep
|
||||
[ "description" tag-named children>string ] keep
|
||||
"pubDate" tag-named children>string rfc822>timestamp <entry> ;
|
||||
{
|
||||
[ "title" tag-named children>string ]
|
||||
[ { "link" "guid" } any-tag-named children>string ]
|
||||
[ "description" tag-named children>string ]
|
||||
[
|
||||
{ "date" "pubDate" } any-tag-named
|
||||
children>string try-parsing-timestamp
|
||||
]
|
||||
} cleave <entry> ;
|
||||
|
||||
! Builds a feed from an RSS 2.0 document. Everything is read from the
! "channel" tag: its "title" text, its "link" text, and its "item" tags,
! each converted with rss2.0-entry.
! NOTE(review): this is a diff view, so two versions follow the shared
! "channel" line. Presumably the keep-based lines are the removed ones and
! the tri-based lines replace them.
: rss2.0 ( xml -- feed )
|
||||
"channel" tag-named
|
||||
[ "title" tag-named children>string ] keep
|
||||
[ "link" tag-named children>string ] keep
|
||||
"item" tags-named [ rss2.0-entry ] map <feed> ;
|
||||
[ "title" tag-named children>string ]
|
||||
[ "link" tag-named children>string ]
|
||||
[ "item" tags-named [ rss2.0-entry ] map ]
|
||||
tri <feed> ;
|
||||
|
||||
! Builds an entry from an Atom 1.0 entry tag:
!   - "title" text;
!   - the "href" value looked up on the "link" tag;
!   - the body of a "content" or "summary" tag: if any child is not a
!     string, the children are emitted with write-chunk into a string,
!     otherwise children>string is used;
!   - a date parsed from a "published", "updated", "issued" or "modified" tag.
! NOTE(review): this is a diff view, so two bodies appear below. Presumably
! the keep-chain ending in rfc3339>timestamp is the removed version and the
! cleave form ending in try-parsing-timestamp replaces it.
! NOTE(review): neither any-tag-named lookup is guarded against a missing
! tag -- confirm what any-tag-named returns when nothing matches.
: atom1.0-entry ( tag -- entry )
|
||||
[ "title" tag-named children>string ] keep
|
||||
[ "link" tag-named "href" swap at ] keep
|
||||
[
|
||||
{ "content" "summary" } any-tag-named
|
||||
dup tag-children [ string? not ] contains?
|
||||
[ tag-children [ write-chunk ] with-string-writer ]
|
||||
[ children>string ] if
|
||||
] keep
|
||||
{ "published" "updated" "issued" "modified" } any-tag-named
|
||||
children>string rfc3339>timestamp <entry> ;
|
||||
{
|
||||
[ "title" tag-named children>string ]
|
||||
[ "link" tag-named "href" swap at ]
|
||||
[
|
||||
{ "content" "summary" } any-tag-named
|
||||
dup tag-children [ string? not ] contains?
|
||||
[ tag-children [ write-chunk ] with-string-writer ]
|
||||
[ children>string ] if
|
||||
]
|
||||
[
|
||||
{ "published" "updated" "issued" "modified" }
|
||||
any-tag-named children>string try-parsing-timestamp
|
||||
]
|
||||
} cleave <entry> ;
|
||||
|
||||
! Builds a feed from an Atom 1.0 document: the "title" text, the "href"
! value looked up on the "link" tag, and every "entry" tag converted with
! atom1.0-entry. All three lookups are made on the xml value passed in.
! NOTE(review): this is a diff view, so two bodies appear below. Presumably
! the keep-based lines are the removed ones and the tri-based lines replace them.
: atom1.0 ( xml -- feed )
|
||||
[ "title" tag-named children>string ] keep
|
||||
[ "link" tag-named "href" swap at ] keep
|
||||
"entry" tags-named [ atom1.0-entry ] map <feed> ;
|
||||
[ "title" tag-named children>string ]
|
||||
[ "link" tag-named "href" swap at ]
|
||||
[ "entry" tags-named [ atom1.0-entry ] map ]
|
||||
tri <feed> ;
|
||||
|
||||
: xml>feed ( xml -- feed )
|
||||
dup name-tag {
|
||||
|
|
Loading…
Reference in New Issue