From 9d04629d4c304abb8d68803325042ec8283664a2 Mon Sep 17 00:00:00 2001
From: Slava Pestov
Date: Mon, 26 May 2008 00:48:28 -0500
Subject: [PATCH] We can now parse reddit's RSS feed

---
Review notes (this section sits after the three-dash line, so it is not
part of the commit message):

* calendar.format: fractional seconds in an RFC 3339 timestamp are now read
  up to the next "+", "-" or "Z" and divided by 10^(number of digits read),
  replacing the old fixed `read-000 1000 /` path.
* rss: the new word try-parsing-timestamp calls rfc822>timestamp and, if
  that throws, drops the error and retries with rfc3339>timestamp. All
  three entry parsers (rss1.0, rss2.0, atom1.0) now go through it.
* rss: rss2.0-entry now looks up { "link" "guid" } and { "date" "pubDate" }
  through any-tag-named; the `keep` chains are rewritten with
  cleave / bi / tri.
* NOTE(review): this patch text was recovered from a whitespace-collapsed
  copy. Line breaks were rebuilt so that every hunk matches its @@ line
  counts and the diffstat, but leading indentation and the wrapping of the
  USING: continuation lines are inferred. Check against the tree or apply
  with --ignore-whitespace.
* NOTE(review): the angle-bracketed words <timestamp>, <entry>, <feed> and
  <name> were missing from the recovered text and have been restored where
  the declared stack effects require them. The author e-mail address on the
  From: line was also missing and has not been guessed.
* NOTE(review): read-rfc3339-seconds divides two integers, so the seconds
  slot would hold an exact ratio, while the new unit test expects the float
  literal 42.12345. Confirm the test comparison treats these as equal.

 extra/calendar/format/format-tests.factor |   12 ++++
 extra/calendar/format/format.factor       |    8 ++-
 extra/rss/rss.factor                      |   78 ++++++++++++++---------
 3 files changed, 65 insertions(+), 33 deletions(-)

diff --git a/extra/calendar/format/format-tests.factor b/extra/calendar/format/format-tests.factor
index f4e1669178..3efe33e265 100755
--- a/extra/calendar/format/format-tests.factor
+++ b/extra/calendar/format/format-tests.factor
@@ -50,3 +50,15 @@ IN: calendar.format.tests
     "Sun May 04 07:00:00 2008 GMT" cookie-string>timestamp
     timestamp>string
 ] unit-test
+
+[
+    T{ timestamp f
+        2008
+        5
+        26
+        0
+        37
+        42.12345
+        T{ duration f 0 0 0 -5 0 0 }
+    }
+] [ "2008-05-26T00:37:42.12345-05:00" rfc3339>timestamp ] unit-test
diff --git a/extra/calendar/format/format.factor b/extra/calendar/format/format.factor
index 91a034f8bd..ff1811e9d5 100755
--- a/extra/calendar/format/format.factor
+++ b/extra/calendar/format/format.factor
@@ -1,4 +1,4 @@
-USING: math math.order math.parser kernel sequences io
+USING: math math.order math.parser math.functions kernel sequences io
 accessors arrays io.streams.string splitting
 combinators accessors debugger
 calendar calendar.format.macros ;
@@ -151,11 +151,15 @@ M: timestamp year. ( timestamp -- )
 : read-hms ( -- h m s )
     read-00 ":" expect read-00 ":" expect read-00 ;
 
+: read-rfc3339-seconds ( s -- s' ch )
+    "+-Z" read-until >r
+    [ string>number ] [ length 10 swap ^ ] bi / + r> ;
+
 : (rfc3339>timestamp) ( -- timestamp )
     read-ymd
     "Tt" expect
     read-hms
-    read1 { { CHAR: . [ read-000 1000 / + read1 ] } [ ] } case
+    read1 { { CHAR: . [ read-rfc3339-seconds ] } [ ] } case
     read-rfc3339-gmt-offset
     <timestamp> ;
 
diff --git a/extra/rss/rss.factor b/extra/rss/rss.factor
index 6e616e51a9..364c24b91f 100644
--- a/extra/rss/rss.factor
+++ b/extra/rss/rss.factor
@@ -18,51 +18,67 @@ TUPLE: entry title link description pub-date ;
 
 C: <entry> entry
 
+: try-parsing-timestamp ( string -- timestamp )
+    [ rfc822>timestamp ] [ drop rfc3339>timestamp ] recover ;
+
 : rss1.0-entry ( tag -- entry )
-    [ "title" tag-named children>string ] keep
-    [ "link" tag-named children>string ] keep
-    [ "description" tag-named children>string ] keep
-    f "date" "http://purl.org/dc/elements/1.1/" <name>
-    tag-named dup [ children>string rfc822>timestamp ] when
-    <entry> ;
+    {
+        [ "title" tag-named children>string ]
+        [ "link" tag-named children>string ]
+        [ "description" tag-named children>string ]
+        [
+            f "date" "http://purl.org/dc/elements/1.1/" <name>
+            tag-named dup [ children>string try-parsing-timestamp ] when
+        ]
+    } cleave <entry> ;
 
 : rss1.0 ( xml -- feed )
     [
         "channel" tag-named
-        [ "title" tag-named children>string ] keep
-        "link" tag-named children>string
-    ] keep
-    "item" tags-named [ rss1.0-entry ] map <feed> ;
+        [ "title" tag-named children>string ]
+        [ "link" tag-named children>string ] bi
+    ] [ "item" tags-named [ rss1.0-entry ] map ] bi
+    <feed> ;
 
 : rss2.0-entry ( tag -- entry )
-    [ "title" tag-named children>string ] keep
-    [ "link" tag-named ] keep
-    [ "guid" tag-named dupd ? children>string ] keep
-    [ "description" tag-named children>string ] keep
-    "pubDate" tag-named children>string rfc822>timestamp <entry> ;
+    {
+        [ "title" tag-named children>string ]
+        [ { "link" "guid" } any-tag-named children>string ]
+        [ "description" tag-named children>string ]
+        [
+            { "date" "pubDate" } any-tag-named
+            children>string try-parsing-timestamp
+        ]
+    } cleave <entry> ;
 
 : rss2.0 ( xml -- feed )
     "channel" tag-named
-    [ "title" tag-named children>string ] keep
-    [ "link" tag-named children>string ] keep
-    "item" tags-named [ rss2.0-entry ] map <feed> ;
+    [ "title" tag-named children>string ]
+    [ "link" tag-named children>string ]
+    [ "item" tags-named [ rss2.0-entry ] map ]
+    tri <feed> ;
 
 : atom1.0-entry ( tag -- entry )
-    [ "title" tag-named children>string ] keep
-    [ "link" tag-named "href" swap at ] keep
-    [
-        { "content" "summary" } any-tag-named
-        dup tag-children [ string? not ] contains?
-        [ tag-children [ write-chunk ] with-string-writer ]
-        [ children>string ] if
-    ] keep
-    { "published" "updated" "issued" "modified" } any-tag-named
-    children>string rfc3339>timestamp <entry> ;
+    {
+        [ "title" tag-named children>string ]
+        [ "link" tag-named "href" swap at ]
+        [
+            { "content" "summary" } any-tag-named
+            dup tag-children [ string? not ] contains?
+            [ tag-children [ write-chunk ] with-string-writer ]
+            [ children>string ] if
+        ]
+        [
+            { "published" "updated" "issued" "modified" }
+            any-tag-named children>string try-parsing-timestamp
+        ]
+    } cleave <entry> ;
 
 : atom1.0 ( xml -- feed )
-    [ "title" tag-named children>string ] keep
-    [ "link" tag-named "href" swap at ] keep
-    "entry" tags-named [ atom1.0-entry ] map <feed> ;
+    [ "title" tag-named children>string ]
+    [ "link" tag-named "href" swap at ]
+    [ "entry" tags-named [ atom1.0-entry ] map ]
+    tri <feed> ;
 
 : xml>feed ( xml -- feed )
     dup name-tag {