Fixing syndication to handle more RSS feeds seen in the wild

db4
Slava Pestov 2008-06-27 01:30:23 -05:00
parent 334d6d86c3
commit 38d5151322
3 changed files with 15 additions and 6 deletions

View File

@@ -53,7 +53,7 @@ TUPLE: entry title url description date ;
swap { swap {
[ "title" tag-named children>string >>title ] [ "title" tag-named children>string >>title ]
[ { "link" "guid" } any-tag-named children>string >url >>url ] [ { "link" "guid" } any-tag-named children>string >url >>url ]
[ "description" tag-named children>string >>description ] [ { "description" "encoded" } any-tag-named children>string >>description ]
[ [
{ "date" "pubDate" } any-tag-named { "date" "pubDate" } any-tag-named
children>string try-parsing-timestamp >>date children>string try-parsing-timestamp >>date

View File

@@ -0,0 +1,8 @@
! Unit tests for children>string from the xml.utilities vocabulary.
! Each test parses a small XML document with string>xml and checks the
! string that children>string produces for it.
IN: xml.utilities.tests
USING: xml xml.utilities tools.test ;
! Text content between the open and close tags is returned as-is.
[ "bar" ] [ "<foo>bar</foo>" string>xml children>string ] unit-test
! An explicit open/close pair with nothing in between gives the empty string.
[ "" ] [ "<foo></foo>" string>xml children>string ] unit-test
! A self-closing tag also gives the empty string.
[ "" ] [ "<foo/>" string>xml children>string ] unit-test

View File

@ -2,7 +2,7 @@
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: kernel namespaces sequences words io assocs USING: kernel namespaces sequences words io assocs
quotations strings parser lexer arrays xml.data xml.writer debugger quotations strings parser lexer arrays xml.data xml.writer debugger
splitting vectors sequences.deep ; splitting vectors sequences.deep combinators ;
IN: xml.utilities IN: xml.utilities
! * System for words specialized on tag names ! * System for words specialized on tag names
@@ -48,10 +48,11 @@ M: process-missing error.
standard-prolog { } rot { } <xml> ; standard-prolog { } rot { } <xml> ;
: children>string ( tag -- string ) : children>string ( tag -- string )
tag-children tag-children {
dup [ string? ] all? { [ dup empty? ] [ drop "" ] }
[ "XML tag unexpectedly contains non-text children" throw ] unless { [ dup [ string? not ] contains? ] [ "XML tag unexpectedly contains non-text children" throw ] }
concat ; [ concat ]
} cond ;
: children-tags ( tag -- sequence ) : children-tags ( tag -- sequence )
tag-children [ tag? ] filter ; tag-children [ tag? ] filter ;