From 805cb650bd889221009b5841d6298fed8dee49c1 Mon Sep 17 00:00:00 2001
From: Doug Coleman
Date: Wed, 13 Aug 2008 23:09:43 -0500
Subject: [PATCH] add find-hrefs word

---
Notes (ignored by git-am; not part of the commit message):

  * analyzer.factor: adds find-hrefs, which runs find-links, keeps only the
    tags named "a" that have an "href" attribute, and returns the
    concatenated href values.
  * parser.factor: <tag> now builds the tuple with `tag new` and the
    >>closing? / >>attributes / >>name setters instead of
    `{ set-tag-... } tag construct`; `accessors` is added to USING: for them.
  * parser.factor: the string produced by the read-quote / read-token branch
    is now trimmed of blank characters on both ends.

  NOTE(review): this patch was recovered from a copy whose line breaks had
  been collapsed and whose `<tag>` tokens and author e-mail had been stripped.
  Line breaks and `<tag>` are restored here; leading indentation and the
  position of blank context lines are reconstructed from the hunk counts and
  should be confirmed against the target tree before applying (or apply with
  `git apply --ignore-whitespace`). The author e-mail is not recoverable.

 extra/html/parser/analyzer/analyzer.factor |  6 ++++++
 extra/html/parser/parser.factor            | 10 ++++++----
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/extra/html/parser/analyzer/analyzer.factor b/extra/html/parser/analyzer/analyzer.factor
index dca727b9dc..f167feba06 100755
--- a/extra/html/parser/analyzer/analyzer.factor
+++ b/extra/html/parser/analyzer/analyzer.factor
@@ -140,6 +140,12 @@ TUPLE: link attributes clickable ;
 : href-contains? ( str tag -- ? )
     attributes>> "href" swap at* [ subseq? ] [ 2drop f ] if ;
 
+: find-hrefs ( vector -- vector' )
+    find-links
+    [ [
+        [ name>> "a" = ]
+        [ attributes>> "href" swap key? ] bi and ] filter
+    ] map sift [ [ attributes>> "href" swap at ] map ] map concat ;
 
 : find-forms ( vector -- vector' )
     "form" over find-opening-tags-by-name
diff --git a/extra/html/parser/parser.factor b/extra/html/parser/parser.factor
index c8aa9aa9e6..dbf6c52a0d 100644
--- a/extra/html/parser/parser.factor
+++ b/extra/html/parser/parser.factor
@@ -1,4 +1,4 @@
-USING: arrays html.parser.utils hashtables io kernel
+USING: accessors arrays html.parser.utils hashtables io kernel
 namespaces prettyprint quotations sequences splitting
 state-parser strings unicode.categories unicode.case ;
 IN: html.parser
@@ -23,8 +23,10 @@ SYMBOL: tagstack
     ] if ;
 
 : <tag> ( name attributes closing? -- tag )
-    { set-tag-name set-tag-attributes set-tag-closing? }
-    tag construct ;
+    tag new
+        swap >>closing?
+        swap >>attributes
+        swap >>name ;
 
 : make-tag ( str attribs -- tag )
     >r [ closing-tag? ] keep "/" trim1 r> rot <tag> ;
@@ -75,7 +77,7 @@ SYMBOL: tagstack
         read-quote
     ] [
         read-token
-    ] if ;
+    ] if [ blank? ] trim ;
 
 : read-comment ( -- )
     "-->" take-string* make-comment-tag push-tag ;