From 34c1170963820b19c728453d00c46ba870443b2a Mon Sep 17 00:00:00 2001
From: Doug Coleman <doug.coleman@gmail.com>
Date: Mon, 11 Feb 2008 16:27:40 -0600
Subject: [PATCH] add a few utility words

---
 extra/html/parser/analyzer/analyzer.factor | 35 +++++++++-------------
 1 file changed, 14 insertions(+), 21 deletions(-)

diff --git a/extra/html/parser/analyzer/analyzer.factor b/extra/html/parser/analyzer/analyzer.factor
index e4f11cd91e..dcfbd1e197 100755
--- a/extra/html/parser/analyzer/analyzer.factor
+++ b/extra/html/parser/analyzer/analyzer.factor
@@ -1,5 +1,5 @@
-USING: assocs html.parser kernel math sequences strings unicode.categories
-       unicode.case ;
+USING: assocs html.parser kernel math sequences strings ascii
+arrays shuffle unicode.case namespaces ;
 IN: html.parser.analyzer
 
 : remove-blank-text ( vector -- vector' )
@@ -65,28 +65,21 @@ IN: html.parser.analyzer
     [ tag-attributes "href" swap at ] map
     [ ] subset ;
 
+: (find-all) ( n seq quot -- )
+    2dup >r >r find* [
+        dupd 2array , 1+ r> r> (find-all)
+    ] [
+        r> r> 3drop
+    ] if* ;
 
+: find-all ( seq quot -- alist )
+    [ 0 -rot (find-all) ] { } make ;
 
-! : find-last-tag ( name vector -- index tag )
-    ! [
-        ! dup tag-matched? [ 2drop f ] [ tag-name = ] if
-    ! ] with find-last ;
+: find-opening-tags-by-name ( name seq -- seq )
+    [ [ tag-name = ] keep tag-closing? not and ] with find-all ;
 
-! : find-last-tag* ( name n vector -- tag )
-    ! 0 -rot <slice> find-last-tag ;
+: href-contains? ( str tag -- ? )
+    tag-attributes "href" swap at* [ subseq? ] [ 2drop f ] if ;
 
-! : find-matching-tag ( tag -- tag )
-    ! dup tag-closing? [
-        ! find-last-tag
-    ! ] [
-    ! ] if ;
-
-
-! clear "/Users/erg/web/fark.html" file-contents parse-html find-links [ "go.pl" swap start ] subset [ "=" split peek ] map
 ! clear "http://fark.com" http-get parse-html find-links [ "go.pl" swap start ] subset [ "=" split peek ] map
 
-! clear "/Users/erg/web/hostels.html" file-contents parse-html "Currency" "name" pick find-first-attribute-key-value
-
-! clear "/Users/erg/web/hostels.html" file-contents parse-html
-! "Currency" "name" pick find-first-attribute-key-value
-! pick find-between remove-blank-text