add a few utility words
parent
0e9ec0dd6a
commit
34c1170963
|
@ -1,5 +1,5 @@
|
||||||
USING: assocs html.parser kernel math sequences strings unicode.categories
|
USING: assocs html.parser kernel math sequences strings ascii
|
||||||
unicode.case ;
|
arrays shuffle unicode.case namespaces ;
|
||||||
IN: html.parser.analyzer
|
IN: html.parser.analyzer
|
||||||
|
|
||||||
: remove-blank-text ( vector -- vector' )
|
: remove-blank-text ( vector -- vector' )
|
||||||
|
@ -65,28 +65,21 @@ IN: html.parser.analyzer
|
||||||
[ tag-attributes "href" swap at ] map
|
[ tag-attributes "href" swap at ] map
|
||||||
[ ] subset ;
|
[ ] subset ;
|
||||||
|
|
||||||
|
: (find-all) ( n seq quot -- )
|
||||||
|
2dup >r >r find* [
|
||||||
|
dupd 2array , 1+ r> r> (find-all)
|
||||||
|
] [
|
||||||
|
r> r> 3drop
|
||||||
|
] if* ;
|
||||||
|
|
||||||
|
: find-all ( seq quot -- alist )
|
||||||
|
[ 0 -rot (find-all) ] { } make ;
|
||||||
|
|
||||||
! : find-last-tag ( name vector -- index tag )
|
: find-opening-tags-by-name ( name seq -- seq )
|
||||||
! [
|
[ [ tag-name = ] keep tag-closing? not and ] with find-all ;
|
||||||
! dup tag-matched? [ 2drop f ] [ tag-name = ] if
|
|
||||||
! ] with find-last ;
|
|
||||||
|
|
||||||
! : find-last-tag* ( name n vector -- tag )
|
: href-contains? ( str tag -- ? )
|
||||||
! 0 -rot <slice> find-last-tag ;
|
tag-attributes "href" swap at* [ subseq? ] [ 2drop f ] if ;
|
||||||
|
|
||||||
! : find-matching-tag ( tag -- tag )
|
|
||||||
! dup tag-closing? [
|
|
||||||
! find-last-tag
|
|
||||||
! ] [
|
|
||||||
! ] if ;
|
|
||||||
|
|
||||||
|
|
||||||
! clear "/Users/erg/web/fark.html" file-contents parse-html find-links [ "go.pl" swap start ] subset [ "=" split peek ] map
|
|
||||||
! clear "http://fark.com" http-get parse-html find-links [ "go.pl" swap start ] subset [ "=" split peek ] map
|
! clear "http://fark.com" http-get parse-html find-links [ "go.pl" swap start ] subset [ "=" split peek ] map
|
||||||
|
|
||||||
! clear "/Users/erg/web/hostels.html" file-contents parse-html "Currency" "name" pick find-first-attribute-key-value
|
|
||||||
|
|
||||||
! clear "/Users/erg/web/hostels.html" file-contents parse-html
|
|
||||||
! "Currency" "name" pick find-first-attribute-key-value
|
|
||||||
! pick find-between remove-blank-text
|
|
||||||
|
|
Loading…
Reference in New Issue