USING: assocs html.parser kernel math sequences strings
unicode.categories unicode.case ;
IN: html.parser.analyzer

! Words for querying the sequence of tags produced by html.parser.
! Unless noted otherwise, each word returns a new sequence and leaves
! its input untouched.

! Drop every element whose tag-name is `text` and whose tag-text
! consists solely of blank characters. All other elements are kept.
: remove-blank-text ( vector -- vector' )
    [
        dup tag-name text =
        [ tag-text [ blank? ] all? not ] [ drop t ] if
    ] subset ;

! Strip leading and trailing blank characters from the tag-text of
! every `text` element. The trimmed string is stored back into the tag
! with set-tag-text, so the elements of the input are modified in
! place; the returned sequence holds those same tag objects.
: trim-text ( vector -- vector' )
    [
        dup tag-name text = [
            [ tag-text [ blank? ] trim ] keep
            [ set-tag-text ] keep
        ] when
    ] map ;

! Keep the elements whose "id" attribute equals `id`.
: find-by-id ( id vector -- vector )
    [ tag-attributes "id" swap at = ] with subset ;

! Keep the elements whose "class" attribute equals `class`.
! The comparison is against the whole attribute value.
: find-by-class ( class vector -- vector )
    [ tag-attributes "class" swap at = ] with subset ;

! Keep the elements whose tag-name equals `str`. `str` is lowercased
! before the comparison; the tag-name itself is compared as stored.
: find-by-name ( str vector -- vector )
    >r >lower r>
    [ tag-name = ] with subset ;

! Index and tag of the first element whose tag-name equals the
! lowercased `str`, or f f when there is none.
: find-first-name ( str vector -- i/f tag/f )
    >r >lower r>
    [ tag-name = ] with find ;

! Index and tag of the first element that is a closing tag
! (tag-closing?) and whose tag-name equals the lowercased `str`, or
! f f when there is none. This is a plain linear search: it does not
! account for nested tags of the same name.
: find-matching-close ( str vector -- i/f tag/f )
    >r >lower r>
    [ [ tag-name = ] keep tag-closing? and ] with find ;

! Keep the elements whose attributes map the lowercased `key` to a
! value other than f.
: find-by-attribute-key ( key vector -- vector )
    >r >lower r>
    [ tag-attributes at ] with subset ;

! Keep the elements whose attribute `key` (lowercased before lookup)
! equals `value`.
: find-by-attribute-key-value ( value key vector -- vector )
    >r >lower r>
    ! The quotation leaves `value` on the stack beneath each result so
    ! that it is available for the next element; nip discards it.
    [ tag-attributes at over = ] with subset nip ;

! Index and tag of the first element whose attribute `key` (lowercased
! before lookup) equals `value`, or f f when there is none.
: find-first-attribute-key-value ( value key vector -- i/f tag/f )
    >r >lower r>
    [ tag-attributes at over = ] with find
    ! Stack is now: value i/f tag/f -- drop the leftover value.
    rot drop ;

! Given the index and tag of an opening element (as returned by one of
! the find words above), return the elements that follow it, up to but
! not including the first closing tag of the same name.
!
! When `i/f` is not an integer (nothing was found), an empty vector is
! returned. When no closing tag exists after the opening one, every
! element after the opening tag is returned.
: find-between ( i/f tag/f vector -- vector )
    pick integer? [
        rot 1+ tail-slice
        >r tag-name r>
        [ find-matching-close drop ] keep
        ! Stack: i/f slice. If no close tag was found the index is f;
        ! substitute the slice length so head is never given f.
        [ length or ] keep swap head
    ] [
        3drop V{ } clone
    ] if ;

! Collect the "href" attribute value of every element whose tag-name
! is "a". Elements for which the lookup yields f are left out.
: find-links ( vector -- vector )
    [ tag-name "a" = ] subset
    [ tag-attributes "href" swap at ] map
    [ ] subset ;

! ---------------------------------------------------------------------
! Disabled drafts (not compiled).
! ---------------------------------------------------------------------

! : find-last-tag ( name vector -- index tag )
    ! [
        ! dup tag-matched? [ 2drop f ] [ tag-name = ] if
    ! ] with find-last ;

! : find-last-tag* ( name n vector -- tag )
    ! 0 -rot find-last-tag ;

! : find-matching-tag ( tag -- tag )
    ! dup tag-closing? [
        ! find-last-tag
    ! ] [
    ! ] if ;

! ---------------------------------------------------------------------
! Scratch listener snippets (not compiled).
! ---------------------------------------------------------------------

! clear "/Users/erg/web/fark.html" file-contents parse-html find-links [ "go.pl" swap start ] subset [ "=" split peek ] map
! clear "http://fark.com" http-get parse-html find-links [ "go.pl" swap start ] subset [ "=" split peek ] map

! clear "/Users/erg/web/hostels.html" file-contents parse-html "Currency" "name" pick find-first-attribute-key-value

! clear "/Users/erg/web/hostels.html" file-contents parse-html
! "Currency" "name" pick find-first-attribute-key-value
! pick find-between remove-blank-text