USING: assocs html.parser kernel math sequences strings ascii
arrays shuffle unicode.case namespaces splitting http
sequences.lib ;
IN: html.parser.analyzer
: (find-relative)
    [ >r + dup r> ?nth* [ 2drop f f ] unless ] [ 2drop f ] if ;
: find-relative ( seq quot n -- i elt )
    >r over [ find drop ] dip r> swap pick
    (find-relative) ;
: (find-all) ( n seq quot -- )
    2dup >r >r find* [
        dupd 2array , 1+ r> r> (find-all)
    ] [
        r> r> 3drop
    ] if* ;
: find-all ( seq quot -- alist )
    [ 0 -rot (find-all) ] { } make ;
: (find-nth) ( offset seq quot n count -- obj )
    >r >r [ find* ] 2keep 4 npick [
        r> r> 1+ 2dup <= [
            4drop
        ] [
            >r >r >r >r drop 1+ r> r> r> r>
            (find-nth)
        ] if
    ] [
        2drop r> r> 2drop
    ] if ;
: find-nth ( seq quot n -- i elt )
    0 -roll 0 (find-nth) ;
: find-nth-relative ( seq quot n offest -- i elt )
    >r [ find-nth ] 3keep 2drop nip r> swap pick
    (find-relative) ;
: remove-blank-text ( vector -- vector' )
    [
        dup tag-name text = [
            tag-text [ blank? ] all? not
        ] [
            drop t
        ] if
    ] subset ;
: trim-text ( vector -- vector' )
    [
        dup tag-name text = [
            [ tag-text [ blank? ] trim ] keep
            [ set-tag-text ] keep
        ] when
    ] map ;
: find-by-id ( id vector -- vector )
    [ tag-attributes "id" swap at = ] with subset ;
: find-by-class ( id vector -- vector )
    [ tag-attributes "class" swap at = ] with subset ;
: find-by-name ( str vector -- vector )
    >r >lower r>
    [ tag-name = ] with subset ;
: find-first-name ( str vector -- i/f tag/f )
    >r >lower r>
    [ tag-name = ] with find ;
: find-matching-close ( str vector -- i/f tag/f )
    >r >lower r>
    [ [ tag-name = ] keep tag-closing? and ] with find ;
: find-by-attribute-key ( key vector -- vector )
    >r >lower r>
    [ tag-attributes at ] with subset
    [ ] subset ;
: find-by-attribute-key-value ( value key vector -- vector )
    >r >lower r>
    [ tag-attributes at over = ] with subset nip
    [ ] subset ;
: find-first-attribute-key-value ( value key vector -- i/f tag/f )
    >r >lower r>
    [ tag-attributes at over = ] with find rot drop ;
: find-between* ( i/f tag/f vector -- vector )
    pick integer? [
        rot tail-slice
        >r tag-name r>
        [ find-matching-close drop 1+ ] keep swap head
    ] [
        3drop V{ } clone
    ] if ;
    
: find-between ( i/f tag/f vector -- vector )
    find-between* dup length 3 >= [
        [ 1 tail-slice 1 head-slice* ] keep like
    ] when ;
: find-between-first ( string vector -- vector' )
    [ find-first-name ] keep find-between ;
: tag-link ( tag -- link/f )
    tag-attributes [ "href" swap at ] [ f ] if* ;
: find-links ( vector -- vector )
    [ tag-name "a" = ] subset
    [ tag-link ] subset ;
: find-by-text ( seq quot -- tag )
    [ dup tag-name text = ] swap compose find drop ;
: find-opening-tags-by-name ( name seq -- seq )
    [ [ tag-name = ] keep tag-closing? not and ] with find-all ;
: href-contains? ( str tag -- ? )
    tag-attributes "href" swap at* [ subseq? ] [ 2drop f ] if ;
: query>assoc* ( str -- hash )
    "?" split1 nip query>assoc ;
! clear "http://fark.com" http-get parse-html find-links [ "go.pl" swap start ] subset [ "=" split peek ] map
! clear "http://www.sailwx.info/shiptrack/cruiseships.phtml" http-get parse-html remove-blank-text
! "a" over find-opening-tags-by-name
! [ nip "shipposition.phtml?call=GBTT" swap href-contains? ] assoc-subset
! first first 8 + over nth
! tag-attributes "href" swap at query>assoc*
! "lat" over at "lon" rot at