diff --git a/extra/html/parser/analyzer/analyzer.factor b/extra/html/parser/analyzer/analyzer.factor index fca15d9b07..511730efb4 100755 --- a/extra/html/parser/analyzer/analyzer.factor +++ b/extra/html/parser/analyzer/analyzer.factor @@ -1,8 +1,50 @@ USING: assocs html.parser kernel math sequences strings ascii arrays shuffle unicode.case namespaces splitting -http.server.responders ; +http.server.responders sequences.lib ; IN: html.parser.analyzer +: multi-find* ( n seq quots -- i elt ) + ; + +: multi-find ( seq quots -- i elt ) + 0 -rot ; + +: (find-relative) + [ >r + dup r> ?nth* [ 2drop f f ] unless ] [ 2drop f ] if ; + +: find-relative ( seq quot n -- i elt ) + >r over [ find drop ] dip r> swap pick + (find-relative) ; + +: (find-all) ( n seq quot -- ) + 2dup >r >r find* [ + dupd 2array , 1+ r> r> (find-all) + ] [ + r> r> 3drop + ] if* ; + +: find-all ( seq quot -- alist ) + [ 0 -rot (find-all) ] { } make ; + +: (find-nth) ( offset seq quot n count -- obj ) + >r >r [ find* ] 2keep 4 npick [ + r> r> 1+ 2dup <= [ + 4drop + ] [ + >r >r >r >r drop 1+ r> r> r> r> + (find-nth) + ] if + ] [ + 2drop r> r> 2drop + ] if ; + +: find-nth ( seq quot n -- i elt ) + 0 -roll 0 (find-nth) ; + +: find-nth-relative ( seq quot n offest -- i elt ) + >r [ find-nth ] 3keep 2drop nip r> swap pick + (find-relative) ; + : remove-blank-text ( vector -- vector' ) [ dup tag-name text = [ @@ -52,29 +94,33 @@ IN: html.parser.analyzer >r >lower r> [ tag-attributes at over = ] with find rot drop ; -: find-between ( i/f tag/f vector -- vector ) +: find-between* ( i/f tag/f vector -- vector ) pick integer? [ - rot 1+ tail-slice + rot tail-slice >r tag-name r> - [ find-matching-close drop ] keep swap head + [ find-matching-close drop 1+ ] keep swap head ] [ 3drop V{ } clone ] if ; + +: find-between ( i/f tag/f vector -- vector ) + find-between* dup length 3 >= [ + [ 1 tail-slice 1 head-slice* ] keep like + ] when ; + +: find-between-first ( string vector -- vector' ) + [ find-first-name ] keep find-between ; + +: tag-link ( tag -- link/f ) + tag-attributes [ "href" swap at ] [ f ] if* ; : find-links ( vector -- vector ) [ tag-name "a" = ] subset - [ tag-attributes "href" swap at ] map - [ ] subset ; + [ tag-link ] subset ; -: (find-all) ( n seq quot -- ) - 2dup >r >r find* [ - dupd 2array , 1+ r> r> (find-all) - ] [ - r> r> 3drop - ] if* ; -: find-all ( seq quot -- alist ) - [ 0 -rot (find-all) ] { } make ; +: find-by-text ( seq quot -- tag ) + [ dup tag-name text = ] swap compose find drop ; : find-opening-tags-by-name ( name seq -- seq ) [ [ tag-name = ] keep tag-closing? not and ] with find-all ;