html.parser.analyzer: when matching on classname, check that the tag has the given class

db4
Björn Lindqvist 2014-02-15 23:14:29 +01:00 committed by Doug Coleman
parent 19685f4528
commit 3c53214b43
3 changed files with 43 additions and 15 deletions

View File

@ -1,6 +1,11 @@
USING: help.syntax help.markup html.parser.analyzer sequences strings ;
USING: help.syntax help.markup html.parser html.parser.analyzer sequences
strings ;
IN: html.parser.analyzer
! Help entry for html-class? ( tag string -- ? ). Every name in the word's
! stack effect is listed in $values, including the boolean result.
HELP: html-class?
{ $values { "tag" tag } { "string" "a classname" } { "?" "a boolean" } }
{ $description "t if the tag has the given class." } ;
! Help entry for stack-find: the quotation classifies each element as
! 1, 0 or -1, and the word outputs an index or f.
HELP: stack-find
{ $values { "seq" sequence } { "quot" { $quotation "( elt -- 1/0/-1 )" } } { "i/f" "an index or " { $link f } } }
{ $description "Takes a sequence and a quotation expected to return -1 if the element decrements the stack, 0 if it doesn't affect it and 1 if it increments it. Then finds the first element where the stack is empty." } ;
@ -8,4 +13,3 @@ HELP: stack-find
! Help entry for tag-classifier. The quotation it outputs is documented with
! the same ( elt -- 1/0/-1 ) effect as the quotation stack-find expects.
HELP: tag-classifier
{ $values { "string" string } { "quot" { $quotation "( elt -- 1/0/-1 )" } } }
{ $description "Builds a function that classifies tag tuples. Returns 1 if the tag is an opening tag with the given name, -1 if it is a closing tag and 0 otherwise." } ;

View File

@ -1,6 +1,6 @@
! Copyright (C) 2010 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: html.parser html.parser.analyzer math tools.test ;
USING: html.parser html.parser.analyzer kernel math sequences tools.test ;
IN: html.parser.analyzer.tests
[ 0 3 ]
@ -70,3 +70,27 @@ IN: html.parser.analyzer.tests
"<body><div class=\"foo\"><div><p>para</p></div></div></body>" parse-html
"foo" find-by-class-between
] unit-test
! A tag whose class attribute is "a b c" must satisfy html-class? for each
! of the names "a", "b" and "c".
[ t ] [
T{ tag { name "f" } { attributes H{ { "class" "a b c" } } } }
{ "a" "b" "c" } [ html-class? ] with all?
] unit-test
! find-by-class-between with "foo" must match a div whose class attribute is
! "foo and more" and yield the opening and closing div tags.
[
V{
T{ tag
{ name "div" }
{ attributes H{ { "class" "foo and more" } } }
}
T{ tag { name "div" } { attributes H{ } } { closing? t } }
}
] [ "<div class=\"foo and more\"></div>" parse-html
"foo" find-by-class-between
] unit-test
! find-by-class with "bar" on a div whose class attribute is "foo bar" must
! leave the index 0 and the matching opening tag on the stack.
[
0
T{ tag { name "div" } { attributes H{ { "class" "foo bar" } } } }
] [
"<div class=\"foo bar\"></div>" parse-html "bar" find-by-class
] unit-test

View File

@ -27,6 +27,12 @@ IN: html.parser.analyzer
! Runs loopn-index with a wrapper that drops the value on top of the stack
! before calling quot.
! NOTE(review): the dropped value is presumably the iteration index supplied
! by loopn-index, which is defined outside this view -- confirm.
: loopn ( n quot -- )
    '[ drop @ ] loopn-index ; inline
! True when string is one of the pieces obtained by splitting the tag's
! "class" attribute at blank characters.
: html-class? ( tag string -- ? )
    [ "class" attribute [ blank? ] split-when ] dip
    swap member? ;
! True when the tag's "id" attribute is equal to string.
: html-id? ( tag string -- ? )
    [ "id" attribute ] dip swap = ;
! Error class with the slots m, n, seq and quot -- the same names as the
! inputs in check-trivial-find's stack effect.
ERROR: undefined-find-nth m n seq quot ;
: check-trivial-find ( m n seq quot -- m n seq quot )
@ -94,9 +100,9 @@ ERROR: undefined-find-nth m n seq quot ;
! Searches vector with find for the first tag whose "id" attribute equals id.
! NOTE(review): the outputs are whatever find leaves; the unit test for the
! sibling find-by-class expects an index in the first output position, so the
! vector' name in the effect looks misleading -- confirm.
: find-by-id ( vector id -- vector' elt/f )
    [ [ "id" attribute ] dip = ] curry find ;
! Searches vector with find for the first tag matching the given class name.
! The two body lines below are the before/after pair of this change: the
! first compares the whole "class" attribute with the name, the second
! delegates the check to html-class?.
! NOTE(review): the unit test for this word expects an index (0) and the tag
! as outputs, so the vector' name in the effect looks misleading.
: find-by-class ( vector id -- vector' elt/f )
'[ "class" attribute _ = ] find ;
'[ _ html-class? ] find ;
! Searches vector with find for the first tag whose name slot equals the
! lower-cased string.
! NOTE(review): only the search string is lower-cased here; this presumably
! relies on tag names already being stored in lower case -- confirm.
: find-by-name ( vector string -- vector elt/f )
    >lower [ [ name>> ] dip = ] curry find ;
@ -104,15 +110,15 @@ ERROR: undefined-find-nth m n seq quot ;
! Keeps a copy of vector, looks up the first tag whose "id" attribute equals
! string via find-by-id, and hands the vector plus that result to
! find-between*.
: find-by-id-between ( vector string -- vector' )
    [ dup ] dip find-by-id find-between* ;
! Keeps a copy of vector, finds the first tag matching the class name, and
! hands the vector plus that result to find-between*.
! The two quotation lines below are the before/after pair of this change:
! the first compares the whole "class" attribute with the name, the second
! delegates the check to html-class?.
: find-by-class-between ( vector string -- vector' )
dupd
'[ "class" attribute _ = ] find find-between* ;
'[ _ html-class? ] find find-between* ;
: find-by-class-id-between ( vector class id -- vector' )
[
'[
[ "class" attribute _ = ]
[ _ html-class? ]
[ "id" attribute _ = ] bi and
] find
] [
@ -203,12 +209,6 @@ ERROR: undefined-find-nth m n seq quot ;
! Splits str at the first "?", discards the part before it, and passes the
! remainder to query>assoc.
! NOTE(review): when str contains no "?", what reaches query>assoc is
! whatever split1 leaves as its second output -- confirm query>assoc
! accepts that.
: query>assoc* ( str -- hash )
"?" split1 nip query>assoc ;
! This form compares the tag's entire "class" attribute value with string,
! so a tag with several classes matches only when string is the full
! attribute text.
! NOTE(review): this appears to be the pre-change definition (the hunk
! shrinks from 12 to 6 lines); the definition earlier in this file splits
! the attribute on blanks instead.
: html-class? ( tag string -- ? )
swap "class" attribute = ;
! True when the tag's "id" attribute is equal to string.
! NOTE(review): an identical definition appears earlier in this file; this
! copy appears to be the old location removed by the hunk.
: html-id? ( tag string -- ? )
swap "id" attribute = ;
! True when the tag's closing? slot holds f, false otherwise.
: opening-tag? ( tag -- ? )
    closing?>> [ f ] [ t ] if ;