html.parser.analyzer: make find-between* work on nested tags.
parent
71c526d36a
commit
1b91f4dc23
|
@ -1,6 +1,6 @@
|
||||||
! Copyright (C) 2010 Doug Coleman.
|
! Copyright (C) 2010 Doug Coleman.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: html.parser.analyzer math tools.test ;
|
USING: html.parser html.parser.analyzer math tools.test ;
|
||||||
IN: html.parser.analyzer.tests
|
IN: html.parser.analyzer.tests
|
||||||
|
|
||||||
[ 0 3 ]
|
[ 0 3 ]
|
||||||
|
@ -27,3 +27,46 @@ IN: html.parser.analyzer.tests
|
||||||
|
|
||||||
[ 0 { 3 5 7 9 11 } [ odd? ] find-last-nth ]
|
[ 0 { 3 5 7 9 11 } [ odd? ] find-last-nth ]
|
||||||
[ undefined-find-nth? ] must-fail-with
|
[ undefined-find-nth? ] must-fail-with
|
||||||
|
|
||||||
|
! find-between-first on "title": the expected result holds only the
! text element found between <title> and </title>.
[ V{
|
||||||
|
T{ tag f text f "foo" f }
|
||||||
|
}
|
||||||
|
] [
|
||||||
|
"<html><head><title>foo</title></head></html>" parse-html
|
||||||
|
"title" find-between-first
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
! find-between-first on "div": the expected result is the <p> open
! tag, its text and the </p> close tag; the enclosing div tags
! themselves are not part of the expected vector.
[ V{
|
||||||
|
T{ tag f "p" H{ } f f }
|
||||||
|
T{ tag f text f "para" f }
|
||||||
|
T{ tag f "p" H{ } f t }
|
||||||
|
}
|
||||||
|
] [
|
||||||
|
"<body><div><p>para</p></div></body>" parse-html "div" find-between-first
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
! find-by-class-between on class "foo": the expected result includes
! the matching <div class="foo"> open tag, its contents and its
! closing </div>.
[ V{
|
||||||
|
T{ tag f "div" H{ { "class" "foo" } } f f }
|
||||||
|
T{ tag f "p" H{ } f f }
|
||||||
|
T{ tag f text f "para" f }
|
||||||
|
T{ tag f "p" H{ } f t }
|
||||||
|
T{ tag f "div" H{ } f t }
|
||||||
|
}
|
||||||
|
] [
|
||||||
|
"<body><div class=\"foo\"><p>para</p></div></body>" parse-html
|
||||||
|
"foo" find-by-class-between
|
||||||
|
] unit-test
|
||||||
|
|
||||||
|
! Nested case: a plain <div> sits inside the <div class="foo">. The
! expected result runs through the outer closing </div>, not just up
! to the first (inner) closing </div>.
[ V{
|
||||||
|
T{ tag f "div" H{ { "class" "foo" } } f f }
|
||||||
|
T{ tag f "div" H{ } f f }
|
||||||
|
T{ tag f "p" H{ } f f }
|
||||||
|
T{ tag f text f "para" f }
|
||||||
|
T{ tag f "p" H{ } f t }
|
||||||
|
T{ tag f "div" H{ } f t }
|
||||||
|
T{ tag f "div" H{ } f t }
|
||||||
|
}
|
||||||
|
] [
|
||||||
|
"<body><div class=\"foo\"><div><p>para</p></div></div></body>" parse-html
|
||||||
|
"foo" find-by-class-between
|
||||||
|
] unit-test
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
! Copyright (C) 2008 Doug Coleman.
|
! Copyright (C) 2008 Doug Coleman.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: accessors assocs combinators combinators.short-circuit
|
USING: accessors assocs combinators combinators.short-circuit
|
||||||
fry html.parser http.client io kernel locals math sequences
|
fry html.parser http.client io kernel locals math math.statistics
|
||||||
sets splitting unicode.case unicode.categories urls
|
sequences sets splitting unicode.case unicode.categories urls
|
||||||
urls.encoding shuffle ;
|
urls.encoding shuffle ;
|
||||||
IN: html.parser.analyzer
|
IN: html.parser.analyzer
|
||||||
|
|
||||||
|
@ -51,14 +51,24 @@ ERROR: undefined-find-nth m n seq quot ;
|
||||||
! Lowercases the given string and searches the vector with find for
! the first element whose name>> equals it. Leaves the index and the
! element on the stack (f f when nothing matches, per the stack effect).
: find-first-name ( vector string -- i/f tag/f )
|
: find-first-name ( vector string -- i/f tag/f )
|
||||||
>lower '[ name>> _ = ] find ; inline
|
>lower '[ name>> _ = ] find ; inline
|
||||||
|
|
||||||
: find-matching-close ( vector string -- i/f tag/f )
|
! Takes a sequence and a quotation expected to return -1 if the
|
||||||
|
! element decrements the stack, 0 if it doesn't affect it and 1 if it
|
||||||
|
! increments it. Then finds the matching element where the stack is
|
||||||
|
! empty.
|
||||||
|
: stack-find ( seq quot -- i/f )
|
||||||
|
! Apply quot to every element to get its delta, take the running
! totals with cum-sum, and keep only the index of the first total
! equal to 0 (the element returned by find is dropped).
! NOTE(review): a leading element that maps to 0 makes the first
! running total 0, so index 0 is returned -- confirm callers always
! pass a sequence that starts with the opening element.
map cum-sum [ 0 = ] find drop ; inline
|
||||||
|
|
||||||
|
! Produces a function which returns 1 if the input item is an opening
|
||||||
|
! tag element with the specified name, -1 if it is a closing tag of
|
||||||
|
! the same name and 0 otherwise.
|
||||||
|
: tag-classifier ( string -- quot )
|
||||||
>lower
|
>lower
|
||||||
'[ [ name>> _ = ] [ closing?>> ] bi and ] find ; inline
|
! The string is lowercased before being captured in the quotation.
! If the element's name>> equals it, closing?>> selects -1 (closing)
! or 1 (opening); any other element is dropped and yields 0.
'[ dup name>> _ = [ closing?>> [ -1 ] [ 1 ] if ] [ drop 0 ] if ] ; inline
|
||||||
|
|
||||||
: find-between* ( vector i/f tag/f -- vector )
|
: find-between* ( vector i/f tag/f -- vector )
|
||||||
over integer? [
|
over integer? [
|
||||||
[ tail-slice ] [ name>> ] bi*
|
[ tail-slice ] [ name>> ] bi*
|
||||||
dupd find-matching-close drop [ 1 + ] [ 1 ] if*
|
dupd tag-classifier stack-find [ 1 + ] [ 1 ] if*
|
||||||
head
|
head
|
||||||
] [
|
] [
|
||||||
3drop V{ } clone
|
3drop V{ } clone
|
||||||
|
|
Loading…
Reference in New Issue