html.parser: Strip trailing slashes / in html tags. Add unit test. Fixes #1233.

db4
Doug Coleman 2014-11-30 09:33:39 -06:00
parent d0357cc349
commit 6dac68d593
2 changed files with 35 additions and 2 deletions

View File

@ -96,3 +96,34 @@ V{
] [
"<!--comment-->" parse-html
] unit-test
! Issue #1233, trailing / in tags
! Baseline case: the tag text has no slash before ">". The expected result is
! a vector holding a single "img" tag tuple whose attributes map "src" to the
! URL given in the input.
{
V{
T{ tag
{ name "img" }
{ attributes H{ { "src" "http://factorcode.org" } } }
}
}
}
[ "<img src=\"http://factorcode.org\">" parse-html ] unit-test
! Issue #1233: one "/" directly before ">". The expected result is a single
! "img" tag tuple whose "src" value is the bare URL, i.e. the slash must not
! show up in the parsed tag.
{
V{
T{ tag
{ name "img" }
{ attributes H{ { "src" "http://factorcode.org" } } }
}
}
}
[ "<img src=\"http://factorcode.org\"/>" parse-html ] unit-test
! Issue #1233: a run of several "/" characters directly before ">". The
! expected result is still a single "img" tag tuple whose "src" value is the
! bare URL, so every slash in the run must be stripped, not just the last one.
{
V{
T{ tag
{ name "img" }
{ attributes H{ { "src" "http://factorcode.org" } } }
}
}
}
[ "<img src=\"http://factorcode.org\"////////>" parse-html ] unit-test

View File

@ -75,8 +75,10 @@ SYMBOL: tagstack
[ advance advance read-comment ] [ read-dtd ] if ;
! Read the text of one tag from the parser: take input until the current
! element is ">" or "<", then advance by one element unless the current
! element is "<".
! NOTE(review): this listing looks like a diff rendered without its +/- markers.
! The first quotation pair (ending at the first ";") appears to be the
! pre-change body and the second pair its replacement, which additionally
! trims trailing "/" characters from the taken text. Confirm against the
! actual file before relying on this span as a single definition.
: read-tag ( sequence-parser -- string )
[ [ current "><" member? ] take-until ]
[ dup current CHAR: < = [ advance ] unless drop ] bi ;
[
[ current "><" member? ] take-until
[ CHAR: / = ] trim-tail ! drop trailing "/" so "<img .../>" yields the same text as "<img ...>"
] [ dup current CHAR: < = [ advance ] unless drop ] bi ;
! Take elements from the parser until its current element is "<" and
! return what was taken.
: read-until-< ( sequence-parser -- string )
    [ current "<" member? ] take-until ;