Fix comments and DTDs in HTML parser

Doug Coleman 2009-05-20 15:50:01 -05:00
parent 81174c9d39
commit cedb0bdcb5
2 changed files with 28 additions and 3 deletions

View File

@ -73,3 +73,26 @@ V{
T{ tag f "head" H{ } f t }
}
] [ "<head<title>Spagna</title></head" parse-html ] unit-test
! A lone DOCTYPE declaration is expected to parse to a vector holding one
! dtd tag whose text is the declaration without the leading "<!" and the
! trailing ">".
[
    V{
        T{ tag
            { name dtd }
            { text "DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2 Draft//EN\"" }
        }
    }
] [
    "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2 Draft//EN\">" parse-html
] unit-test
! A lone HTML comment is expected to parse to a vector holding one comment
! tag whose text is the comment body without the "<!--" and "-->" delimiters.
[
    V{
        T{ tag
            { name comment }
            { text "comment" }
        }
    }
] [ "<!--comment-->" parse-html ] unit-test

View File

@ -1,7 +1,7 @@
! Copyright (C) 2008 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays hashtables sequence-parser
html.parser.utils kernel namespaces sequences
html.parser.utils kernel namespaces sequences math
unicode.case unicode.categories combinators.short-circuit
quoting fry ;
IN: html.parser
@ -63,10 +63,12 @@ SYMBOL: tagstack
[ blank? ] trim ;
! read-comment ( sequence-parser -- )
! Takes the input up to the "-->" terminator, builds a tag from it with
! `comment new-tag`, and hands that tag to push-tag.
! NOTE(review): this span looks like a diff rendering whose +/- markers were
! stripped. The one-line body right after the signature appears to be the
! pre-change version, and the two-quotation `bi` form after it appears to be
! its replacement -- confirm against the repository before reading this as a
! single definition.
! In the `bi` form the second quotation runs `advance drop` three times on
! the same parser; presumably this steps past the three characters of "-->"
! that take-until-sequence leaves unconsumed -- TODO confirm.
: read-comment ( sequence-parser -- )
"-->" take-until-sequence comment new-tag push-tag ;
[ "-->" take-until-sequence comment new-tag push-tag ]
[ '[ _ advance drop ] 3 swap times ] bi ;
! read-dtd ( sequence-parser -- )
! Takes the input up to the next ">", builds a tag from it with
! `dtd new-tag`, and hands that tag to push-tag.
! NOTE(review): this span looks like a diff rendering whose +/- markers were
! stripped. The one-line body right after the signature appears to be the
! pre-change version, and the two-quotation `bi` form after it appears to be
! its replacement -- confirm against the repository before reading this as a
! single definition.
! In the `bi` form the second quotation runs `advance drop` once on the same
! parser; presumably this steps past the closing ">" that
! take-until-sequence leaves unconsumed -- TODO confirm.
: read-dtd ( sequence-parser -- )
">" take-until-sequence dtd new-tag push-tag ;
[ ">" take-until-sequence dtd new-tag push-tag ]
[ advance drop ] bi ;
: read-bang ( sequence-parser -- )
advance dup { [ current CHAR: - = ] [ peek-next CHAR: - = ] } 1&&