Fix comments and DTDs in HTML parser
parent
81174c9d39
commit
cedb0bdcb5
|
@ -73,3 +73,26 @@ V{
|
|||
T{ tag f "head" H{ } f t }
|
||||
}
|
||||
] [ "<head<title>Spagna</title></head" parse-html ] unit-test
|
||||
|
||||
! Test: parsing a lone <!DOCTYPE ...> declaration is expected to yield a
! vector holding exactly one tag whose name is dtd and whose text is the
! declaration body, without the leading "<!" and the trailing ">".
[
|
||||
V{
|
||||
T{ tag
|
||||
{ name dtd }
|
||||
{ text
|
||||
"DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2 Draft//EN\""
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
[
|
||||
"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2 Draft//EN\">"
|
||||
parse-html
|
||||
] unit-test
|
||||
|
||||
! Test: parsing a lone HTML comment is expected to yield a vector holding
! exactly one tag whose name is comment and whose text is the comment body,
! without the "<!--" and "-->" delimiters.
[
|
||||
V{
|
||||
T{ tag { name comment } { text "comment" } }
|
||||
}
|
||||
] [
|
||||
"<!--comment-->" parse-html
|
||||
] unit-test
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
! Copyright (C) 2008 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors arrays hashtables sequence-parser
|
||||
html.parser.utils kernel namespaces sequences
|
||||
html.parser.utils kernel namespaces sequences math
|
||||
unicode.case unicode.categories combinators.short-circuit
|
||||
quoting fry ;
|
||||
IN: html.parser
|
||||
|
@ -63,10 +63,12 @@ SYMBOL: tagstack
|
|||
[ blank? ] trim ;
|
||||
|
||||
! read-comment ( sequence-parser -- )
! Takes the text up to the next "-->" from the parser and pushes it as a
! comment tag. In this diff view the first one-line body below is the
! pre-change version; the two bracketed quotations that follow replace it.
! The replacement applies both quotations to the same parser via bi: the
! first pushes the comment tag, the second calls advance three times and
! drops each result -- presumably to step past the three characters of the
! "-->" terminator (TODO confirm against take-until-sequence's behaviour).
: read-comment ( sequence-parser -- )
|
||||
"-->" take-until-sequence comment new-tag push-tag ;
|
||||
[ "-->" take-until-sequence comment new-tag push-tag ]
|
||||
[ '[ _ advance drop ] 3 swap times ] bi ;
|
||||
|
||||
! read-dtd ( sequence-parser -- )
! Takes the text up to the next ">" from the parser and pushes it as a dtd
! tag. In this diff view the first one-line body below is the pre-change
! version; the two bracketed quotations that follow replace it. The
! replacement applies both quotations to the same parser via bi: the first
! pushes the dtd tag, the second calls advance once and drops the result --
! presumably to step past the closing ">" (TODO confirm against
! take-until-sequence's behaviour).
: read-dtd ( sequence-parser -- )
|
||||
">" take-until-sequence dtd new-tag push-tag ;
|
||||
[ ">" take-until-sequence dtd new-tag push-tag ]
|
||||
[ advance drop ] bi ;
|
||||
|
||||
: read-bang ( sequence-parser -- )
|
||||
advance dup { [ current CHAR: - = ] [ peek-next CHAR: - = ] } 1&&
|
||||
|
|
Loading…
Reference in New Issue