2016-11-12 10:49:09 -05:00
|
|
|
USING: accessors assocs combinators fry html.parser
|
|
|
|
html.parser.utils io io.streams.string kernel math namespaces
|
2016-11-13 15:38:09 -05:00
|
|
|
regexp sequences strings unicode ;
|
2007-12-04 15:14:33 -05:00
|
|
|
IN: html.parser.printer
|
2007-09-20 18:09:08 -04:00
|
|
|
|
2016-11-12 10:49:09 -05:00
|
|
|
! Unit of indentation: the string that `indent` writes once per
! indentation level. Its global value is initialised on this line.
SYMBOL: indentation " " indentation set-global
|
|
|
|
! Current indentation depth: the number of times `indent` writes the
! `indentation` string. `print-tags` binds it to 0 for each print run.
SYMBOL: #indentations
|
|
|
|
|
|
|
|
! Write the `indentation` string to the current output stream once for
! each level recorded in `#indentations`.
: indent ( -- )
    #indentations get
    indentation get [ write ] curry
    times ;
|
|
|
|
|
2008-08-17 11:38:34 -04:00
|
|
|
! Base printer class. The print-*-tag hooks in this file dispatch on the
! value of the `html-printer` variable, which holds an instance of this
! class or of one of its subclasses.
TUPLE: html-printer ;
|
|
|
|
! Printer installed by `html-text.`; its methods emit the text content
! of the tags rather than markup.
TUPLE: text-printer < html-printer ;
|
|
|
|
! Printer installed by `html-src.`; its methods write tags back out as
! markup, including attributes.
TUPLE: src-printer < html-printer ;
|
|
|
|
! Printer installed by `prettyprint-html`; its methods write one tag or
! text run per line, indented according to `#indentations`.
TUPLE: html-prettyprinter < html-printer ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
2008-08-17 11:38:34 -04:00
|
|
|
! Called by `print-tag` for tags whose name is `text`.
! Dispatches on the value of the `html-printer` variable.
HOOK: print-text-tag html-printer ( tag -- )
|
|
|
|
! Called by `print-tag` for tags whose name is `comment`.
! Dispatches on the value of the `html-printer` variable.
HOOK: print-comment-tag html-printer ( tag -- )
|
|
|
|
! Called by `print-tag` for tags whose name is `dtd`.
! Dispatches on the value of the `html-printer` variable.
HOOK: print-dtd-tag html-printer ( tag -- )
|
|
|
|
! Called by `print-tag` for string-named tags whose `closing?` slot is
! false. Dispatches on the value of the `html-printer` variable.
HOOK: print-opening-tag html-printer ( tag -- )
|
|
|
|
! Called by `print-tag` for string-named tags whose `closing?` slot is
! true. Dispatches on the value of the `html-printer` variable.
HOOK: print-closing-tag html-printer ( tag -- )
|
2007-09-20 18:09:08 -04:00
|
|
|
|
2008-08-17 11:38:34 -04:00
|
|
|
! Thrown by `print-tag` when a tag's name is not `text`, `comment`,
! `dtd` or a string. The offending tag is stored in the `tag` slot.
ERROR: unknown-tag-error tag ;
|
|
|
|
|
|
|
|
! Route a single tag to the matching print hook, chosen by the tag's name:
!   text / comment / dtd  -> the corresponding print-*-tag hook
!   any string            -> print-closing-tag or print-opening-tag,
!                            depending on the tag's closing? slot
! Any other name throws unknown-tag-error with the tag.
: print-tag ( tag -- )
    ! Keep the name on top of the stack for the tests; every branch
    ! drops it again before handing the tag to its hook.
    dup name>> {
        { [ dup text = ] [ drop print-text-tag ] }
        { [ dup comment = ] [ drop print-comment-tag ] }
        { [ dup dtd = ] [ drop print-dtd-tag ] }
        {
            [ dup string? ]
            [
                drop dup closing?>>
                [ print-closing-tag ] [ print-opening-tag ] if
            ]
        }
        [ drop unknown-tag-error ]
    } cond ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
2016-11-12 10:49:09 -05:00
|
|
|
! Print every tag in the sequence with `print-tag`. The run happens in a
! fresh dynamic scope where `#indentations` starts at 0, so changes made
! while printing do not leak to the caller.
: print-tags ( vector -- )
    [
        0 #indentations set
        [ print-tag ] each
    ] with-scope ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
2008-08-17 11:38:34 -04:00
|
|
|
! Print the tags using a text-printer, i.e. with the `html-printer`
! variable bound to a text-printer instance while `print-tags` runs.
: html-text. ( vector -- )
    text-printer new html-printer
    [ print-tags ] with-variable ;
|
|
|
|
|
2015-04-20 12:31:40 -04:00
|
|
|
! Same output as `html-text.`, but captured into a string instead of
! being written to the current output stream.
: html-text ( vector -- string )
    '[ _ html-text. ] with-string-writer ;
|
|
|
|
|
2008-08-17 11:38:34 -04:00
|
|
|
! Print the tags using a src-printer, i.e. with the `html-printer`
! variable bound to a src-printer instance while `print-tags` runs.
: html-src. ( vector -- )
    [
        T{ src-printer } html-printer set
        print-tags
    ] with-scope ;
|
|
|
|
|
2015-04-20 12:31:40 -04:00
|
|
|
! Same output as `html-src.`, but captured into a string instead of
! being written to the current output stream.
: html-src ( vector -- string )
    [ html-src. ] curry with-string-writer ;
|
|
|
|
|
2016-11-13 15:38:09 -05:00
|
|
|
! Flags maintained by the text-printer tag methods: switched on by an
! opening "pre" / "script" / "style" tag and off by the matching closing
! tag. `collapse-spaces` reads preformatted?; the text-printer
! print-text-tag method reads script? and style?.
SYMBOLS: preformatted? script? style? ;
|
|
|
|
|
2013-08-05 12:45:42 -04:00
|
|
|
! Text rendering of an opening tag. Only the tag name matters:
!   br / ol / ul   start a new, indented line
!   li             writes a bullet marker
!   blockquote     raises the indentation level, then indents
!   pre / script / style  switch on the corresponding flag
! All other names produce no output.
M: text-printer print-opening-tag
    name>> {
        { [ dup { "br" "ol" "ul" } member? ] [ drop nl indent ] }
        { [ dup "li" = ] [ drop " * " write ] }
        { [ dup "blockquote" = ] [ drop #indentations inc indent ] }
        { [ dup "pre" = ] [ drop preformatted? on ] }
        { [ dup "script" = ] [ drop script? on ] }
        { [ dup "style" = ] [ drop style? on ] }
        [ drop ]
    } cond ;
|
2013-08-05 12:45:42 -04:00
|
|
|
|
|
|
|
! Text rendering of a closing tag. Every quotation below receives the
! tag name and acts only when the name matches:
!   blockquote            lowers the indentation level (done first, so
!                         the line breaks below use the outer level)
!   p / blockquote / h1-h6  end with a blank line
!   ul / ol / li / tr     end with a single line break
!   th / td               are followed by a space
!   pre / script / style  switch the corresponding flag back off
M: text-printer print-closing-tag
    name>> {
        [ "blockquote" = [ #indentations dec ] when ]
        [
            ! "h6" belongs with the other heading levels; without it a
            ! closing </h6> ran straight into the following text.
            { "p" "blockquote" "h1" "h2" "h3" "h4" "h5" "h6" }
            member? [ nl indent nl indent ] when
        ]
        [
            { "ul" "ol" "li" "tr" } member? [ nl indent ] when
        ]
        [ { "th" "td" } member? [ bl ] when ]
        [ "pre" = [ preformatted? off ] when ]
        [ "script" = [ script? off ] when ]
        [ "style" = [ style? off ] when ]
    } cleave ;
|
2013-08-05 12:45:42 -04:00
|
|
|
|
2013-10-11 14:51:46 -04:00
|
|
|
! Comments produce no output in the text rendering; the tag is discarded.
M: text-printer print-comment-tag drop ;
|
|
|
|
|
2016-11-13 15:38:09 -05:00
|
|
|
! DTD tags produce no output in the text rendering; the tag is discarded.
M: text-printer print-dtd-tag drop ;
|
|
|
|
|
|
|
|
! Replace each run of whitespace in the text with a single space,
! unless the preformatted? flag is set, in which case the text is
! returned untouched.
: collapse-spaces ( text -- text' )
    preformatted? get
    [ ]
    [ R/ \s+/ " " re-replace ]
    if ;
|
|
|
|
|
|
|
|
! Write the tag's text after `collapse-spaces`, except while either the
! script? or the style? flag is set, in which case the tag is discarded.
M: text-printer print-text-tag
    { script? style? } [ get ] any?
    [ drop ]
    [ text>> collapse-spaces write ]
    if ;
|
|
|
|
|
|
|
|
! Default text handling for any html-printer: write the tag's text
! verbatim to the current output stream.
M: html-printer print-text-tag text>> write ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
2016-11-13 15:38:09 -05:00
|
|
|
! Default comment handling: write the tag's text wrapped in the
! "<!--" and "-->" delimiters.
M: html-printer print-comment-tag
    text>>
    "<!--" write
    write
    "-->" write ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
2016-11-13 15:38:09 -05:00
|
|
|
! Default DTD handling: write the tag's text between "<!" and ">".
M: html-printer print-dtd-tag
    text>>
    "<!" write
    write
    ">" write ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
|
|
|
! Write every key/value pair of the assoc as: a space, the key, "=",
! then the value after it has been passed through `?quote`.
: print-attributes ( hashtable -- )
    [
        swap bl write "=" write   ! key first ...
        ?quote write              ! ... then the value
    ] assoc-each ;
|
2007-09-20 18:09:08 -04:00
|
|
|
|
2016-11-13 15:38:09 -05:00
|
|
|
! Write an opening tag as markup: "<", the tag name, its attributes
! (via `print-attributes`), then ">".
M: src-printer print-opening-tag
    "<" write
    dup name>> write
    attributes>> print-attributes
    ">" write ;
|
|
|
|
|
2016-11-13 15:38:09 -05:00
|
|
|
! Write a closing tag as markup: "</", the tag name, then ">".
M: src-printer print-closing-tag
    name>>
    "</" write
    write
    ">" write ;
|
2008-08-17 11:38:34 -04:00
|
|
|
|
|
|
|
! Print the tags using an html-prettyprinter, i.e. with the
! `html-printer` variable bound to an html-prettyprinter instance while
! `print-tags` runs.
: prettyprint-html ( vector -- )
    html-prettyprinter new html-printer
    [ print-tags ] with-variable ;
|
2008-06-08 17:33:07 -04:00
|
|
|
|
2016-11-13 15:38:09 -05:00
|
|
|
! Write "<name>" on its own indented line, then raise the indentation
! level for what follows.
M: html-prettyprinter print-opening-tag
    name>> dup
    indent "<" write write ">\n" write
    ! "br" and "img" usually have no closing tag, so they must not
    ! deepen the indentation.
    { "br" "img" } member? [ #indentations inc ] unless ;
|
2008-06-08 17:33:07 -04:00
|
|
|
|
2016-11-13 15:38:09 -05:00
|
|
|
! Lower the indentation level, then write "</name>" on its own indented
! line.
!
! The membership test has to run on the tag *name*. Testing the tag
! tuple itself against { "br" "img" } is never true, which made every
! closing tag decrement #indentations, including </br> and </img>, whose
! opening tags never incremented it.
M: html-prettyprinter print-closing-tag
    name>>
    ! "br" and "img" usually have no closing tag; their opening tag did
    ! not deepen the indentation, so a stray closing one must not undo it.
    [ { "br" "img" } member? [ #indentations dec ] unless ]
    [ indent "</" write write ">\n" write ] bi ;
|
2013-07-22 08:05:14 -04:00
|
|
|
|
2016-11-13 15:38:09 -05:00
|
|
|
! Trim blank characters from both ends of the tag's text. If anything
! is left, write it on its own indented line; otherwise write nothing.
M: html-prettyprinter print-text-tag
    text>> [ blank? ] trim
    dup empty?
    [ drop ]
    [ indent write "\n" write ]
    if ;
|