html.parser.printer: add helper words with string output.

db4
John Benediktsson 2015-04-20 09:31:40 -07:00
parent 94cc259ba2
commit 5cc7bfb067
3 changed files with 13 additions and 8 deletions

View File

@@ -1,6 +1,6 @@
USING: accessors assocs combinators html.parser USING: accessors assocs combinators html.parser
html.parser.utils io kernel math math.order namespaces sequences html.parser.utils io io.streams.string kernel math math.order
strings unicode.categories ; namespaces sequences strings unicode.categories ;
IN: html.parser.printer IN: html.parser.printer
TUPLE: html-printer ; TUPLE: html-printer ;
@@ -35,9 +35,15 @@ ERROR: unknown-tag-error tag ;
: html-text. ( vector -- ) : html-text. ( vector -- )
T{ text-printer } html-printer [ print-tags ] with-variable ; T{ text-printer } html-printer [ print-tags ] with-variable ;
: html-text ( vector -- string )
[ html-text. ] with-string-writer ;
: html-src. ( vector -- ) : html-src. ( vector -- )
T{ src-printer } html-printer [ print-tags ] with-variable ; T{ src-printer } html-printer [ print-tags ] with-variable ;
: html-src ( vector -- string )
[ html-src. ] with-string-writer ;
M: text-printer print-opening-tag M: text-printer print-opening-tag
name>> { name>> {
{ "br" [ nl ] } { "br" [ nl ] }

View File

@@ -76,7 +76,7 @@ PRIVATE>
: article. ( name -- ) : article. ( name -- )
wikipedia-url http-get nip parse-html wikipedia-url http-get nip parse-html
"content" find-by-id-between "content" find-by-id-between
[ html-text. ] with-string-writer string-lines html-text string-lines
[ [ blank? ] trim ] map harvest [ [ [ blank? ] trim ] map harvest [
html-unescape 72 wrap-string print nl html-unescape 72 wrap-string print nl
] each ; ] each ;

View File

@@ -3,9 +3,9 @@
USING: accessors formatting html.entities html.parser USING: accessors formatting html.entities html.parser
html.parser.analyzer html.parser.printer http.client images.http html.parser.analyzer html.parser.printer http.client images.http
images.viewer images.viewer.prettyprint io io.streams.string images.viewer images.viewer.prettyprint io kernel parser
kernel parser prettyprint.custom prettyprint.sections regexp prettyprint.custom prettyprint.sections regexp sequences strings
sequences strings ui wrap.strings ; ui wrap.strings ;
IN: xkcd IN: xkcd
@@ -22,8 +22,7 @@ IN: xkcd
: comic-text ( url -- string ) : comic-text ( url -- string )
http-get nip parse-html http-get nip parse-html
"transcript" find-by-id-between "transcript" find-by-id-between
[ html-text. ] with-string-writer html-text html-unescape ;
html-unescape ;
: comic-text. ( url -- ) : comic-text. ( url -- )
comic-text 80 wrap-string print ; comic-text 80 wrap-string print ;