From b01d1f8a56ea4573bf9a4fc35e318c3fe399a1f7 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Thu, 12 Jun 2008 03:50:20 -0500 Subject: [PATCH] Request size limit and encoding support for HTTP server --- .../bootstrap/image/download/download.factor | 2 +- extra/html/parser/analyzer/analyzer.factor | 2 +- extra/http/client/client.factor | 59 +++++++++++-------- extra/http/http-tests.factor | 30 +++++----- extra/http/http.factor | 25 +++++--- extra/http/server/responses/responses.factor | 3 +- extra/http/server/server.factor | 57 +++++++++++++----- extra/http/server/static/static.factor | 5 +- extra/io/encodings/iana/iana.factor | 9 ++- extra/io/streams/limited/limited-tests.factor | 32 ++++++++++ extra/io/streams/limited/limited.factor | 42 +++++++++++++ extra/syndication/syndication.factor | 2 +- extra/yahoo/yahoo.factor | 2 +- 13 files changed, 201 insertions(+), 69 deletions(-) create mode 100644 extra/io/streams/limited/limited-tests.factor create mode 100644 extra/io/streams/limited/limited.factor diff --git a/extra/bootstrap/image/download/download.factor b/extra/bootstrap/image/download/download.factor index c2e80fee9a..701a784ea4 100644 --- a/extra/bootstrap/image/download/download.factor +++ b/extra/bootstrap/image/download/download.factor @@ -7,7 +7,7 @@ kernel io.files bootstrap.image sequences io ; : url "http://factorcode.org/images/latest/" ; : download-checksums ( -- alist ) - url "checksums.txt" append http-get + url "checksums.txt" append http-get nip string-lines [ " " split1 ] { } map>assoc ; : need-new-image? ( image -- ? ) diff --git a/extra/html/parser/analyzer/analyzer.factor b/extra/html/parser/analyzer/analyzer.factor index 47d352b6b8..f6fccd42ec 100755 --- a/extra/html/parser/analyzer/analyzer.factor +++ b/extra/html/parser/analyzer/analyzer.factor @@ -6,7 +6,7 @@ IN: html.parser.analyzer TUPLE: link attributes clickable ; : scrape-html ( url -- vector ) - http-get parse-html ; + http-get nip parse-html ; : (find-relative) [ >r + dup r> ?nth* [ 2drop f f ] unless ] [ 2drop f ] if ; inline diff --git a/extra/http/client/client.factor b/extra/http/client/client.factor index 7b48bf93af..56957b021c 100755 --- a/extra/http/client/client.factor +++ b/extra/http/client/client.factor @@ -3,8 +3,13 @@ USING: assocs http kernel math math.parser namespaces sequences io io.sockets io.streams.string io.files io.timeouts strings splitting calendar continuations accessors vectors math.order -io.encodings.8-bit io.encodings.binary io.streams.duplex -fry debugger inspector ascii urls ; +io.encodings +io.encodings.string +io.encodings.ascii +io.encodings.8-bit +io.encodings.binary +io.streams.duplex +fry debugger inspector ascii urls present ; IN: http.client : max-redirects 10 ; @@ -15,7 +20,7 @@ M: too-many-redirects summary drop [ "Redirection limit of " % max-redirects # " exceeded" % ] "" make ; -DEFER: http-request +DEFER: (http-request) >method http-request + "GET" >>method (http-request) ] [ too-many-redirects ] if @@ -45,15 +50,21 @@ PRIVATE> : read-chunks ( -- ) read-chunk-size dup zero? - [ drop ] [ read % read-crlf "" assert= read-chunks ] if ; + [ drop ] [ read % read-crlf B{ } assert= read-chunks ] if ; : read-response-body ( response -- response data ) - dup "transfer-encoding" header "chunked" = - [ [ read-chunks ] "" make ] [ input-stream get contents ] if ; + dup "transfer-encoding" header "chunked" = [ + binary decode-input + [ read-chunks ] B{ } make + over content-charset>> decode + ] [ + dup content-charset>> decode-input + input-stream get contents + ] if ; -: http-request ( request -- response data ) +: (http-request) ( request -- response data ) dup request [ - dup url>> url-addr latin1 [ + dup url>> url-addr ascii [ 1 minutes timeouts write-request read-response @@ -62,14 +73,6 @@ PRIVATE> do-redirect ] with-variable ; -: ( url -- request ) - - "GET" >>method - swap >url ensure-port >>url ; - -: http-get* ( url -- response data ) - http-request ; - : success? ( code -- ? ) 200 = ; ERROR: download-failed response body ; @@ -84,18 +87,28 @@ M: download-failed error. ] [ body>> write ] bi ; -: check-response ( response string -- string ) - over code>> success? [ nip ] [ download-failed ] if ; +: check-response ( response data -- response data ) + over code>> success? [ download-failed ] unless ; -: http-get ( url -- string ) - http-get* check-response ; +: http-request ( request -- response data ) + (http-request) check-response ; + +: ( url -- request ) + + "GET" >>method + swap >url ensure-port >>url ; + +: http-get ( url -- response data ) + http-request ; : download-name ( url -- name ) - file-name "?" split1 drop "/" ?tail drop ; + present file-name "?" split1 drop "/" ?tail drop ; : download-to ( url file -- ) #! Downloads the contents of a URL to a file. - [ http-get ] dip latin1 [ write ] with-file-writer ; + swap http-get + [ content-charset>> ] [ '[ , write ] ] bi* + with-file-writer ; : download ( url -- ) dup download-name download-to ; diff --git a/extra/http/http-tests.factor b/extra/http/http-tests.factor index c1d5b46aa4..6f2171a956 100755 --- a/extra/http/http-tests.factor +++ b/extra/http/http-tests.factor @@ -1,5 +1,5 @@ USING: http tools.test multiline tuple-syntax -io.streams.string kernel arrays splitting sequences +io.streams.string io.encodings.utf8 kernel arrays splitting sequences assocs io.sockets db db.sqlite continuations urls hashtables ; IN: http.tests @@ -78,7 +78,7 @@ must-fail-with STRING: read-response-test-1 HTTP/1.1 404 not found -Content-Type: text/html; charset=UTF8 +Content-Type: text/html; charset=UTF-8 blah ; @@ -88,10 +88,10 @@ blah version: "1.1" code: 404 message: "not found" - header: H{ { "content-type" "text/html; charset=UTF8" } } + header: H{ { "content-type" "text/html; charset=UTF-8" } } cookies: { } content-type: "text/html" - content-charset: "UTF8" + content-charset: utf8 } ] [ read-response-test-1 lf>crlf @@ -101,7 +101,7 @@ blah STRING: read-response-test-1' HTTP/1.1 404 not found -content-type: text/html; charset=UTF8 +content-type: text/html; charset=UTF-8 ; @@ -160,14 +160,14 @@ test-db [ [ t ] [ "resource:extra/http/test/foo.html" ascii file-contents - "http://localhost:1237/nested/foo.html" http-get = + "http://localhost:1237/nested/foo.html" http-get nip = ] unit-test -[ "http://localhost:1237/redirect-loop" http-get ] +[ "http://localhost:1237/redirect-loop" http-get nip ] [ too-many-redirects? ] must-fail-with [ "Goodbye" ] [ - "http://localhost:1237/quit" http-get + "http://localhost:1237/quit" http-get nip ] unit-test ! Dispatcher bugs @@ -194,12 +194,12 @@ test-db [ : 404? [ download-failed? ] [ response>> code>> 404 = ] bi and ; ! This should give a 404 not an infinite redirect loop -[ "http://localhost:1237/d/blah" http-get ] [ 404? ] must-fail-with +[ "http://localhost:1237/d/blah" http-get nip ] [ 404? ] must-fail-with ! This should give a 404 not an infinite redirect loop -[ "http://localhost:1237/blah/" http-get ] [ 404? ] must-fail-with +[ "http://localhost:1237/blah/" http-get nip ] [ 404? ] must-fail-with -[ "Goodbye" ] [ "http://localhost:1237/quit" http-get ] unit-test +[ "Goodbye" ] [ "http://localhost:1237/quit" http-get nip ] unit-test [ ] [ [ @@ -218,9 +218,9 @@ test-db [ [ ] [ 100 sleep ] unit-test -[ "Hi" ] [ "http://localhost:1237/" http-get ] unit-test +[ "Hi" ] [ "http://localhost:1237/" http-get nip ] unit-test -[ "Goodbye" ] [ "http://localhost:1237/quit" http-get ] unit-test +[ "Goodbye" ] [ "http://localhost:1237/quit" http-get nip ] unit-test USING: html.components html.elements xml xml.utilities validators furnace furnace.flash ; @@ -253,7 +253,7 @@ SYMBOL: a : test-a string>xml "input" tag-named "value" swap at ; [ "3" ] [ - "http://localhost:1237/" http-get* + "http://localhost:1237/" http-get swap dup cookies>> "cookies" set session-id-key get-cookie value>> "session-id" set test-a ] unit-test @@ -273,4 +273,4 @@ SYMBOL: a [ 4 ] [ a get-global ] unit-test -[ "Goodbye" ] [ "http://localhost:1237/quit" http-get ] unit-test +[ "Goodbye" ] [ "http://localhost:1237/quit" http-get nip ] unit-test diff --git a/extra/http/http.factor b/extra/http/http.factor index 04bebce926..d7fc1b766e 100755 --- a/extra/http/http.factor +++ b/extra/http/http.factor @@ -7,6 +7,7 @@ strings vectors hashtables quotations arrays byte-arrays math.parser calendar calendar.format present io io.server io.sockets.secure +io.encodings.iana io.encodings.binary io.encodings.8-bit unicode.case unicode.categories qualified @@ -28,7 +29,8 @@ IN: http "header" get add-header ] [ - ": " split1 dup [ + ":" split1 dup [ + [ blank? ] left-trim swap >lower dup "last-header" set "header" get add-header ] [ @@ -36,20 +38,20 @@ IN: http ] if ] if ; -: read-lf ( -- string ) +: read-lf ( -- bytes ) "\n" read-until CHAR: \n assert= ; -: read-crlf ( -- string ) +: read-crlf ( -- bytes ) "\r" read-until [ CHAR: \r assert= read1 CHAR: \n assert= ] when* ; -: read-header-line ( -- ) +: (read-header) ( -- ) read-crlf dup - empty? [ drop ] [ header-line read-header-line ] if ; + empty? [ drop ] [ header-line (read-header) ] if ; : read-header ( -- assoc ) H{ } clone [ - "header" [ read-header-line ] with-variable + "header" [ (read-header) ] with-variable ] keep ; : header-value>string ( value -- string ) @@ -66,7 +68,8 @@ IN: http : write-header ( assoc -- ) >alist sort-keys [ - swap url-encode write ": " write + swap + check-header-string write ": " write header-value>string check-header-string write crlf ] assoc-each crlf ; @@ -299,6 +302,7 @@ body ; H{ } clone >>header "close" "connection" set-header now timestamp>http-string "date" set-header + latin1 >>content-charset V{ } clone >>cookies ; : read-response-version ( response -- response ) @@ -319,7 +323,9 @@ body ; read-header >>header dup "set-cookie" header parse-cookies >>cookies dup "content-type" header [ - parse-content-type [ >>content-type ] [ >>content-charset ] bi* + parse-content-type + [ >>content-type ] + [ name>encoding binary or >>content-charset ] bi* ] when* ; : read-response ( -- response ) @@ -341,7 +347,8 @@ body ; : unparse-content-type ( request -- content-type ) [ content-type>> "application/octet-stream" or ] - [ content-charset>> ] bi + [ content-charset>> encoding>name ] + bi [ "; charset=" swap 3append ] when* ; : write-response-header ( response -- response ) diff --git a/extra/http/server/responses/responses.factor b/extra/http/server/responses/responses.factor index 277ca392b7..4056f0c7f0 100644 --- a/extra/http/server/responses/responses.factor +++ b/extra/http/server/responses/responses.factor @@ -1,13 +1,14 @@ ! Copyright (C) 2008 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: html.elements math.parser http accessors kernel -io io.streams.string ; +io io.streams.string io.encodings.utf8 ; IN: http.server.responses : ( body content-type -- response ) 200 >>code "Document follows" >>message + utf8 >>content-charset swap >>content-type swap >>body ; diff --git a/extra/http/server/server.factor b/extra/http/server/server.factor index fc50432030..792757b182 100755 --- a/extra/http/server/server.factor +++ b/extra/http/server/server.factor @@ -1,10 +1,21 @@ ! Copyright (C) 2003, 2008 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: kernel accessors sequences arrays namespaces splitting -vocabs.loader http http.server.responses logging calendar -destructors html.elements html.streams io.server -io.encodings.8-bit io.timeouts io assocs debugger continuations -fry tools.vocabs math ; +vocabs.loader destructors assocs debugger continuations +tools.vocabs math +io +io.server +io.encodings +io.encodings.utf8 +io.encodings.ascii +io.encodings.binary +io.streams.limited +io.timeouts +fry logging calendar +http +http.server.responses +html.elements +html.streams ; IN: http.server SYMBOL: responder-nesting @@ -43,19 +54,29 @@ main-responder global [ <404> or ] change-at swap development-mode get [ '[ , http-error. ] >>body ] [ drop ] if ; : do-response ( response -- ) - dup write-response - request get method>> "HEAD" = [ drop ] [ - '[ , write-response-body ] - [ - development-mode get - [ http-error. ] [ drop "Response error" ] if - ] recover - ] if ; + [ write-response ] + [ + request get method>> "HEAD" = [ drop ] [ + '[ + , + [ content-charset>> encode-output ] + [ write-response-body ] + bi + ] + [ + utf8 [ + development-mode get + [ http-error. ] [ drop "Response error" throw ] if + ] with-encoded-output + ] recover + ] if + ] bi ; LOG: httpd-hit NOTICE : log-request ( request -- ) - [ method>> ] [ url>> [ host>> ] [ path>> ] bi ] bi 3array httpd-hit ; + [ method>> ] [ url>> [ host>> ] [ path>> ] bi ] bi + 3array httpd-hit ; : split-path ( string -- path ) "/" split harvest ; @@ -79,9 +100,15 @@ LOG: httpd-hit NOTICE development-mode get-global [ global [ refresh-all ] bind ] when ; +: setup-limits ( -- ) + 1 minutes timeouts + 64 1024 * limit-input ; + : handle-client ( -- ) [ - 1 minutes timeouts + setup-limits + ascii decode-input + ascii encode-output ?refresh-all read-request do-request @@ -90,7 +117,7 @@ LOG: httpd-hit NOTICE : httpd ( port -- ) dup integer? [ internet-server ] when - "http.server" latin1 [ handle-client ] with-server ; + "http.server" binary [ handle-client ] with-server ; : httpd-main ( -- ) 8888 httpd ; diff --git a/extra/http/server/static/static.factor b/extra/http/server/static/static.factor index 1d86a73cfa..9d76c82e4a 100755 --- a/extra/http/server/static/static.factor +++ b/extra/http/server/static/static.factor @@ -29,7 +29,10 @@ TUPLE: file-responder root hook special allow-listings ; H{ } clone >>special ; : (serve-static) ( path mime-type -- response ) - [ [ binary &dispose ] dip ] + [ + [ binary &dispose ] dip + binary >>content-charset + ] [ drop file-info [ size>> ] [ modified>> ] bi ] 2bi [ "content-length" set-header ] [ "last-modified" set-header ] bi* ; diff --git a/extra/io/encodings/iana/iana.factor b/extra/io/encodings/iana/iana.factor index dd429c1670..4368360a4d 100755 --- a/extra/io/encodings/iana/iana.factor +++ b/extra/io/encodings/iana/iana.factor @@ -41,6 +41,13 @@ PRIVATE> [ second ] map { "None" } diff ] map ; +: more-aliases ( -- assoc ) + H{ + { "UTF8" utf8 } + { "utf8" utf8 } + { "utf-8" utf8 } + } ; + : make-n>e ( stream -- n>e ) parse-iana [ [ dup [ @@ -48,7 +55,7 @@ PRIVATE> [ swap [ set ] with each ] [ drop ] if* ] with each - ] each ] H{ } make-assoc ; + ] each ] H{ } make-assoc more-aliases assoc-union ; PRIVATE> "resource:extra/io/encodings/iana/character-sets" diff --git a/extra/io/streams/limited/limited-tests.factor b/extra/io/streams/limited/limited-tests.factor new file mode 100644 index 0000000000..d160a3f756 --- /dev/null +++ b/extra/io/streams/limited/limited-tests.factor @@ -0,0 +1,32 @@ +IN: io.streams.limited.tests +USING: io io.streams.limited io.encodings io.encodings.string +io.encodings.ascii io.encodings.binary io.streams.byte-array +namespaces tools.test strings kernel ; + +[ ] [ + "hello world\nhow are you today\nthis is a very long line indeed" + ascii encode binary "data" set +] unit-test + +[ ] [ "data" get 24 "limited" set ] unit-test + +[ CHAR: h ] [ "limited" get stream-read1 ] unit-test + +[ ] [ "limited" get ascii "decoded" set ] unit-test + +[ "ello world" ] [ "decoded" get stream-readln ] unit-test + +[ "how " ] [ 4 "decoded" get stream-read ] unit-test + +[ "decoded" get stream-readln ] [ limit-exceeded? ] must-fail-with + +[ ] [ + "abc\ndef\nghi" + ascii encode binary "data" set +] unit-test + +[ ] [ "data" get 7 "limited" set ] unit-test + +[ "abc" CHAR: \n ] [ "\n" "limited" get stream-read-until [ >string ] dip ] unit-test + +[ "\n" "limited" get stream-read-until ] [ limit-exceeded? ] must-fail-with diff --git a/extra/io/streams/limited/limited.factor b/extra/io/streams/limited/limited.factor new file mode 100644 index 0000000000..1c6a172e97 --- /dev/null +++ b/extra/io/streams/limited/limited.factor @@ -0,0 +1,42 @@ +! Copyright (C) 2008 Slava Pestov +! See http://factorcode.org/license.txt for BSD license. +USING: kernel math io destructors accessors sequences +namespaces ; +IN: io.streams.limited + +TUPLE: limited-stream stream count limit ; + +: ( limit stream -- stream' ) + limited-stream new + swap >>stream + swap >>limit + 0 >>count ; + +: limit-input ( limit -- ) + input-stream [ ] change ; + +ERROR: limit-exceeded ; + +: check-limit ( n stream -- ) + [ + ] change-count + [ count>> ] [ limit>> ] bi >= + [ limit-exceeded ] when ; inline + +M: limited-stream stream-read1 + 1 over check-limit stream>> stream-read1 ; + +M: limited-stream stream-read + 2dup check-limit stream>> stream-read ; + +M: limited-stream stream-read-partial + 2dup check-limit stream>> stream-read-partial ; + +: (read-until) ( stream seps buf -- stream seps buf sep/f ) + 3dup [ [ stream-read1 dup ] dip memq? ] dip + swap [ drop ] [ push (read-until) ] if ; + +M: limited-stream stream-read-until + swap BV{ } clone (read-until) [ 2nip B{ } like ] dip ; + +M: limited-stream dispose + stream>> dispose ; diff --git a/extra/syndication/syndication.factor b/extra/syndication/syndication.factor index 12beaf4cd7..32b3c925f3 100644 --- a/extra/syndication/syndication.factor +++ b/extra/syndication/syndication.factor @@ -107,7 +107,7 @@ TUPLE: entry title url description date ; : download-feed ( url -- feed ) #! Retrieve an news syndication file, return as a feed tuple. - http-get read-feed ; + http-get nip read-feed ; ! Atom generation : simple-tag, ( content name -- ) diff --git a/extra/yahoo/yahoo.factor b/extra/yahoo/yahoo.factor index c47b8be15c..d163c8f1ac 100755 --- a/extra/yahoo/yahoo.factor +++ b/extra/yahoo/yahoo.factor @@ -59,4 +59,4 @@ format similar-ok language country site subscription license ; swap >>query ; : search-yahoo ( search -- seq ) - query http-get string>xml parse-yahoo ; + query http-get nip string>xml parse-yahoo ;