Request size limit and encoding support for HTTP server

db4
Slava Pestov 2008-06-12 03:50:20 -05:00
parent e405de8bba
commit b01d1f8a56
13 changed files with 201 additions and 69 deletions

View File

@ -7,7 +7,7 @@ kernel io.files bootstrap.image sequences io ;
: url "http://factorcode.org/images/latest/" ; : url "http://factorcode.org/images/latest/" ;
: download-checksums ( -- alist ) : download-checksums ( -- alist )
url "checksums.txt" append http-get url "checksums.txt" append http-get nip
string-lines [ " " split1 ] { } map>assoc ; string-lines [ " " split1 ] { } map>assoc ;
: need-new-image? ( image -- ? ) : need-new-image? ( image -- ? )

View File

@ -6,7 +6,7 @@ IN: html.parser.analyzer
TUPLE: link attributes clickable ; TUPLE: link attributes clickable ;
: scrape-html ( url -- vector ) : scrape-html ( url -- vector )
http-get parse-html ; http-get nip parse-html ;
: (find-relative) : (find-relative)
[ >r + dup r> ?nth* [ 2drop f f ] unless ] [ 2drop f ] if ; inline [ >r + dup r> ?nth* [ 2drop f f ] unless ] [ 2drop f ] if ; inline

View File

@ -3,8 +3,13 @@
USING: assocs http kernel math math.parser namespaces sequences USING: assocs http kernel math math.parser namespaces sequences
io io.sockets io.streams.string io.files io.timeouts strings io io.sockets io.streams.string io.files io.timeouts strings
splitting calendar continuations accessors vectors math.order splitting calendar continuations accessors vectors math.order
io.encodings.8-bit io.encodings.binary io.streams.duplex io.encodings
fry debugger inspector ascii urls ; io.encodings.string
io.encodings.ascii
io.encodings.8-bit
io.encodings.binary
io.streams.duplex
fry debugger inspector ascii urls present ;
IN: http.client IN: http.client
: max-redirects 10 ; : max-redirects 10 ;
@ -15,7 +20,7 @@ M: too-many-redirects summary
drop drop
[ "Redirection limit of " % max-redirects # " exceeded" % ] "" make ; [ "Redirection limit of " % max-redirects # " exceeded" % ] "" make ;
DEFER: http-request DEFER: (http-request)
<PRIVATE <PRIVATE
@ -31,7 +36,7 @@ SYMBOL: redirects
redirects get max-redirects < [ redirects get max-redirects < [
request get request get
swap "location" header redirect-url swap "location" header redirect-url
"GET" >>method http-request "GET" >>method (http-request)
] [ ] [
too-many-redirects too-many-redirects
] if ] if
@ -45,15 +50,21 @@ PRIVATE>
: read-chunks ( -- ) : read-chunks ( -- )
read-chunk-size dup zero? read-chunk-size dup zero?
[ drop ] [ read % read-crlf "" assert= read-chunks ] if ; [ drop ] [ read % read-crlf B{ } assert= read-chunks ] if ;
: read-response-body ( response -- response data ) : read-response-body ( response -- response data )
dup "transfer-encoding" header "chunked" = dup "transfer-encoding" header "chunked" = [
[ [ read-chunks ] "" make ] [ input-stream get contents ] if ; binary decode-input
[ read-chunks ] B{ } make
over content-charset>> decode
] [
dup content-charset>> decode-input
input-stream get contents
] if ;
: http-request ( request -- response data ) : (http-request) ( request -- response data )
dup request [ dup request [
dup url>> url-addr latin1 [ dup url>> url-addr ascii [
1 minutes timeouts 1 minutes timeouts
write-request write-request
read-response read-response
@ -62,14 +73,6 @@ PRIVATE>
do-redirect do-redirect
] with-variable ; ] with-variable ;
: <get-request> ( url -- request )
<request>
"GET" >>method
swap >url ensure-port >>url ;
: http-get* ( url -- response data )
<get-request> http-request ;
: success? ( code -- ? ) 200 = ; : success? ( code -- ? ) 200 = ;
ERROR: download-failed response body ; ERROR: download-failed response body ;
@ -84,18 +87,28 @@ M: download-failed error.
] ]
[ body>> write ] bi ; [ body>> write ] bi ;
: check-response ( response string -- string ) : check-response ( response data -- response data )
over code>> success? [ nip ] [ download-failed ] if ; over code>> success? [ download-failed ] unless ;
: http-get ( url -- string ) : http-request ( request -- response data )
http-get* check-response ; (http-request) check-response ;
: <get-request> ( url -- request )
<request>
"GET" >>method
swap >url ensure-port >>url ;
: http-get ( url -- response data )
<get-request> http-request ;
: download-name ( url -- name ) : download-name ( url -- name )
file-name "?" split1 drop "/" ?tail drop ; present file-name "?" split1 drop "/" ?tail drop ;
: download-to ( url file -- ) : download-to ( url file -- )
#! Downloads the contents of a URL to a file. #! Downloads the contents of a URL to a file.
[ http-get ] dip latin1 [ write ] with-file-writer ; swap http-get
[ content-charset>> ] [ '[ , write ] ] bi*
with-file-writer ;
: download ( url -- ) : download ( url -- )
dup download-name download-to ; dup download-name download-to ;

View File

@ -1,5 +1,5 @@
USING: http tools.test multiline tuple-syntax USING: http tools.test multiline tuple-syntax
io.streams.string kernel arrays splitting sequences io.streams.string io.encodings.utf8 kernel arrays splitting sequences
assocs io.sockets db db.sqlite continuations urls hashtables ; assocs io.sockets db db.sqlite continuations urls hashtables ;
IN: http.tests IN: http.tests
@ -78,7 +78,7 @@ must-fail-with
STRING: read-response-test-1 STRING: read-response-test-1
HTTP/1.1 404 not found HTTP/1.1 404 not found
Content-Type: text/html; charset=UTF8 Content-Type: text/html; charset=UTF-8
blah blah
; ;
@ -88,10 +88,10 @@ blah
version: "1.1" version: "1.1"
code: 404 code: 404
message: "not found" message: "not found"
header: H{ { "content-type" "text/html; charset=UTF8" } } header: H{ { "content-type" "text/html; charset=UTF-8" } }
cookies: { } cookies: { }
content-type: "text/html" content-type: "text/html"
content-charset: "UTF8" content-charset: utf8
} }
] [ ] [
read-response-test-1 lf>crlf read-response-test-1 lf>crlf
@ -101,7 +101,7 @@ blah
STRING: read-response-test-1' STRING: read-response-test-1'
HTTP/1.1 404 not found HTTP/1.1 404 not found
content-type: text/html; charset=UTF8 content-type: text/html; charset=UTF-8
; ;
@ -160,14 +160,14 @@ test-db [
[ t ] [ [ t ] [
"resource:extra/http/test/foo.html" ascii file-contents "resource:extra/http/test/foo.html" ascii file-contents
"http://localhost:1237/nested/foo.html" http-get = "http://localhost:1237/nested/foo.html" http-get nip =
] unit-test ] unit-test
[ "http://localhost:1237/redirect-loop" http-get ] [ "http://localhost:1237/redirect-loop" http-get nip ]
[ too-many-redirects? ] must-fail-with [ too-many-redirects? ] must-fail-with
[ "Goodbye" ] [ [ "Goodbye" ] [
"http://localhost:1237/quit" http-get "http://localhost:1237/quit" http-get nip
] unit-test ] unit-test
! Dispatcher bugs ! Dispatcher bugs
@ -194,12 +194,12 @@ test-db [
: 404? [ download-failed? ] [ response>> code>> 404 = ] bi and ; : 404? [ download-failed? ] [ response>> code>> 404 = ] bi and ;
! This should give a 404 not an infinite redirect loop ! This should give a 404 not an infinite redirect loop
[ "http://localhost:1237/d/blah" http-get ] [ 404? ] must-fail-with [ "http://localhost:1237/d/blah" http-get nip ] [ 404? ] must-fail-with
! This should give a 404 not an infinite redirect loop ! This should give a 404 not an infinite redirect loop
[ "http://localhost:1237/blah/" http-get ] [ 404? ] must-fail-with [ "http://localhost:1237/blah/" http-get nip ] [ 404? ] must-fail-with
[ "Goodbye" ] [ "http://localhost:1237/quit" http-get ] unit-test [ "Goodbye" ] [ "http://localhost:1237/quit" http-get nip ] unit-test
[ ] [ [ ] [
[ [
@ -218,9 +218,9 @@ test-db [
[ ] [ 100 sleep ] unit-test [ ] [ 100 sleep ] unit-test
[ "Hi" ] [ "http://localhost:1237/" http-get ] unit-test [ "Hi" ] [ "http://localhost:1237/" http-get nip ] unit-test
[ "Goodbye" ] [ "http://localhost:1237/quit" http-get ] unit-test [ "Goodbye" ] [ "http://localhost:1237/quit" http-get nip ] unit-test
USING: html.components html.elements xml xml.utilities validators USING: html.components html.elements xml xml.utilities validators
furnace furnace.flash ; furnace furnace.flash ;
@ -253,7 +253,7 @@ SYMBOL: a
: test-a string>xml "input" tag-named "value" swap at ; : test-a string>xml "input" tag-named "value" swap at ;
[ "3" ] [ [ "3" ] [
"http://localhost:1237/" http-get* "http://localhost:1237/" http-get
swap dup cookies>> "cookies" set session-id-key get-cookie swap dup cookies>> "cookies" set session-id-key get-cookie
value>> "session-id" set test-a value>> "session-id" set test-a
] unit-test ] unit-test
@ -273,4 +273,4 @@ SYMBOL: a
[ 4 ] [ a get-global ] unit-test [ 4 ] [ a get-global ] unit-test
[ "Goodbye" ] [ "http://localhost:1237/quit" http-get ] unit-test [ "Goodbye" ] [ "http://localhost:1237/quit" http-get nip ] unit-test

View File

@ -7,6 +7,7 @@ strings vectors hashtables quotations arrays byte-arrays
math.parser calendar calendar.format present math.parser calendar calendar.format present
io io.server io.sockets.secure io io.server io.sockets.secure
io.encodings.iana io.encodings.binary io.encodings.8-bit
unicode.case unicode.categories qualified unicode.case unicode.categories qualified
@ -28,7 +29,8 @@ IN: http
"header" get "header" get
add-header add-header
] [ ] [
": " split1 dup [ ":" split1 dup [
[ blank? ] left-trim
swap >lower dup "last-header" set swap >lower dup "last-header" set
"header" get add-header "header" get add-header
] [ ] [
@ -36,20 +38,20 @@ IN: http
] if ] if
] if ; ] if ;
: read-lf ( -- string ) : read-lf ( -- bytes )
"\n" read-until CHAR: \n assert= ; "\n" read-until CHAR: \n assert= ;
: read-crlf ( -- string ) : read-crlf ( -- bytes )
"\r" read-until "\r" read-until
[ CHAR: \r assert= read1 CHAR: \n assert= ] when* ; [ CHAR: \r assert= read1 CHAR: \n assert= ] when* ;
: read-header-line ( -- ) : (read-header) ( -- )
read-crlf dup read-crlf dup
empty? [ drop ] [ header-line read-header-line ] if ; empty? [ drop ] [ header-line (read-header) ] if ;
: read-header ( -- assoc ) : read-header ( -- assoc )
H{ } clone [ H{ } clone [
"header" [ read-header-line ] with-variable "header" [ (read-header) ] with-variable
] keep ; ] keep ;
: header-value>string ( value -- string ) : header-value>string ( value -- string )
@ -66,7 +68,8 @@ IN: http
: write-header ( assoc -- ) : write-header ( assoc -- )
>alist sort-keys [ >alist sort-keys [
swap url-encode write ": " write swap
check-header-string write ": " write
header-value>string check-header-string write crlf header-value>string check-header-string write crlf
] assoc-each crlf ; ] assoc-each crlf ;
@ -299,6 +302,7 @@ body ;
H{ } clone >>header H{ } clone >>header
"close" "connection" set-header "close" "connection" set-header
now timestamp>http-string "date" set-header now timestamp>http-string "date" set-header
latin1 >>content-charset
V{ } clone >>cookies ; V{ } clone >>cookies ;
: read-response-version ( response -- response ) : read-response-version ( response -- response )
@ -319,7 +323,9 @@ body ;
read-header >>header read-header >>header
dup "set-cookie" header parse-cookies >>cookies dup "set-cookie" header parse-cookies >>cookies
dup "content-type" header [ dup "content-type" header [
parse-content-type [ >>content-type ] [ >>content-charset ] bi* parse-content-type
[ >>content-type ]
[ name>encoding binary or >>content-charset ] bi*
] when* ; ] when* ;
: read-response ( -- response ) : read-response ( -- response )
@ -341,7 +347,8 @@ body ;
: unparse-content-type ( request -- content-type ) : unparse-content-type ( request -- content-type )
[ content-type>> "application/octet-stream" or ] [ content-type>> "application/octet-stream" or ]
[ content-charset>> ] bi [ content-charset>> encoding>name ]
bi
[ "; charset=" swap 3append ] when* ; [ "; charset=" swap 3append ] when* ;
: write-response-header ( response -- response ) : write-response-header ( response -- response )

View File

@ -1,13 +1,14 @@
! Copyright (C) 2008 Slava Pestov. ! Copyright (C) 2008 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: html.elements math.parser http accessors kernel USING: html.elements math.parser http accessors kernel
io io.streams.string ; io io.streams.string io.encodings.utf8 ;
IN: http.server.responses IN: http.server.responses
: <content> ( body content-type -- response ) : <content> ( body content-type -- response )
<response> <response>
200 >>code 200 >>code
"Document follows" >>message "Document follows" >>message
utf8 >>content-charset
swap >>content-type swap >>content-type
swap >>body ; swap >>body ;

View File

@ -1,10 +1,21 @@
! Copyright (C) 2003, 2008 Slava Pestov. ! Copyright (C) 2003, 2008 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: kernel accessors sequences arrays namespaces splitting USING: kernel accessors sequences arrays namespaces splitting
vocabs.loader http http.server.responses logging calendar vocabs.loader destructors assocs debugger continuations
destructors html.elements html.streams io.server tools.vocabs math
io.encodings.8-bit io.timeouts io assocs debugger continuations io
fry tools.vocabs math ; io.server
io.encodings
io.encodings.utf8
io.encodings.ascii
io.encodings.binary
io.streams.limited
io.timeouts
fry logging calendar
http
http.server.responses
html.elements
html.streams ;
IN: http.server IN: http.server
SYMBOL: responder-nesting SYMBOL: responder-nesting
@ -43,19 +54,29 @@ main-responder global [ <404> <trivial-responder> or ] change-at
swap development-mode get [ '[ , http-error. ] >>body ] [ drop ] if ; swap development-mode get [ '[ , http-error. ] >>body ] [ drop ] if ;
: do-response ( response -- ) : do-response ( response -- )
dup write-response [ write-response ]
request get method>> "HEAD" = [ drop ] [ [
'[ , write-response-body ] request get method>> "HEAD" = [ drop ] [
[ '[
development-mode get ,
[ http-error. ] [ drop "Response error" ] if [ content-charset>> encode-output ]
] recover [ write-response-body ]
] if ; bi
]
[
utf8 [
development-mode get
[ http-error. ] [ drop "Response error" throw ] if
] with-encoded-output
] recover
] if
] bi ;
LOG: httpd-hit NOTICE LOG: httpd-hit NOTICE
: log-request ( request -- ) : log-request ( request -- )
[ method>> ] [ url>> [ host>> ] [ path>> ] bi ] bi 3array httpd-hit ; [ method>> ] [ url>> [ host>> ] [ path>> ] bi ] bi
3array httpd-hit ;
: split-path ( string -- path ) : split-path ( string -- path )
"/" split harvest ; "/" split harvest ;
@ -79,9 +100,15 @@ LOG: httpd-hit NOTICE
development-mode get-global development-mode get-global
[ global [ refresh-all ] bind ] when ; [ global [ refresh-all ] bind ] when ;
: setup-limits ( -- )
1 minutes timeouts
64 1024 * limit-input ;
: handle-client ( -- ) : handle-client ( -- )
[ [
1 minutes timeouts setup-limits
ascii decode-input
ascii encode-output
?refresh-all ?refresh-all
read-request read-request
do-request do-request
@ -90,7 +117,7 @@ LOG: httpd-hit NOTICE
: httpd ( port -- ) : httpd ( port -- )
dup integer? [ internet-server ] when dup integer? [ internet-server ] when
"http.server" latin1 [ handle-client ] with-server ; "http.server" binary [ handle-client ] with-server ;
: httpd-main ( -- ) : httpd-main ( -- )
8888 httpd ; 8888 httpd ;

View File

@ -29,7 +29,10 @@ TUPLE: file-responder root hook special allow-listings ;
H{ } clone >>special ; H{ } clone >>special ;
: (serve-static) ( path mime-type -- response ) : (serve-static) ( path mime-type -- response )
[ [ binary <file-reader> &dispose ] dip <content> ] [
[ binary <file-reader> &dispose ] dip
<content> binary >>content-charset
]
[ drop file-info [ size>> ] [ modified>> ] bi ] 2bi [ drop file-info [ size>> ] [ modified>> ] bi ] 2bi
[ "content-length" set-header ] [ "content-length" set-header ]
[ "last-modified" set-header ] bi* ; [ "last-modified" set-header ] bi* ;

View File

@ -41,6 +41,13 @@ PRIVATE>
[ second ] map { "None" } diff [ second ] map { "None" } diff
] map ; ] map ;
: more-aliases ( -- assoc )
H{
{ "UTF8" utf8 }
{ "utf8" utf8 }
{ "utf-8" utf8 }
} ;
: make-n>e ( stream -- n>e ) : make-n>e ( stream -- n>e )
parse-iana [ [ parse-iana [ [
dup [ dup [
@ -48,7 +55,7 @@ PRIVATE>
[ swap [ set ] with each ] [ swap [ set ] with each ]
[ drop ] if* [ drop ] if*
] with each ] with each
] each ] H{ } make-assoc ; ] each ] H{ } make-assoc more-aliases assoc-union ;
PRIVATE> PRIVATE>
"resource:extra/io/encodings/iana/character-sets" "resource:extra/io/encodings/iana/character-sets"

View File

@ -0,0 +1,32 @@
IN: io.streams.limited.tests
USING: io io.streams.limited io.encodings io.encodings.string
io.encodings.ascii io.encodings.binary io.streams.byte-array
namespaces tools.test strings kernel ;
[ ] [
"hello world\nhow are you today\nthis is a very long line indeed"
ascii encode binary <byte-reader> "data" set
] unit-test
[ ] [ "data" get 24 <limited-stream> "limited" set ] unit-test
[ CHAR: h ] [ "limited" get stream-read1 ] unit-test
[ ] [ "limited" get ascii <decoder> "decoded" set ] unit-test
[ "ello world" ] [ "decoded" get stream-readln ] unit-test
[ "how " ] [ 4 "decoded" get stream-read ] unit-test
[ "decoded" get stream-readln ] [ limit-exceeded? ] must-fail-with
[ ] [
"abc\ndef\nghi"
ascii encode binary <byte-reader> "data" set
] unit-test
[ ] [ "data" get 7 <limited-stream> "limited" set ] unit-test
[ "abc" CHAR: \n ] [ "\n" "limited" get stream-read-until [ >string ] dip ] unit-test
[ "\n" "limited" get stream-read-until ] [ limit-exceeded? ] must-fail-with

View File

@ -0,0 +1,42 @@
! Copyright (C) 2008 Slava Pestov
! See http://factorcode.org/license.txt for BSD license.
USING: kernel math io destructors accessors sequences
namespaces ;
IN: io.streams.limited
TUPLE: limited-stream stream count limit ;
: <limited-stream> ( limit stream -- stream' )
limited-stream new
swap >>stream
swap >>limit
0 >>count ;
: limit-input ( limit -- )
input-stream [ <limited-stream> ] change ;
ERROR: limit-exceeded ;
: check-limit ( n stream -- )
[ + ] change-count
[ count>> ] [ limit>> ] bi >=
[ limit-exceeded ] when ; inline
M: limited-stream stream-read1
1 over check-limit stream>> stream-read1 ;
M: limited-stream stream-read
2dup check-limit stream>> stream-read ;
M: limited-stream stream-read-partial
2dup check-limit stream>> stream-read-partial ;
: (read-until) ( stream seps buf -- stream seps buf sep/f )
3dup [ [ stream-read1 dup ] dip memq? ] dip
swap [ drop ] [ push (read-until) ] if ;
M: limited-stream stream-read-until
swap BV{ } clone (read-until) [ 2nip B{ } like ] dip ;
M: limited-stream dispose
stream>> dispose ;

View File

@ -107,7 +107,7 @@ TUPLE: entry title url description date ;
: download-feed ( url -- feed ) : download-feed ( url -- feed )
#! Retrieve an news syndication file, return as a feed tuple. #! Retrieve an news syndication file, return as a feed tuple.
http-get read-feed ; http-get nip read-feed ;
! Atom generation ! Atom generation
: simple-tag, ( content name -- ) : simple-tag, ( content name -- )

View File

@ -59,4 +59,4 @@ format similar-ok language country site subscription license ;
swap >>query ; swap >>query ;
: search-yahoo ( search -- seq ) : search-yahoo ( search -- seq )
query http-get string>xml parse-yahoo ; query http-get nip string>xml parse-yahoo ;