Request size limit and encoding support for HTTP server

db4
Slava Pestov 2008-06-12 03:50:20 -05:00
parent e405de8bba
commit b01d1f8a56
13 changed files with 201 additions and 69 deletions

View File

@ -7,7 +7,7 @@ kernel io.files bootstrap.image sequences io ;
: url "http://factorcode.org/images/latest/" ;
: download-checksums ( -- alist )
url "checksums.txt" append http-get
url "checksums.txt" append http-get nip
string-lines [ " " split1 ] { } map>assoc ;
: need-new-image? ( image -- ? )

View File

@ -6,7 +6,7 @@ IN: html.parser.analyzer
TUPLE: link attributes clickable ;
: scrape-html ( url -- vector )
http-get parse-html ;
http-get nip parse-html ;
: (find-relative)
[ >r + dup r> ?nth* [ 2drop f f ] unless ] [ 2drop f ] if ; inline

View File

@ -3,8 +3,13 @@
USING: assocs http kernel math math.parser namespaces sequences
io io.sockets io.streams.string io.files io.timeouts strings
splitting calendar continuations accessors vectors math.order
io.encodings.8-bit io.encodings.binary io.streams.duplex
fry debugger inspector ascii urls ;
io.encodings
io.encodings.string
io.encodings.ascii
io.encodings.8-bit
io.encodings.binary
io.streams.duplex
fry debugger inspector ascii urls present ;
IN: http.client
: max-redirects 10 ;
@ -15,7 +20,7 @@ M: too-many-redirects summary
drop
[ "Redirection limit of " % max-redirects # " exceeded" % ] "" make ;
DEFER: http-request
DEFER: (http-request)
<PRIVATE
@ -31,7 +36,7 @@ SYMBOL: redirects
redirects get max-redirects < [
request get
swap "location" header redirect-url
"GET" >>method http-request
"GET" >>method (http-request)
] [
too-many-redirects
] if
@ -45,15 +50,21 @@ PRIVATE>
: read-chunks ( -- )
read-chunk-size dup zero?
[ drop ] [ read % read-crlf "" assert= read-chunks ] if ;
[ drop ] [ read % read-crlf B{ } assert= read-chunks ] if ;
: read-response-body ( response -- response data )
dup "transfer-encoding" header "chunked" =
[ [ read-chunks ] "" make ] [ input-stream get contents ] if ;
dup "transfer-encoding" header "chunked" = [
binary decode-input
[ read-chunks ] B{ } make
over content-charset>> decode
] [
dup content-charset>> decode-input
input-stream get contents
] if ;
: http-request ( request -- response data )
: (http-request) ( request -- response data )
dup request [
dup url>> url-addr latin1 [
dup url>> url-addr ascii [
1 minutes timeouts
write-request
read-response
@ -62,14 +73,6 @@ PRIVATE>
do-redirect
] with-variable ;
: <get-request> ( url -- request )
<request>
"GET" >>method
swap >url ensure-port >>url ;
: http-get* ( url -- response data )
<get-request> http-request ;
: success? ( code -- ? ) 200 = ;
ERROR: download-failed response body ;
@ -84,18 +87,28 @@ M: download-failed error.
]
[ body>> write ] bi ;
: check-response ( response string -- string )
over code>> success? [ nip ] [ download-failed ] if ;
: check-response ( response data -- response data )
over code>> success? [ download-failed ] unless ;
: http-get ( url -- string )
http-get* check-response ;
: http-request ( request -- response data )
(http-request) check-response ;
: <get-request> ( url -- request )
<request>
"GET" >>method
swap >url ensure-port >>url ;
: http-get ( url -- response data )
<get-request> http-request ;
: download-name ( url -- name )
file-name "?" split1 drop "/" ?tail drop ;
present file-name "?" split1 drop "/" ?tail drop ;
: download-to ( url file -- )
#! Downloads the contents of a URL to a file.
[ http-get ] dip latin1 [ write ] with-file-writer ;
swap http-get
[ content-charset>> ] [ '[ , write ] ] bi*
with-file-writer ;
: download ( url -- )
dup download-name download-to ;

View File

@ -1,5 +1,5 @@
USING: http tools.test multiline tuple-syntax
io.streams.string kernel arrays splitting sequences
io.streams.string io.encodings.utf8 kernel arrays splitting sequences
assocs io.sockets db db.sqlite continuations urls hashtables ;
IN: http.tests
@ -78,7 +78,7 @@ must-fail-with
STRING: read-response-test-1
HTTP/1.1 404 not found
Content-Type: text/html; charset=UTF8
Content-Type: text/html; charset=UTF-8
blah
;
@ -88,10 +88,10 @@ blah
version: "1.1"
code: 404
message: "not found"
header: H{ { "content-type" "text/html; charset=UTF8" } }
header: H{ { "content-type" "text/html; charset=UTF-8" } }
cookies: { }
content-type: "text/html"
content-charset: "UTF8"
content-charset: utf8
}
] [
read-response-test-1 lf>crlf
@ -101,7 +101,7 @@ blah
STRING: read-response-test-1'
HTTP/1.1 404 not found
content-type: text/html; charset=UTF8
content-type: text/html; charset=UTF-8
;
@ -160,14 +160,14 @@ test-db [
[ t ] [
"resource:extra/http/test/foo.html" ascii file-contents
"http://localhost:1237/nested/foo.html" http-get =
"http://localhost:1237/nested/foo.html" http-get nip =
] unit-test
[ "http://localhost:1237/redirect-loop" http-get ]
[ "http://localhost:1237/redirect-loop" http-get nip ]
[ too-many-redirects? ] must-fail-with
[ "Goodbye" ] [
"http://localhost:1237/quit" http-get
"http://localhost:1237/quit" http-get nip
] unit-test
! Dispatcher bugs
@ -194,12 +194,12 @@ test-db [
: 404? [ download-failed? ] [ response>> code>> 404 = ] bi and ;
! This should give a 404 not an infinite redirect loop
[ "http://localhost:1237/d/blah" http-get ] [ 404? ] must-fail-with
[ "http://localhost:1237/d/blah" http-get nip ] [ 404? ] must-fail-with
! This should give a 404 not an infinite redirect loop
[ "http://localhost:1237/blah/" http-get ] [ 404? ] must-fail-with
[ "http://localhost:1237/blah/" http-get nip ] [ 404? ] must-fail-with
[ "Goodbye" ] [ "http://localhost:1237/quit" http-get ] unit-test
[ "Goodbye" ] [ "http://localhost:1237/quit" http-get nip ] unit-test
[ ] [
[
@ -218,9 +218,9 @@ test-db [
[ ] [ 100 sleep ] unit-test
[ "Hi" ] [ "http://localhost:1237/" http-get ] unit-test
[ "Hi" ] [ "http://localhost:1237/" http-get nip ] unit-test
[ "Goodbye" ] [ "http://localhost:1237/quit" http-get ] unit-test
[ "Goodbye" ] [ "http://localhost:1237/quit" http-get nip ] unit-test
USING: html.components html.elements xml xml.utilities validators
furnace furnace.flash ;
@ -253,7 +253,7 @@ SYMBOL: a
: test-a string>xml "input" tag-named "value" swap at ;
[ "3" ] [
"http://localhost:1237/" http-get*
"http://localhost:1237/" http-get
swap dup cookies>> "cookies" set session-id-key get-cookie
value>> "session-id" set test-a
] unit-test
@ -273,4 +273,4 @@ SYMBOL: a
[ 4 ] [ a get-global ] unit-test
[ "Goodbye" ] [ "http://localhost:1237/quit" http-get ] unit-test
[ "Goodbye" ] [ "http://localhost:1237/quit" http-get nip ] unit-test

View File

@ -7,6 +7,7 @@ strings vectors hashtables quotations arrays byte-arrays
math.parser calendar calendar.format present
io io.server io.sockets.secure
io.encodings.iana io.encodings.binary io.encodings.8-bit
unicode.case unicode.categories qualified
@ -28,7 +29,8 @@ IN: http
"header" get
add-header
] [
": " split1 dup [
":" split1 dup [
[ blank? ] left-trim
swap >lower dup "last-header" set
"header" get add-header
] [
@ -36,20 +38,20 @@ IN: http
] if
] if ;
: read-lf ( -- string )
: read-lf ( -- bytes )
"\n" read-until CHAR: \n assert= ;
: read-crlf ( -- string )
: read-crlf ( -- bytes )
"\r" read-until
[ CHAR: \r assert= read1 CHAR: \n assert= ] when* ;
: read-header-line ( -- )
: (read-header) ( -- )
read-crlf dup
empty? [ drop ] [ header-line read-header-line ] if ;
empty? [ drop ] [ header-line (read-header) ] if ;
: read-header ( -- assoc )
H{ } clone [
"header" [ read-header-line ] with-variable
"header" [ (read-header) ] with-variable
] keep ;
: header-value>string ( value -- string )
@ -66,7 +68,8 @@ IN: http
: write-header ( assoc -- )
>alist sort-keys [
swap url-encode write ": " write
swap
check-header-string write ": " write
header-value>string check-header-string write crlf
] assoc-each crlf ;
@ -299,6 +302,7 @@ body ;
H{ } clone >>header
"close" "connection" set-header
now timestamp>http-string "date" set-header
latin1 >>content-charset
V{ } clone >>cookies ;
: read-response-version ( response -- response )
@ -319,7 +323,9 @@ body ;
read-header >>header
dup "set-cookie" header parse-cookies >>cookies
dup "content-type" header [
parse-content-type [ >>content-type ] [ >>content-charset ] bi*
parse-content-type
[ >>content-type ]
[ name>encoding binary or >>content-charset ] bi*
] when* ;
: read-response ( -- response )
@ -341,7 +347,8 @@ body ;
: unparse-content-type ( request -- content-type )
[ content-type>> "application/octet-stream" or ]
[ content-charset>> ] bi
[ content-charset>> encoding>name ]
bi
[ "; charset=" swap 3append ] when* ;
: write-response-header ( response -- response )

View File

@ -1,13 +1,14 @@
! Copyright (C) 2008 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: html.elements math.parser http accessors kernel
io io.streams.string ;
io io.streams.string io.encodings.utf8 ;
IN: http.server.responses
: <content> ( body content-type -- response )
<response>
200 >>code
"Document follows" >>message
utf8 >>content-charset
swap >>content-type
swap >>body ;

View File

@ -1,10 +1,21 @@
! Copyright (C) 2003, 2008 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: kernel accessors sequences arrays namespaces splitting
vocabs.loader http http.server.responses logging calendar
destructors html.elements html.streams io.server
io.encodings.8-bit io.timeouts io assocs debugger continuations
fry tools.vocabs math ;
vocabs.loader destructors assocs debugger continuations
tools.vocabs math
io
io.server
io.encodings
io.encodings.utf8
io.encodings.ascii
io.encodings.binary
io.streams.limited
io.timeouts
fry logging calendar
http
http.server.responses
html.elements
html.streams ;
IN: http.server
SYMBOL: responder-nesting
@ -43,19 +54,29 @@ main-responder global [ <404> <trivial-responder> or ] change-at
swap development-mode get [ '[ , http-error. ] >>body ] [ drop ] if ;
: do-response ( response -- )
dup write-response
request get method>> "HEAD" = [ drop ] [
'[ , write-response-body ]
[
development-mode get
[ http-error. ] [ drop "Response error" ] if
] recover
] if ;
[ write-response ]
[
request get method>> "HEAD" = [ drop ] [
'[
,
[ content-charset>> encode-output ]
[ write-response-body ]
bi
]
[
utf8 [
development-mode get
[ http-error. ] [ drop "Response error" throw ] if
] with-encoded-output
] recover
] if
] bi ;
LOG: httpd-hit NOTICE
: log-request ( request -- )
[ method>> ] [ url>> [ host>> ] [ path>> ] bi ] bi 3array httpd-hit ;
[ method>> ] [ url>> [ host>> ] [ path>> ] bi ] bi
3array httpd-hit ;
: split-path ( string -- path )
"/" split harvest ;
@ -79,9 +100,15 @@ LOG: httpd-hit NOTICE
development-mode get-global
[ global [ refresh-all ] bind ] when ;
: setup-limits ( -- )
1 minutes timeouts
64 1024 * limit-input ;
: handle-client ( -- )
[
1 minutes timeouts
setup-limits
ascii decode-input
ascii encode-output
?refresh-all
read-request
do-request
@ -90,7 +117,7 @@ LOG: httpd-hit NOTICE
: httpd ( port -- )
dup integer? [ internet-server ] when
"http.server" latin1 [ handle-client ] with-server ;
"http.server" binary [ handle-client ] with-server ;
: httpd-main ( -- )
8888 httpd ;

View File

@ -29,7 +29,10 @@ TUPLE: file-responder root hook special allow-listings ;
H{ } clone >>special ;
: (serve-static) ( path mime-type -- response )
[ [ binary <file-reader> &dispose ] dip <content> ]
[
[ binary <file-reader> &dispose ] dip
<content> binary >>content-charset
]
[ drop file-info [ size>> ] [ modified>> ] bi ] 2bi
[ "content-length" set-header ]
[ "last-modified" set-header ] bi* ;

View File

@ -41,6 +41,13 @@ PRIVATE>
[ second ] map { "None" } diff
] map ;
: more-aliases ( -- assoc )
H{
{ "UTF8" utf8 }
{ "utf8" utf8 }
{ "utf-8" utf8 }
} ;
: make-n>e ( stream -- n>e )
parse-iana [ [
dup [
@ -48,7 +55,7 @@ PRIVATE>
[ swap [ set ] with each ]
[ drop ] if*
] with each
] each ] H{ } make-assoc ;
] each ] H{ } make-assoc more-aliases assoc-union ;
PRIVATE>
"resource:extra/io/encodings/iana/character-sets"

View File

@ -0,0 +1,32 @@
IN: io.streams.limited.tests
USING: io io.streams.limited io.encodings io.encodings.string
io.encodings.ascii io.encodings.binary io.streams.byte-array
namespaces tools.test strings kernel ;
[ ] [
"hello world\nhow are you today\nthis is a very long line indeed"
ascii encode binary <byte-reader> "data" set
] unit-test
[ ] [ "data" get 24 <limited-stream> "limited" set ] unit-test
[ CHAR: h ] [ "limited" get stream-read1 ] unit-test
[ ] [ "limited" get ascii <decoder> "decoded" set ] unit-test
[ "ello world" ] [ "decoded" get stream-readln ] unit-test
[ "how " ] [ 4 "decoded" get stream-read ] unit-test
[ "decoded" get stream-readln ] [ limit-exceeded? ] must-fail-with
[ ] [
"abc\ndef\nghi"
ascii encode binary <byte-reader> "data" set
] unit-test
[ ] [ "data" get 7 <limited-stream> "limited" set ] unit-test
[ "abc" CHAR: \n ] [ "\n" "limited" get stream-read-until [ >string ] dip ] unit-test
[ "\n" "limited" get stream-read-until ] [ limit-exceeded? ] must-fail-with

View File

@ -0,0 +1,42 @@
! Copyright (C) 2008 Slava Pestov
! See http://factorcode.org/license.txt for BSD license.
USING: kernel math io destructors accessors sequences
namespaces ;
IN: io.streams.limited
TUPLE: limited-stream stream count limit ;
: <limited-stream> ( limit stream -- stream' )
limited-stream new
swap >>stream
swap >>limit
0 >>count ;
: limit-input ( limit -- )
input-stream [ <limited-stream> ] change ;
ERROR: limit-exceeded ;
: check-limit ( n stream -- )
[ + ] change-count
[ count>> ] [ limit>> ] bi >=
[ limit-exceeded ] when ; inline
M: limited-stream stream-read1
1 over check-limit stream>> stream-read1 ;
M: limited-stream stream-read
2dup check-limit stream>> stream-read ;
M: limited-stream stream-read-partial
2dup check-limit stream>> stream-read-partial ;
: (read-until) ( stream seps buf -- stream seps buf sep/f )
3dup [ [ stream-read1 dup ] dip memq? ] dip
swap [ drop ] [ push (read-until) ] if ;
M: limited-stream stream-read-until
swap BV{ } clone (read-until) [ 2nip B{ } like ] dip ;
M: limited-stream dispose
stream>> dispose ;

View File

@ -107,7 +107,7 @@ TUPLE: entry title url description date ;
: download-feed ( url -- feed )
#! Retrieve an news syndication file, return as a feed tuple.
http-get read-feed ;
http-get nip read-feed ;
! Atom generation
: simple-tag, ( content name -- )

View File

@ -59,4 +59,4 @@ format similar-ok language country site subscription license ;
swap >>query ;
: search-yahoo ( search -- seq )
query http-get string>xml parse-yahoo ;
query http-get nip string>xml parse-yahoo ;