http.server: decouple charset header from encoding used to send the data, and serve text files as UTF-8 by default

db4
Slava Pestov 2010-03-13 19:06:41 +13:00
parent f8e2ddd53f
commit 02a4082e1f
8 changed files with 49 additions and 36 deletions

View File

@ -35,7 +35,9 @@ M: object >entry
] map ; ] map ;
: <feed-content> ( body -- response ) : <feed-content> ( body -- response )
feed>xml "application/atom+xml" <content> ; feed>xml "application/atom+xml" <content>
"UTF-8" >>content-charset
utf8 >>content-encoding ;
TUPLE: feed-action < action title url entries ; TUPLE: feed-action < action title url entries ;

View File

@ -1,11 +1,11 @@
! Copyright (C) 2005, 2009 Slava Pestov. ! Copyright (C) 2005, 2010 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: assocs kernel math math.parser namespaces make USING: assocs kernel math math.parser namespaces make
sequences strings splitting calendar continuations accessors vectors sequences strings splitting calendar continuations accessors vectors
math.order hashtables byte-arrays destructors math.order hashtables byte-arrays destructors
io io.sockets io.streams.string io.files io.timeouts io io.sockets io.streams.string io.files io.timeouts
io.pathnames io.encodings io.encodings.string io.encodings.ascii io.pathnames io.encodings io.encodings.string io.encodings.ascii
io.encodings.utf8 io.encodings.binary io.crlf io.encodings.utf8 io.encodings.binary io.encodings.iana io.crlf
io.streams.duplex fry ascii urls urls.encoding present locals io.streams.duplex fry ascii urls urls.encoding present locals
http http.parsers http.client.post-data ; http http.parsers http.client.post-data ;
IN: http.client IN: http.client
@ -56,8 +56,8 @@ ERROR: too-many-redirects ;
dup "set-cookie" header parse-set-cookie >>cookies dup "set-cookie" header parse-set-cookie >>cookies
dup "content-type" header [ dup "content-type" header [
parse-content-type parse-content-type
[ >>content-type ] [ >>content-type ] [ >>content-charset ] bi*
[ >>content-charset ] bi* dup content-charset>> name>encoding >>content-encoding
] when* ; ] when* ;
: read-response ( -- response ) : read-response ( -- response )
@ -149,7 +149,7 @@ ERROR: download-failed response ;
: http-request ( request -- response data ) : http-request ( request -- response data )
[ [ % ] with-http-request ] B{ } make [ [ % ] with-http-request ] B{ } make
over content-charset>> decode check-response-with-body ; over content-encoding>> decode check-response-with-body ;
: <get-request> ( url -- request ) : <get-request> ( url -- request )
"GET" <client-request> ; "GET" <client-request> ;

View File

@ -6,13 +6,13 @@ continuations urls hashtables accessors namespaces xml.data
io.encodings.8-bit.latin1 ; io.encodings.8-bit.latin1 ;
IN: http.tests IN: http.tests
[ "text/plain" latin1 ] [ "text/plain" parse-content-type ] unit-test [ "text/plain" "UTF-8" ] [ "text/plain" parse-content-type ] unit-test
[ "text/html" utf8 ] [ "text/html; charset=UTF-8" parse-content-type ] unit-test [ "text/html" "ASCII" ] [ "text/html; charset=ASCII" parse-content-type ] unit-test
[ "text/html" utf8 ] [ "text/html; charset=\"utf-8\"" parse-content-type ] unit-test [ "text/html" "utf-8" ] [ "text/html; charset=\"utf-8\"" parse-content-type ] unit-test
[ "application/octet-stream" binary ] [ "application/octet-stream" parse-content-type ] unit-test [ "application/octet-stream" f ] [ "application/octet-stream" parse-content-type ] unit-test
: lf>crlf ( string -- string' ) "\n" split "\r\n" join ; : lf>crlf ( string -- string' ) "\n" split "\r\n" join ;
@ -115,7 +115,8 @@ blah
{ header H{ { "content-type" "text/html; charset=UTF-8" } } } { header H{ { "content-type" "text/html; charset=UTF-8" } } }
{ cookies { } } { cookies { } }
{ content-type "text/html" } { content-type "text/html" }
{ content-charset utf8 } { content-charset "UTF-8" }
{ content-encoding utf8 }
} }
] [ ] [
read-response-test-1 lf>crlf read-response-test-1 lf>crlf

View File

@ -1,11 +1,11 @@
! Copyright (C) 2003, 2008 Slava Pestov. ! Copyright (C) 2003, 2010 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: accessors kernel combinators math namespaces make assocs USING: accessors kernel combinators math namespaces make assocs
sequences splitting sorting sets strings vectors hashtables sequences splitting sorting sets strings vectors hashtables
quotations arrays byte-arrays math.parser calendar quotations arrays byte-arrays math.parser calendar
calendar.format present urls fry calendar.format present urls fry io io.encodings
io io.encodings io.encodings.iana io.encodings.binary io.encodings.iana io.encodings.binary io.encodings.utf8 io.crlf
io.crlf ascii io.encodings.8-bit.latin1 http.parsers base64 ; ascii io.encodings.8-bit.latin1 http.parsers base64 ;
IN: http IN: http
CONSTANT: max-redirects 10 CONSTANT: max-redirects 10
@ -170,6 +170,7 @@ header
cookies cookies
content-type content-type
content-charset content-charset
content-encoding
body ; body ;
: <response> ( -- response ) : <response> ( -- response )
@ -179,7 +180,7 @@ body ;
"close" "connection" set-header "close" "connection" set-header
now timestamp>http-string "date" set-header now timestamp>http-string "date" set-header
"Factor http.server" "server" set-header "Factor http.server" "server" set-header
latin1 >>content-charset utf8 >>content-encoding
V{ } clone >>cookies ; V{ } clone >>cookies ;
M: response clone M: response clone
@ -221,5 +222,5 @@ TUPLE: post-data data params content-type content-encoding ;
: parse-content-type ( content-type -- type encoding ) : parse-content-type ( content-type -- type encoding )
";" split1 ";" split1
parse-content-type-attributes "charset" swap at name>encoding parse-content-type-attributes "charset" swap at
[ dup "text/" head? latin1 binary ? ] unless* ; [ dup "text/" head? "UTF-8" and ] unless* ;

View File

@ -1,4 +1,4 @@
! Copyright (C) 2008 Slava Pestov. ! Copyright (C) 2008, 2010 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: math.parser http accessors kernel xml.syntax xml.writer USING: math.parser http accessors kernel xml.syntax xml.writer
io io.streams.string io.encodings.utf8 ; io io.streams.string io.encodings.utf8 ;
@ -8,7 +8,7 @@ IN: http.server.responses
<response> <response>
200 >>code 200 >>code
"Document follows" >>message "Document follows" >>message
utf8 >>content-charset utf8 >>content-encoding
swap >>content-type swap >>content-type
swap >>body ; swap >>body ;

View File

@ -4,16 +4,26 @@ IN: http.server.tests
[ t ] [ [ \ + first ] [ <500> ] recover response? ] unit-test [ t ] [ [ \ + first ] [ <500> ] recover response? ] unit-test
[ "text/plain; charset=UTF-8" ] [ [ "text/plain; charset=ASCII" ] [
<response> <response>
"text/plain" >>content-type "text/plain" >>content-type
utf8 >>content-charset "ASCII" >>content-charset
unparse-content-type unparse-content-type
] unit-test ] unit-test
[ "text/xml" ] [ [ "text/xml; charset=UTF-8" ] [
<response> <response>
"text/xml" >>content-type "text/xml" >>content-type
binary >>content-charset unparse-content-type
] unit-test
[ "image/jpeg" ] [
<response>
"image/jpeg" >>content-type
unparse-content-type
] unit-test
[ "application/octet-stream" ] [
<response>
unparse-content-type unparse-content-type
] unit-test ] unit-test

View File

@ -1,4 +1,4 @@
! Copyright (C) 2003, 2009 Slava Pestov. ! Copyright (C) 2003, 2010 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: kernel accessors sequences arrays namespaces splitting USING: kernel accessors sequences arrays namespaces splitting
vocabs.loader destructors assocs debugger continuations vocabs.loader destructors assocs debugger continuations
@ -101,8 +101,10 @@ GENERIC: write-full-response ( request response -- )
tri ; tri ;
: unparse-content-type ( request -- content-type ) : unparse-content-type ( request -- content-type )
[ content-type>> "application/octet-stream" or ] [ content-charset>> ] bi [ content-type>> ] [ content-charset>> ] bi
dup binary eq? [ drop ] [ encoding>name "; charset=" glue ] if ; over "text/" head? [ "UTF-8" or ] when
[ "application/octet-stream" or ] dip
[ "; charset=" glue ] when* ;
: ensure-domain ( cookie -- cookie ) : ensure-domain ( cookie -- cookie )
[ [
@ -133,7 +135,7 @@ M: response write-response ( respose -- )
M: response write-full-response ( request response -- ) M: response write-full-response ( request response -- )
dup write-response dup write-response
swap method>> "HEAD" = [ swap method>> "HEAD" = [
[ content-charset>> encode-output ] [ content-encoding>> encode-output ]
[ write-response-body ] [ write-response-body ]
bi bi
] unless drop ; ] unless drop ;

View File

@ -1,4 +1,4 @@
! Copyright (C) 2004, 2009 Slava Pestov. ! Copyright (C) 2004, 2010 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: calendar kernel math math.order math.parser namespaces USING: calendar kernel math math.order math.parser namespaces
parser sequences strings assocs hashtables debugger mime.types parser sequences strings assocs hashtables debugger mime.types
@ -16,11 +16,8 @@ TUPLE: file-responder root hook special allow-listings ;
dup [ rfc822>timestamp ] when ; dup [ rfc822>timestamp ] when ;
: modified-since? ( filename -- ? ) : modified-since? ( filename -- ? )
request get modified-since dup [ request get modified-since dup
[ file-info modified>> ] dip after? [ [ file-info modified>> ] dip after? ] [ 2drop t ] if ;
] [
2drop t
] if ;
: <file-responder> ( root hook -- responder ) : <file-responder> ( root hook -- responder )
file-responder new file-responder new
@ -30,8 +27,8 @@ TUPLE: file-responder root hook special allow-listings ;
: (serve-static) ( path mime-type -- response ) : (serve-static) ( path mime-type -- response )
[ [
[ binary <file-reader> &dispose ] dip [ binary <file-reader> &dispose ] dip <content>
<content> binary >>content-charset binary >>content-encoding
] ]
[ drop file-info [ size>> ] [ modified>> ] bi ] 2bi [ drop file-info [ size>> ] [ modified>> ] bi ] 2bi
[ "content-length" set-header ] [ "content-length" set-header ]