New http request/response parsers using pegs
parent
c19d83e13f
commit
9674541ceb
|
@ -1,7 +1,8 @@
|
|||
USING: http tools.test multiline tuple-syntax
|
||||
io.streams.string io.encodings.utf8 io.encodings.string
|
||||
kernel arrays splitting sequences
|
||||
assocs io.sockets db db.sqlite continuations urls hashtables ;
|
||||
assocs io.sockets db db.sqlite continuations urls hashtables
|
||||
accessors ;
|
||||
IN: http.tests
|
||||
|
||||
: lf>crlf "\n" split "\r\n" join ;
|
||||
|
@ -73,10 +74,21 @@ GET nested HTTP/1.0
|
|||
|
||||
;
|
||||
|
||||
[ read-request-test-3 [ read-request ] with-string-reader ]
|
||||
[ read-request-test-3 lf>crlf [ read-request ] with-string-reader ]
|
||||
[ "Bad request: URL" = ]
|
||||
must-fail-with
|
||||
|
||||
STRING: read-request-test-4
|
||||
GET /blah HTTP/1.0
|
||||
Host: "www.amazon.com"
|
||||
;
|
||||
|
||||
[ "www.amazon.com" ]
|
||||
[
|
||||
read-request-test-4 lf>crlf [ read-request ] with-string-reader
|
||||
"host" header
|
||||
] unit-test
|
||||
|
||||
STRING: read-response-test-1
|
||||
HTTP/1.1 404 not found
|
||||
Content-Type: text/html; charset=UTF-8
|
||||
|
@ -117,7 +129,38 @@ read-response-test-1' 1array [
|
|||
|
||||
[ t ] [
|
||||
"rmid=732423sdfs73242; path=/; domain=.example.net; expires=Fri, 31-Dec-2010 23:59:59 GMT"
|
||||
dup parse-cookies unparse-cookies =
|
||||
dup parse-set-cookie first unparse-set-cookie =
|
||||
] unit-test
|
||||
|
||||
[ t ] [
|
||||
"a="
|
||||
dup parse-set-cookie first unparse-set-cookie =
|
||||
] unit-test
|
||||
|
||||
STRING: read-response-test-2
|
||||
HTTP/1.1 200 Content follows
|
||||
Set-Cookie: oo="bar; a=b"; httponly=yes; sid=123456
|
||||
|
||||
|
||||
;
|
||||
|
||||
[ 2 ] [
|
||||
read-response-test-2 lf>crlf
|
||||
[ read-response ] with-string-reader
|
||||
cookies>> length
|
||||
] unit-test
|
||||
|
||||
STRING: read-response-test-3
|
||||
HTTP/1.1 200 Content follows
|
||||
Set-Cookie: oo="bar; a=b"; comment="your mom"; httponly=yes
|
||||
|
||||
|
||||
;
|
||||
|
||||
[ 1 ] [
|
||||
read-response-test-3 lf>crlf
|
||||
[ read-response ] with-string-reader
|
||||
cookies>> length
|
||||
] unit-test
|
||||
|
||||
! Live-fire exercise
|
||||
|
|
|
@ -1,8 +1,7 @@
|
|||
! Copyright (C) 2003, 2008 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors kernel combinators math namespaces
|
||||
|
||||
assocs sequences splitting sorting sets debugger
|
||||
assocs assocs.lib sequences splitting sorting sets debugger
|
||||
strings vectors hashtables quotations arrays byte-arrays
|
||||
math.parser calendar calendar.format present
|
||||
|
||||
|
@ -11,7 +10,9 @@ io.encodings.8-bit
|
|||
|
||||
unicode.case unicode.categories qualified
|
||||
|
||||
urls html.templates xml xml.data xml.writer ;
|
||||
urls html.templates xml xml.data xml.writer
|
||||
|
||||
http.parsers ;
|
||||
|
||||
EXCLUDE: fry => , ;
|
||||
|
||||
|
@ -19,40 +20,20 @@ IN: http
|
|||
|
||||
: crlf ( -- ) "\r\n" write ;
|
||||
|
||||
: add-header ( value key assoc -- )
|
||||
[ at dup [ "; " rot 3append ] [ drop ] if ] 2keep set-at ;
|
||||
|
||||
: header-line ( line -- )
|
||||
dup first blank? [
|
||||
[ blank? ] left-trim
|
||||
"last-header" get
|
||||
"header" get
|
||||
add-header
|
||||
] [
|
||||
":" split1 dup [
|
||||
[ blank? ] left-trim
|
||||
swap >lower dup "last-header" set
|
||||
"header" get add-header
|
||||
] [
|
||||
2drop
|
||||
] if
|
||||
] if ;
|
||||
|
||||
: read-lf ( -- bytes )
|
||||
"\n" read-until CHAR: \n assert= ;
|
||||
|
||||
: read-crlf ( -- bytes )
|
||||
"\r" read-until
|
||||
[ CHAR: \r assert= read1 CHAR: \n assert= ] when* ;
|
||||
|
||||
: (read-header) ( -- )
|
||||
read-crlf dup
|
||||
empty? [ drop ] [ header-line (read-header) ] if ;
|
||||
: (read-header) ( -- alist )
|
||||
[ read-crlf dup f like ] [ parse-header-line ] [ drop ] unfold ;
|
||||
|
||||
: process-header ( alist -- assoc )
|
||||
f swap [ [ swap or dup ] dip swap ] assoc-map nip
|
||||
[ ?push ] histogram [ "; " join ] assoc-map
|
||||
>hashtable ;
|
||||
|
||||
: read-header ( -- assoc )
|
||||
H{ } clone [
|
||||
"header" [ (read-header) ] with-variable
|
||||
] keep ;
|
||||
(read-header) process-header ;
|
||||
|
||||
: header-value>string ( value -- string )
|
||||
{
|
||||
|
@ -63,47 +44,62 @@ IN: http
|
|||
|
||||
: check-header-string ( str -- str )
|
||||
#! http://en.wikipedia.org/wiki/HTTP_Header_Injection
|
||||
dup "\r\n" intersect empty?
|
||||
dup "\r\n\"" intersect empty?
|
||||
[ "Header injection attack" throw ] unless ;
|
||||
|
||||
: write-header ( assoc -- )
|
||||
>alist sort-keys [
|
||||
swap
|
||||
check-header-string write ": " write
|
||||
header-value>string check-header-string write crlf
|
||||
[ check-header-string write ": " write ]
|
||||
[ header-value>string check-header-string write crlf ] bi*
|
||||
] assoc-each crlf ;
|
||||
|
||||
TUPLE: cookie name value path domain expires max-age http-only ;
|
||||
TUPLE: cookie name value version comment path domain expires max-age http-only secure ;
|
||||
|
||||
: <cookie> ( value name -- cookie )
|
||||
cookie new
|
||||
swap >>name
|
||||
swap >>value ;
|
||||
|
||||
: parse-cookies ( string -- seq )
|
||||
: parse-set-cookie ( string -- seq )
|
||||
[
|
||||
f swap
|
||||
|
||||
";" split [
|
||||
[ blank? ] trim "=" split1 swap >lower {
|
||||
(parse-set-cookie)
|
||||
[
|
||||
swap {
|
||||
{ "version" [ >>version ] }
|
||||
{ "comment" [ >>comment ] }
|
||||
{ "expires" [ cookie-string>timestamp >>expires ] }
|
||||
{ "max-age" [ string>number seconds >>max-age ] }
|
||||
{ "domain" [ >>domain ] }
|
||||
{ "path" [ >>path ] }
|
||||
{ "httponly" [ drop t >>http-only ] }
|
||||
{ "" [ drop ] }
|
||||
{ "secure" [ drop t >>secure ] }
|
||||
[ <cookie> dup , nip ]
|
||||
} case
|
||||
] each
|
||||
] assoc-each
|
||||
drop
|
||||
] { } make ;
|
||||
|
||||
: parse-cookie ( string -- seq )
|
||||
[
|
||||
f swap
|
||||
(parse-cookie)
|
||||
[
|
||||
swap {
|
||||
{ "$version" [ >>version ] }
|
||||
{ "$domain" [ >>domain ] }
|
||||
{ "$path" [ >>path ] }
|
||||
[ <cookie> dup , nip ]
|
||||
} case
|
||||
] assoc-each
|
||||
drop
|
||||
] { } make ;
|
||||
|
||||
: check-cookie-string ( string -- string' )
|
||||
dup "=;'\"" intersect empty?
|
||||
dup "=;'\"\r\n" intersect empty?
|
||||
[ "Bad cookie name or value" throw ] unless ;
|
||||
|
||||
: (unparse-cookie) ( key value -- )
|
||||
: unparse-cookie-value ( key value -- )
|
||||
{
|
||||
{ f [ drop ] }
|
||||
{ t [ check-cookie-string , ] }
|
||||
|
@ -118,20 +114,30 @@ TUPLE: cookie name value path domain expires max-age http-only ;
|
|||
]
|
||||
} case ;
|
||||
|
||||
: unparse-cookie ( cookie -- strings )
|
||||
: (unparse-cookie) ( cookie -- strings )
|
||||
[
|
||||
dup name>> check-cookie-string >lower
|
||||
over value>> (unparse-cookie)
|
||||
"path" over path>> (unparse-cookie)
|
||||
"domain" over domain>> (unparse-cookie)
|
||||
"expires" over expires>> (unparse-cookie)
|
||||
"max-age" over max-age>> (unparse-cookie)
|
||||
"httponly" over http-only>> (unparse-cookie)
|
||||
over value>> unparse-cookie-value
|
||||
"$path" over path>> unparse-cookie-value
|
||||
"$domain" over domain>> unparse-cookie-value
|
||||
drop
|
||||
] { } make ;
|
||||
|
||||
: unparse-cookies ( cookies -- string )
|
||||
[ unparse-cookie ] map concat "; " join ;
|
||||
: unparse-cookie ( cookies -- string )
|
||||
[ (unparse-cookie) ] map concat "; " join ;
|
||||
|
||||
: unparse-set-cookie ( cookie -- string )
|
||||
[
|
||||
dup name>> check-cookie-string >lower
|
||||
over value>> unparse-cookie-value
|
||||
"path" over path>> unparse-cookie-value
|
||||
"domain" over domain>> unparse-cookie-value
|
||||
"expires" over expires>> unparse-cookie-value
|
||||
"max-age" over max-age>> unparse-cookie-value
|
||||
"httponly" over http-only>> unparse-cookie-value
|
||||
"secure" over secure>> unparse-cookie-value
|
||||
drop
|
||||
] { } make "; " join ;
|
||||
|
||||
TUPLE: request
|
||||
method
|
||||
|
@ -141,6 +147,13 @@ header
|
|||
post-data
|
||||
cookies ;
|
||||
|
||||
: check-url ( string -- url )
|
||||
>url dup path>> "/" head? [ "Bad request: URL" throw ] unless ; inline
|
||||
|
||||
: read-request-line ( request -- request )
|
||||
read-crlf parse-request-line first3
|
||||
[ >>method ] [ check-url >>url ] [ >>version ] tri* ;
|
||||
|
||||
: set-header ( request/response value key -- request/response )
|
||||
pick header>> set-at ;
|
||||
|
||||
|
@ -155,27 +168,9 @@ cookies ;
|
|||
"close" "connection" set-header
|
||||
"Factor http.client" "user-agent" set-header ;
|
||||
|
||||
: read-method ( request -- request )
|
||||
" " read-until [ "Bad request: method" throw ] unless
|
||||
>>method ;
|
||||
|
||||
: check-absolute ( url -- url )
|
||||
dup path>> "/" head? [ "Bad request: URL" throw ] unless ; inline
|
||||
|
||||
: read-url ( request -- request )
|
||||
" " read-until [
|
||||
dup empty? [ drop read-url ] [ >url check-absolute >>url ] if
|
||||
] [ "Bad request: URL" throw ] if ;
|
||||
|
||||
: parse-version ( string -- version )
|
||||
"HTTP/" ?head [ "Bad request: version" throw ] unless
|
||||
dup { "1.0" "1.1" } member? [ "Bad request: version" throw ] unless ;
|
||||
|
||||
: read-request-version ( request -- request )
|
||||
read-crlf [ CHAR: \s = ] left-trim
|
||||
parse-version
|
||||
>>version ;
|
||||
|
||||
: read-request-header ( request -- request )
|
||||
read-header >>header ;
|
||||
|
||||
|
@ -210,7 +205,7 @@ TUPLE: post-data raw content content-type ;
|
|||
drop ;
|
||||
|
||||
: extract-cookies ( request -- request )
|
||||
dup "cookie" header [ parse-cookies >>cookies ] when* ;
|
||||
dup "cookie" header [ parse-cookie >>cookies ] when* ;
|
||||
|
||||
: parse-content-type-attributes ( string -- attributes )
|
||||
" " split harvest [ "=" split1 [ >lower ] dip ] { } map>assoc ;
|
||||
|
@ -220,22 +215,18 @@ TUPLE: post-data raw content content-type ;
|
|||
|
||||
: read-request ( -- request )
|
||||
<request>
|
||||
read-method
|
||||
read-url
|
||||
read-request-version
|
||||
read-request-line
|
||||
read-request-header
|
||||
read-post-data
|
||||
extract-host
|
||||
extract-cookies ;
|
||||
|
||||
: write-method ( request -- request )
|
||||
dup method>> write bl ;
|
||||
|
||||
: write-request-url ( request -- request )
|
||||
dup url>> relative-url present write bl ;
|
||||
|
||||
: write-version ( request -- request )
|
||||
"HTTP/" write dup request-version write crlf ;
|
||||
: write-request-line ( request -- request )
|
||||
dup
|
||||
[ method>> write bl ]
|
||||
[ url>> relative-url present write bl ]
|
||||
[ "HTTP/" write version>> write crlf ]
|
||||
tri ;
|
||||
|
||||
: url-host ( url -- string )
|
||||
[ host>> ] [ port>> ] bi dup "http" protocol-port =
|
||||
|
@ -249,7 +240,7 @@ TUPLE: post-data raw content content-type ;
|
|||
[ content-type>> "content-type" pick set-at ]
|
||||
bi
|
||||
] when*
|
||||
over cookies>> f like [ unparse-cookies "cookie" pick set-at ] when*
|
||||
over cookies>> f like [ unparse-cookie "cookie" pick set-at ] when*
|
||||
write-header ;
|
||||
|
||||
GENERIC: >post-data ( object -- post-data )
|
||||
|
@ -274,9 +265,7 @@ M: f >post-data ;
|
|||
|
||||
: write-request ( request -- )
|
||||
unparse-post-data
|
||||
write-method
|
||||
write-request-url
|
||||
write-version
|
||||
write-request-line
|
||||
write-request-header
|
||||
write-post-data
|
||||
flush
|
||||
|
@ -311,23 +300,13 @@ M: response clone
|
|||
[ clone ] change-header
|
||||
[ clone ] change-cookies ;
|
||||
|
||||
: read-response-version ( response -- response )
|
||||
" \t" read-until
|
||||
[ "Bad response: version" throw ] unless
|
||||
parse-version
|
||||
>>version ;
|
||||
|
||||
: read-response-code ( response -- response )
|
||||
" \t" read-until [ "Bad response: code" throw ] unless
|
||||
string>number [ "Bad response: code" throw ] unless*
|
||||
>>code ;
|
||||
|
||||
: read-response-message ( response -- response )
|
||||
read-crlf >>message ;
|
||||
: read-response-line ( response -- response )
|
||||
read-crlf parse-response-line first3
|
||||
[ >>version ] [ >>code ] [ >>message ] tri* ;
|
||||
|
||||
: read-response-header ( response -- response )
|
||||
read-header >>header
|
||||
dup "set-cookie" header parse-cookies >>cookies
|
||||
dup "set-cookie" header parse-set-cookie >>cookies
|
||||
dup "content-type" header [
|
||||
parse-content-type
|
||||
[ >>content-type ]
|
||||
|
@ -336,20 +315,15 @@ M: response clone
|
|||
|
||||
: read-response ( -- response )
|
||||
<response>
|
||||
read-response-version
|
||||
read-response-code
|
||||
read-response-message
|
||||
read-response-line
|
||||
read-response-header ;
|
||||
|
||||
: write-response-version ( response -- response )
|
||||
"HTTP/" write
|
||||
dup version>> write bl ;
|
||||
|
||||
: write-response-code ( response -- response )
|
||||
dup code>> number>string write bl ;
|
||||
|
||||
: write-response-message ( response -- response )
|
||||
dup message>> write crlf ;
|
||||
: write-response-line ( response -- response )
|
||||
dup
|
||||
[ "HTTP/" write version>> write bl ]
|
||||
[ code>> present write bl ]
|
||||
[ message>> write crlf ]
|
||||
tri ;
|
||||
|
||||
: unparse-content-type ( request -- content-type )
|
||||
[ content-type>> "application/octet-stream" or ]
|
||||
|
@ -357,19 +331,29 @@ M: response clone
|
|||
bi
|
||||
[ "; charset=" swap 3append ] when* ;
|
||||
|
||||
: ensure-domain ( cookie -- cookie )
|
||||
[
|
||||
request get url>>
|
||||
host>> dup "localhost" =
|
||||
[ drop ] [ or ] if
|
||||
] change-domain ;
|
||||
|
||||
: write-response-header ( response -- response )
|
||||
dup header>> clone
|
||||
over cookies>> f like [ unparse-cookies "set-cookie" pick set-at ] when*
|
||||
#! We send one set-cookie header per cookie, because that's
|
||||
#! what Firefox expects.
|
||||
dup header>> >alist >vector
|
||||
over unparse-content-type "content-type" pick set-at
|
||||
over cookies>> [
|
||||
ensure-domain unparse-set-cookie
|
||||
"set-cookie" swap 2array over push
|
||||
] each
|
||||
write-header ;
|
||||
|
||||
: write-response-body ( response -- response )
|
||||
dup body>> call-template ;
|
||||
|
||||
M: response write-response ( respose -- )
|
||||
write-response-version
|
||||
write-response-code
|
||||
write-response-message
|
||||
write-response-line
|
||||
write-response-header
|
||||
flush
|
||||
drop ;
|
||||
|
@ -403,9 +387,7 @@ body ;
|
|||
"1.1" >>version ;
|
||||
|
||||
M: raw-response write-response ( respose -- )
|
||||
write-response-version
|
||||
write-response-code
|
||||
write-response-message
|
||||
write-response-line
|
||||
write-response-body
|
||||
drop ;
|
||||
|
||||
|
|
|
@ -0,0 +1,166 @@
|
|||
USING: math math.order math.parser kernel combinators.lib
|
||||
sequences sequences.deep peg peg.parsers assocs arrays
|
||||
hashtables strings unicode.case namespaces ascii ;
|
||||
IN: http.parsers
|
||||
|
||||
: except ( quot -- parser )
|
||||
[ not ] compose satisfy ; inline
|
||||
|
||||
: except-these ( quots -- parser )
|
||||
[ 1|| ] curry except ; inline
|
||||
|
||||
: ctl? ( ch -- ? )
|
||||
{ [ 0 31 between? ] [ 127 = ] } 1|| ;
|
||||
|
||||
: tspecial? ( ch -- ? )
|
||||
"()<>@,;:\\\"/[]?={} \t" member? ;
|
||||
|
||||
: 'token' ( -- parser )
|
||||
{ [ ctl? ] [ tspecial? ] } except-these repeat1 ;
|
||||
|
||||
: case-insensitive ( parser -- parser' )
|
||||
[ flatten >string >lower ] action ;
|
||||
|
||||
: case-sensitive ( parser -- parser' )
|
||||
[ flatten >string ] action ;
|
||||
|
||||
: 'space' ( -- parser )
|
||||
[ " \t" member? ] satisfy repeat0 hide ;
|
||||
|
||||
: one-of ( strings -- parser )
|
||||
[ token ] map choice ;
|
||||
|
||||
: 'http-method' ( -- parser )
|
||||
{ "OPTIONS" "GET" "HEAD" "POST" "PUT" "DELETE" "TRACE" "CONNECT" } one-of ;
|
||||
|
||||
: 'url' ( -- parser )
|
||||
[ " \t\r\n" member? ] except repeat1 case-sensitive ;
|
||||
|
||||
: 'http-version' ( -- parser )
|
||||
[
|
||||
"HTTP" token hide ,
|
||||
'space' ,
|
||||
"/" token hide ,
|
||||
'space' ,
|
||||
"1" token ,
|
||||
"." token ,
|
||||
{ "0" "1" } one-of ,
|
||||
] seq* [ concat >string ] action ;
|
||||
|
||||
PEG: parse-request-line ( string -- triple )
|
||||
#! Triple is { method url version }
|
||||
[
|
||||
'space' ,
|
||||
'http-method' ,
|
||||
'space' ,
|
||||
'url' ,
|
||||
'space' ,
|
||||
'http-version' ,
|
||||
'space' ,
|
||||
] seq* just ;
|
||||
|
||||
: 'text' ( -- parser )
|
||||
[ ctl? ] except ;
|
||||
|
||||
: 'response-code' ( -- parser )
|
||||
[ digit? ] satisfy 3 exactly-n [ string>number ] action ;
|
||||
|
||||
: 'response-message' ( -- parser )
|
||||
'text' repeat0 case-sensitive ;
|
||||
|
||||
PEG: parse-response-line ( string -- triple )
|
||||
#! Triple is { version code message }
|
||||
[
|
||||
'space' ,
|
||||
'http-version' ,
|
||||
'space' ,
|
||||
'response-code' ,
|
||||
'space' ,
|
||||
'response-message' ,
|
||||
] seq* just ;
|
||||
|
||||
: 'crlf' ( -- parser )
|
||||
"\r\n" token ;
|
||||
|
||||
: 'lws' ( -- parser )
|
||||
[ " \t" member? ] satisfy repeat1 ;
|
||||
|
||||
: 'qdtext' ( -- parser )
|
||||
{ [ CHAR: " = ] [ ctl? ] } except-these ;
|
||||
|
||||
: 'quoted-char' ( -- parser )
|
||||
"\\" token hide any-char 2seq ;
|
||||
|
||||
: 'quoted-string' ( -- parser )
|
||||
'quoted-char' 'qdtext' 2choice repeat0 "\"" "\"" surrounded-by ;
|
||||
|
||||
: 'ctext' ( -- parser )
|
||||
{ [ ctl? ] [ "()" member? ] } except-these ;
|
||||
|
||||
: 'comment' ( -- parser )
|
||||
'ctext' 'comment' 2choice repeat0 "(" ")" surrounded-by ;
|
||||
|
||||
: 'field-name' ( -- parser )
|
||||
'token' case-insensitive ;
|
||||
|
||||
: 'field-content' ( -- parser )
|
||||
'quoted-string' case-sensitive
|
||||
'text' repeat0 case-sensitive
|
||||
2choice ;
|
||||
|
||||
PEG: parse-header-line ( string -- pair )
|
||||
#! Pair is either { name value } or { f value }. If f, its a
|
||||
#! continuation of the previous header line.
|
||||
[
|
||||
'field-name' ,
|
||||
'space' ,
|
||||
":" token hide ,
|
||||
'space' ,
|
||||
'field-content' ,
|
||||
] seq*
|
||||
[
|
||||
'lws' [ drop f ] action ,
|
||||
'field-content' ,
|
||||
] seq*
|
||||
2choice ;
|
||||
|
||||
: 'word' ( -- parser )
|
||||
'token' 'quoted-string' 2choice ;
|
||||
|
||||
: 'value' ( -- parser )
|
||||
'quoted-string'
|
||||
[ ";" member? ] except repeat0
|
||||
2choice case-sensitive ;
|
||||
|
||||
: 'attr' ( -- parser )
|
||||
'token' case-insensitive ;
|
||||
|
||||
: 'av-pair' ( -- parser )
|
||||
[
|
||||
'space' ,
|
||||
'attr' ,
|
||||
'space' ,
|
||||
[ "=" token , 'space' , 'value' , ] seq* [ peek ] action
|
||||
epsilon [ drop f ] action
|
||||
2choice ,
|
||||
'space' ,
|
||||
] seq* ;
|
||||
|
||||
: 'av-pairs' ( -- parser )
|
||||
'av-pair' ";" token list-of optional ;
|
||||
|
||||
PEG: (parse-set-cookie) ( string -- alist ) 'av-pairs' just ;
|
||||
|
||||
: 'cookie-value' ( -- parser )
|
||||
[
|
||||
'space' ,
|
||||
'attr' ,
|
||||
'space' ,
|
||||
"=" token hide ,
|
||||
'space' ,
|
||||
'value' ,
|
||||
'space' ,
|
||||
] seq* ;
|
||||
|
||||
PEG: (parse-cookie) ( string -- alist )
|
||||
'cookie-value' [ ";," member? ] satisfy list-of optional just ;
|
Loading…
Reference in New Issue