Split off urls.encoding, fix query encoding

db4
Slava Pestov 2008-09-29 19:43:04 -05:00
parent 27c565c5ea
commit 85b6e32681
9 changed files with 187 additions and 170 deletions

View File

@ -0,0 +1 @@
Slava Pestov

View File

@ -0,0 +1,57 @@
IN: urls.encoding
USING: strings help.markup help.syntax assocs multiline ;
HELP: url-decode
{ $values { "str" string } { "decoded" string } }
{ $description "Decodes a URL-encoded string." } ;
HELP: url-encode
{ $values { "str" string } { "encoded" string } }
{ $description "URL-encodes a string." } ;
HELP: url-quotable?
{ $values { "ch" "a character" } { "?" "a boolean" } }
{ $description "Tests if a character be used without URL-encoding in a URL." } ;
HELP: assoc>query
{ $values { "assoc" assoc } { "str" string } }
{ $description "Converts an assoc of query parameters into a query string, performing URL encoding." }
{ $notes "This word is used by the implementation of " { $link "urls" } ". It is also used by the HTTP client to encode POST requests." }
{ $examples
{ $example
"USING: io urls ;"
"{ { \"from\" \"Lead\" } { \"to\" \"Gold, please\" } }"
"assoc>query print"
"from=Lead&to=Gold%2c+please"
}
} ;
HELP: query>assoc
{ $values { "query" string } { "assoc" assoc } }
{ $description "Parses a URL query string and URL-decodes each component." }
{ $notes "This word is used by the implementation of " { $link "urls" } ". It is also used by the HTTP server to parse POST requests." }
{ $examples
{ $unchecked-example
"USING: prettyprint urls ;"
"\"gender=female&agefrom=22&ageto=28&location=Omaha+NE\""
"query>assoc ."
<" H{
{ "gender" "female" }
{ "agefrom" "22" }
{ "ageto" "28" }
{ "location" "Omaha NE" }
}">
}
} ;
ARTICLE: "url-encoding" "URL encoding and decoding"
"URL encoding and decoding strings:"
{ $subsection url-encode }
{ $subsection url-decode }
{ $subsection url-quotable? }
"Encoding and decoding queries:"
{ $subsection assoc>query }
{ $subsection query>assoc }
"See " { $url "http://en.wikipedia.org/wiki/Percent-encoding" } " for a description of URL encoding." ;
ABOUT: "url-encoding"

View File

@ -0,0 +1,26 @@
IN: urls.encoding.tests
USING: urls.encoding tools.test arrays kernel assocs present accessors ;
[ "~hello world" ] [ "%7ehello world" url-decode ] unit-test
[ f ] [ "%XX%XX%XX" url-decode ] unit-test
[ f ] [ "%XX%XX%X" url-decode ] unit-test
[ "hello world" ] [ "hello%20world" url-decode ] unit-test
[ " ! " ] [ "%20%21%20" url-decode ] unit-test
[ "hello world" ] [ "hello world%" url-decode ] unit-test
[ "hello world" ] [ "hello world%x" url-decode ] unit-test
[ "hello%20world" ] [ "hello world" url-encode ] unit-test
[ "hello world" ] [ "hello+world" query-decode ] unit-test
[ "\u001234hi\u002045" ] [ "\u001234hi\u002045" url-encode url-decode ] unit-test
[ "a=b&a=c" ] [ { { "a" { "b" "c" } } } assoc>query ] unit-test
[ H{ { "a" "b" } } ] [ "a=b" query>assoc ] unit-test
[ H{ { "a" { "b" "c" } } } ] [ "a=b&a=c" query>assoc ] unit-test
[ H{ { "text" "hello world" } } ] [ "text=hello+world" query>assoc ] unit-test
[ "a=3" ] [ { { "a" 3 } } assoc>query ] unit-test

View File

@ -0,0 +1,96 @@
! Copyright (C) 2008 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: kernel ascii combinators combinators.short-circuit
sequences splitting fry namespaces make assocs arrays strings
io.sockets io.sockets.secure io.encodings.string
io.encodings.utf8 math math.parser accessors hashtables present ;
IN: urls.encoding
: url-quotable? ( ch -- ? )
{
[ letter? ]
[ LETTER? ]
[ digit? ]
[ "/_-.:" member? ]
} 1|| ; foldable
<PRIVATE
: push-utf8 ( ch -- )
1string utf8 encode
[ CHAR: % , >hex 2 CHAR: 0 pad-left % ] each ;
PRIVATE>
: url-encode ( str -- encoded )
[
[ dup url-quotable? [ , ] [ push-utf8 ] if ] each
] "" make ;
<PRIVATE
: url-decode-hex ( index str -- )
2dup length 2 - >= [
2drop
] [
[ 1+ dup 2 + ] dip subseq hex> [ , ] when*
] if ;
: url-decode-% ( index str -- index str )
2dup url-decode-hex ;
: url-decode-iter ( index str -- )
2dup length >= [
2drop
] [
2dup nth dup CHAR: % = [
drop url-decode-% [ 3 + ] dip
] [
, [ 1+ ] dip
] if url-decode-iter
] if ;
PRIVATE>
: url-decode ( str -- decoded )
[ 0 swap url-decode-iter ] "" make utf8 decode ;
: query-decode ( str -- decoded )
[ dup CHAR: + = [ drop "%20" ] [ 1string ] if ] { } map-as
concat url-decode ;
<PRIVATE
: add-query-param ( value key assoc -- )
[
at [
{
{ [ dup string? ] [ swap 2array ] }
{ [ dup array? ] [ swap suffix ] }
{ [ dup not ] [ drop ] }
} cond
] when*
] 2keep set-at ;
PRIVATE>
: query>assoc ( query -- assoc )
dup [
"&" split H{ } clone [
[
[ "=" split1 [ dup [ query-decode ] when ] bi@ swap ] dip
add-query-param
] curry each
] keep
] when ;
: assoc>query ( assoc -- str )
[
dup array? [ [ present ] map ] [ present 1array ] if
] assoc-map
[
[
[ url-encode ] dip
[ url-encode "=" swap 3append , ] with each
] assoc-each
] { } make "&" join ;

View File

@ -0,0 +1 @@
URL and form encoding/decoding

View File

@ -0,0 +1 @@
web

View File

@ -46,37 +46,6 @@ HELP: URL"
}
} ;
HELP: assoc>query
{ $values { "assoc" assoc } { "str" string } }
{ $description "Converts an assoc of query parameters into a query string, performing URL encoding." }
{ $notes "This word is used to implement the " { $link present } " method on URLs; it is also used by the HTTP client to encode POST requests." }
{ $examples
{ $example
"USING: io urls ;"
"{ { \"from\" \"Lead\" } { \"to\" \"Gold, please\" } }"
"assoc>query print"
"from=Lead&to=Gold%2c+please"
}
} ;
HELP: query>assoc
{ $values { "query" string } { "assoc" assoc } }
{ $description "Parses a URL query string and URL-decodes each component." }
{ $notes "This word is used to implement " { $link >url } ". It is also used by the HTTP server to parse POST requests." }
{ $examples
{ $unchecked-example
"USING: prettyprint urls ;"
"\"gender=female&agefrom=22&ageto=28&location=Omaha+NE\""
"query>assoc ."
<" H{
{ "gender" "female" }
{ "agefrom" "22" }
{ "ageto" "28" }
{ "location" "Omaha NE" }
}">
}
} ;
HELP: derive-url
{ $values { "base" url } { "url" url } { "url'" url } }
{ $description "Builds a URL by filling in missing components of " { $snippet "url" } " from " { $snippet "base" } "." }
@ -192,28 +161,7 @@ HELP: url-append-path
{ $values { "path1" string } { "path2" string } { "path" string } }
{ $description "Like " { $link append-path } ", but intended for use with URL paths and not filesystem paths." } ;
HELP: url-decode
{ $values { "str" string } { "decoded" string } }
{ $description "Decodes a URL-encoded string." } ;
HELP: url-encode
{ $values { "str" string } { "encoded" string } }
{ $description "URL-encodes a string." } ;
HELP: url-quotable?
{ $values { "ch" "a character" } { "?" "a boolean" } }
{ $description "Tests if a character be used without URL-encoding in a URL." } ;
ARTICLE: "url-encoding" "URL encoding and decoding"
"URL encoding and decoding strings:"
{ $subsection url-encode }
{ $subsection url-decode }
{ $subsection url-quotable? }
"The URL implemention encodes and decodes components of " { $link url } " instances automatically, but sometimes it is required for non-URL strings. See " { $url "http://en.wikipedia.org/wiki/Percent-encoding" } " for a description of URL encoding." ;
ARTICLE: "url-utilities" "URL implementation utilities"
{ $subsection assoc>query }
{ $subsection query>assoc }
{ $subsection parse-host }
{ $subsection secure-protocol? }
{ $subsection url-append-path } ;
@ -240,8 +188,9 @@ $nl
{ $subsection set-query-param }
"Creating " { $link "network-addressing" } " from URLs:"
{ $subsection url-addr }
"Additional topics:"
{ $subsection "url-utilities" }
{ $subsection "url-encoding" } ;
"The URL implemention encodes and decodes components of " { $link url } " instances automatically, but sometimes this functionality is needed for non-URL strings."
{ $subsection "url-encoding" }
"Utility words used by the URL implementation:"
{ $subsection "url-utilities" } ;
ABOUT: "urls"

View File

@ -2,30 +2,6 @@ IN: urls.tests
USING: urls urls.private tools.test
arrays kernel assocs present accessors ;
[ "hello+world" ] [ "hello world" url-encode ] unit-test
[ "hello world" ] [ "hello%20world" url-decode ] unit-test
[ "~hello world" ] [ "%7ehello+world" url-decode ] unit-test
[ f ] [ "%XX%XX%XX" url-decode ] unit-test
[ f ] [ "%XX%XX%X" url-decode ] unit-test
[ "hello world" ] [ "hello+world" url-decode ] unit-test
[ "hello world" ] [ "hello%20world" url-decode ] unit-test
[ " ! " ] [ "%20%21%20" url-decode ] unit-test
[ "hello world" ] [ "hello world%" url-decode ] unit-test
[ "hello world" ] [ "hello world%x" url-decode ] unit-test
[ "hello+world" ] [ "hello world" url-encode ] unit-test
[ "+%21+" ] [ " ! " url-encode ] unit-test
[ "\u001234hi\u002045" ] [ "\u001234hi\u002045" url-encode url-decode ] unit-test
[ "a=b&a=c" ] [ { { "a" { "b" "c" } } } assoc>query ] unit-test
[ H{ { "a" "b" } } ] [ "a=b" query>assoc ] unit-test
[ H{ { "a" { "b" "c" } } } ] [ "a=b&a=c" query>assoc ] unit-test
[ "a=3" ] [ { { "a" 3 } } assoc>query ] unit-test
: urls
{
{

View File

@ -5,99 +5,9 @@ sequences splitting fry namespaces make assocs arrays strings
io.sockets io.sockets.secure io.encodings.string
io.encodings.utf8 math math.parser accessors parser
strings.parser lexer prettyprint.backend hashtables present
peg.ebnf ;
peg.ebnf urls.encoding ;
IN: urls
: url-quotable? ( ch -- ? )
{
[ letter? ]
[ LETTER? ]
[ digit? ]
[ "/_-.:" member? ]
} 1|| ; foldable
<PRIVATE
: push-utf8 ( ch -- )
dup CHAR: \s = [ drop "+" % ] [
1string utf8 encode
[ CHAR: % , >hex 2 CHAR: 0 pad-left % ] each
] if ;
PRIVATE>
: url-encode ( str -- encoded )
[
[ dup url-quotable? [ , ] [ push-utf8 ] if ] each
] "" make ;
<PRIVATE
: url-decode-hex ( index str -- )
2dup length 2 - >= [
2drop
] [
[ 1+ dup 2 + ] dip subseq hex> [ , ] when*
] if ;
: url-decode-% ( index str -- index str )
2dup url-decode-hex [ 3 + ] dip ;
: url-decode-+-or-other ( index str ch -- index str )
dup CHAR: + = [ drop CHAR: \s ] when , [ 1+ ] dip ;
: url-decode-iter ( index str -- )
2dup length >= [
2drop
] [
2dup nth dup CHAR: % = [
drop url-decode-%
] [
url-decode-+-or-other
] if url-decode-iter
] if ;
PRIVATE>
: url-decode ( str -- decoded )
[ 0 swap url-decode-iter ] "" make utf8 decode ;
<PRIVATE
: add-query-param ( value key assoc -- )
[
at [
{
{ [ dup string? ] [ swap 2array ] }
{ [ dup array? ] [ swap suffix ] }
{ [ dup not ] [ drop ] }
} cond
] when*
] 2keep set-at ;
PRIVATE>
: query>assoc ( query -- assoc )
dup [
"&" split H{ } clone [
[
[ "=" split1 [ dup [ url-decode ] when ] bi@ swap ] dip
add-query-param
] curry each
] keep
] when ;
: assoc>query ( assoc -- str )
[
dup array? [ [ present ] map ] [ present 1array ] if
] assoc-map
[
[
[ url-encode ] dip
[ url-encode "=" swap 3append , ] with each
] assoc-each
] { } make "&" join ;
TUPLE: url protocol username password host port path query anchor ;
: <url> ( -- url ) url new ;