Split off urls.encoding, fix query encoding

db4
Slava Pestov 2008-09-29 19:43:04 -05:00
parent 27c565c5ea
commit 85b6e32681
9 changed files with 187 additions and 170 deletions

View File

@ -0,0 +1 @@
Slava Pestov

View File

@ -0,0 +1,57 @@
IN: urls.encoding
USING: strings help.markup help.syntax assocs multiline ;
HELP: url-decode
{ $values { "str" string } { "decoded" string } }
{ $description "Decodes a URL-encoded string." } ;
HELP: url-encode
{ $values { "str" string } { "encoded" string } }
{ $description "URL-encodes a string." } ;
HELP: url-quotable?
{ $values { "ch" "a character" } { "?" "a boolean" } }
{ $description "Tests if a character be used without URL-encoding in a URL." } ;
HELP: assoc>query
{ $values { "assoc" assoc } { "str" string } }
{ $description "Converts an assoc of query parameters into a query string, performing URL encoding." }
{ $notes "This word is used by the implementation of " { $link "urls" } ". It is also used by the HTTP client to encode POST requests." }
{ $examples
{ $example
"USING: io urls ;"
"{ { \"from\" \"Lead\" } { \"to\" \"Gold, please\" } }"
"assoc>query print"
"from=Lead&to=Gold%2c+please"
}
} ;
HELP: query>assoc
{ $values { "query" string } { "assoc" assoc } }
{ $description "Parses a URL query string and URL-decodes each component." }
{ $notes "This word is used by the implementation of " { $link "urls" } ". It is also used by the HTTP server to parse POST requests." }
{ $examples
{ $unchecked-example
"USING: prettyprint urls ;"
"\"gender=female&agefrom=22&ageto=28&location=Omaha+NE\""
"query>assoc ."
<" H{
{ "gender" "female" }
{ "agefrom" "22" }
{ "ageto" "28" }
{ "location" "Omaha NE" }
}">
}
} ;
ARTICLE: "url-encoding" "URL encoding and decoding"
"URL encoding and decoding strings:"
{ $subsection url-encode }
{ $subsection url-decode }
{ $subsection url-quotable? }
"Encoding and decoding queries:"
{ $subsection assoc>query }
{ $subsection query>assoc }
"See " { $url "http://en.wikipedia.org/wiki/Percent-encoding" } " for a description of URL encoding." ;
ABOUT: "url-encoding"

View File

@ -0,0 +1,26 @@
IN: urls.encoding.tests
USING: urls.encoding tools.test arrays kernel assocs present accessors ;
[ "~hello world" ] [ "%7ehello world" url-decode ] unit-test
[ f ] [ "%XX%XX%XX" url-decode ] unit-test
[ f ] [ "%XX%XX%X" url-decode ] unit-test
[ "hello world" ] [ "hello%20world" url-decode ] unit-test
[ " ! " ] [ "%20%21%20" url-decode ] unit-test
[ "hello world" ] [ "hello world%" url-decode ] unit-test
[ "hello world" ] [ "hello world%x" url-decode ] unit-test
[ "hello%20world" ] [ "hello world" url-encode ] unit-test
[ "hello world" ] [ "hello+world" query-decode ] unit-test
[ "\u001234hi\u002045" ] [ "\u001234hi\u002045" url-encode url-decode ] unit-test
[ "a=b&a=c" ] [ { { "a" { "b" "c" } } } assoc>query ] unit-test
[ H{ { "a" "b" } } ] [ "a=b" query>assoc ] unit-test
[ H{ { "a" { "b" "c" } } } ] [ "a=b&a=c" query>assoc ] unit-test
[ H{ { "text" "hello world" } } ] [ "text=hello+world" query>assoc ] unit-test
[ "a=3" ] [ { { "a" 3 } } assoc>query ] unit-test

View File

@ -0,0 +1,96 @@
! Copyright (C) 2008 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: kernel ascii combinators combinators.short-circuit
sequences splitting fry namespaces make assocs arrays strings
io.sockets io.sockets.secure io.encodings.string
io.encodings.utf8 math math.parser accessors hashtables present ;
IN: urls.encoding
: url-quotable? ( ch -- ? )
{
[ letter? ]
[ LETTER? ]
[ digit? ]
[ "/_-.:" member? ]
} 1|| ; foldable
<PRIVATE
: push-utf8 ( ch -- )
1string utf8 encode
[ CHAR: % , >hex 2 CHAR: 0 pad-left % ] each ;
PRIVATE>
: url-encode ( str -- encoded )
[
[ dup url-quotable? [ , ] [ push-utf8 ] if ] each
] "" make ;
<PRIVATE
: url-decode-hex ( index str -- )
2dup length 2 - >= [
2drop
] [
[ 1+ dup 2 + ] dip subseq hex> [ , ] when*
] if ;
: url-decode-% ( index str -- index str )
2dup url-decode-hex ;
: url-decode-iter ( index str -- )
2dup length >= [
2drop
] [
2dup nth dup CHAR: % = [
drop url-decode-% [ 3 + ] dip
] [
, [ 1+ ] dip
] if url-decode-iter
] if ;
PRIVATE>
: url-decode ( str -- decoded )
[ 0 swap url-decode-iter ] "" make utf8 decode ;
: query-decode ( str -- decoded )
[ dup CHAR: + = [ drop "%20" ] [ 1string ] if ] { } map-as
concat url-decode ;
<PRIVATE
: add-query-param ( value key assoc -- )
[
at [
{
{ [ dup string? ] [ swap 2array ] }
{ [ dup array? ] [ swap suffix ] }
{ [ dup not ] [ drop ] }
} cond
] when*
] 2keep set-at ;
PRIVATE>
: query>assoc ( query -- assoc )
dup [
"&" split H{ } clone [
[
[ "=" split1 [ dup [ query-decode ] when ] bi@ swap ] dip
add-query-param
] curry each
] keep
] when ;
: assoc>query ( assoc -- str )
[
dup array? [ [ present ] map ] [ present 1array ] if
] assoc-map
[
[
[ url-encode ] dip
[ url-encode "=" swap 3append , ] with each
] assoc-each
] { } make "&" join ;

View File

@ -0,0 +1 @@
URL and form encoding/decoding

View File

@ -0,0 +1 @@
web

View File

@ -46,37 +46,6 @@ HELP: URL"
} }
} ; } ;
HELP: assoc>query
{ $values { "assoc" assoc } { "str" string } }
{ $description "Converts an assoc of query parameters into a query string, performing URL encoding." }
{ $notes "This word is used to implement the " { $link present } " method on URLs; it is also used by the HTTP client to encode POST requests." }
{ $examples
{ $example
"USING: io urls ;"
"{ { \"from\" \"Lead\" } { \"to\" \"Gold, please\" } }"
"assoc>query print"
"from=Lead&to=Gold%2c+please"
}
} ;
HELP: query>assoc
{ $values { "query" string } { "assoc" assoc } }
{ $description "Parses a URL query string and URL-decodes each component." }
{ $notes "This word is used to implement " { $link >url } ". It is also used by the HTTP server to parse POST requests." }
{ $examples
{ $unchecked-example
"USING: prettyprint urls ;"
"\"gender=female&agefrom=22&ageto=28&location=Omaha+NE\""
"query>assoc ."
<" H{
{ "gender" "female" }
{ "agefrom" "22" }
{ "ageto" "28" }
{ "location" "Omaha NE" }
}">
}
} ;
HELP: derive-url HELP: derive-url
{ $values { "base" url } { "url" url } { "url'" url } } { $values { "base" url } { "url" url } { "url'" url } }
{ $description "Builds a URL by filling in missing components of " { $snippet "url" } " from " { $snippet "base" } "." } { $description "Builds a URL by filling in missing components of " { $snippet "url" } " from " { $snippet "base" } "." }
@ -192,28 +161,7 @@ HELP: url-append-path
{ $values { "path1" string } { "path2" string } { "path" string } } { $values { "path1" string } { "path2" string } { "path" string } }
{ $description "Like " { $link append-path } ", but intended for use with URL paths and not filesystem paths." } ; { $description "Like " { $link append-path } ", but intended for use with URL paths and not filesystem paths." } ;
HELP: url-decode
{ $values { "str" string } { "decoded" string } }
{ $description "Decodes a URL-encoded string." } ;
HELP: url-encode
{ $values { "str" string } { "encoded" string } }
{ $description "URL-encodes a string." } ;
HELP: url-quotable?
{ $values { "ch" "a character" } { "?" "a boolean" } }
{ $description "Tests if a character be used without URL-encoding in a URL." } ;
ARTICLE: "url-encoding" "URL encoding and decoding"
"URL encoding and decoding strings:"
{ $subsection url-encode }
{ $subsection url-decode }
{ $subsection url-quotable? }
"The URL implemention encodes and decodes components of " { $link url } " instances automatically, but sometimes it is required for non-URL strings. See " { $url "http://en.wikipedia.org/wiki/Percent-encoding" } " for a description of URL encoding." ;
ARTICLE: "url-utilities" "URL implementation utilities" ARTICLE: "url-utilities" "URL implementation utilities"
{ $subsection assoc>query }
{ $subsection query>assoc }
{ $subsection parse-host } { $subsection parse-host }
{ $subsection secure-protocol? } { $subsection secure-protocol? }
{ $subsection url-append-path } ; { $subsection url-append-path } ;
@ -240,8 +188,9 @@ $nl
{ $subsection set-query-param } { $subsection set-query-param }
"Creating " { $link "network-addressing" } " from URLs:" "Creating " { $link "network-addressing" } " from URLs:"
{ $subsection url-addr } { $subsection url-addr }
"Additional topics:" "The URL implemention encodes and decodes components of " { $link url } " instances automatically, but sometimes this functionality is needed for non-URL strings."
{ $subsection "url-utilities" } { $subsection "url-encoding" }
{ $subsection "url-encoding" } ; "Utility words used by the URL implementation:"
{ $subsection "url-utilities" } ;
ABOUT: "urls" ABOUT: "urls"

View File

@ -2,30 +2,6 @@ IN: urls.tests
USING: urls urls.private tools.test USING: urls urls.private tools.test
arrays kernel assocs present accessors ; arrays kernel assocs present accessors ;
[ "hello+world" ] [ "hello world" url-encode ] unit-test
[ "hello world" ] [ "hello%20world" url-decode ] unit-test
[ "~hello world" ] [ "%7ehello+world" url-decode ] unit-test
[ f ] [ "%XX%XX%XX" url-decode ] unit-test
[ f ] [ "%XX%XX%X" url-decode ] unit-test
[ "hello world" ] [ "hello+world" url-decode ] unit-test
[ "hello world" ] [ "hello%20world" url-decode ] unit-test
[ " ! " ] [ "%20%21%20" url-decode ] unit-test
[ "hello world" ] [ "hello world%" url-decode ] unit-test
[ "hello world" ] [ "hello world%x" url-decode ] unit-test
[ "hello+world" ] [ "hello world" url-encode ] unit-test
[ "+%21+" ] [ " ! " url-encode ] unit-test
[ "\u001234hi\u002045" ] [ "\u001234hi\u002045" url-encode url-decode ] unit-test
[ "a=b&a=c" ] [ { { "a" { "b" "c" } } } assoc>query ] unit-test
[ H{ { "a" "b" } } ] [ "a=b" query>assoc ] unit-test
[ H{ { "a" { "b" "c" } } } ] [ "a=b&a=c" query>assoc ] unit-test
[ "a=3" ] [ { { "a" 3 } } assoc>query ] unit-test
: urls : urls
{ {
{ {

View File

@ -5,99 +5,9 @@ sequences splitting fry namespaces make assocs arrays strings
io.sockets io.sockets.secure io.encodings.string io.sockets io.sockets.secure io.encodings.string
io.encodings.utf8 math math.parser accessors parser io.encodings.utf8 math math.parser accessors parser
strings.parser lexer prettyprint.backend hashtables present strings.parser lexer prettyprint.backend hashtables present
peg.ebnf ; peg.ebnf urls.encoding ;
IN: urls IN: urls
: url-quotable? ( ch -- ? )
{
[ letter? ]
[ LETTER? ]
[ digit? ]
[ "/_-.:" member? ]
} 1|| ; foldable
<PRIVATE
: push-utf8 ( ch -- )
dup CHAR: \s = [ drop "+" % ] [
1string utf8 encode
[ CHAR: % , >hex 2 CHAR: 0 pad-left % ] each
] if ;
PRIVATE>
: url-encode ( str -- encoded )
[
[ dup url-quotable? [ , ] [ push-utf8 ] if ] each
] "" make ;
<PRIVATE
: url-decode-hex ( index str -- )
2dup length 2 - >= [
2drop
] [
[ 1+ dup 2 + ] dip subseq hex> [ , ] when*
] if ;
: url-decode-% ( index str -- index str )
2dup url-decode-hex [ 3 + ] dip ;
: url-decode-+-or-other ( index str ch -- index str )
dup CHAR: + = [ drop CHAR: \s ] when , [ 1+ ] dip ;
: url-decode-iter ( index str -- )
2dup length >= [
2drop
] [
2dup nth dup CHAR: % = [
drop url-decode-%
] [
url-decode-+-or-other
] if url-decode-iter
] if ;
PRIVATE>
: url-decode ( str -- decoded )
[ 0 swap url-decode-iter ] "" make utf8 decode ;
<PRIVATE
: add-query-param ( value key assoc -- )
[
at [
{
{ [ dup string? ] [ swap 2array ] }
{ [ dup array? ] [ swap suffix ] }
{ [ dup not ] [ drop ] }
} cond
] when*
] 2keep set-at ;
PRIVATE>
: query>assoc ( query -- assoc )
dup [
"&" split H{ } clone [
[
[ "=" split1 [ dup [ url-decode ] when ] bi@ swap ] dip
add-query-param
] curry each
] keep
] when ;
: assoc>query ( assoc -- str )
[
dup array? [ [ present ] map ] [ present 1array ] if
] assoc-map
[
[
[ url-encode ] dip
[ url-encode "=" swap 3append , ] with each
] assoc-each
] { } make "&" join ;
TUPLE: url protocol username password host port path query anchor ; TUPLE: url protocol username password host port path query anchor ;
: <url> ( -- url ) url new ; : <url> ( -- url ) url new ;