diff --git a/basis/urls/encoding/authors.txt b/basis/urls/encoding/authors.txt new file mode 100644 index 0000000000..1901f27a24 --- /dev/null +++ b/basis/urls/encoding/authors.txt @@ -0,0 +1 @@ +Slava Pestov diff --git a/basis/urls/encoding/encoding-docs.factor b/basis/urls/encoding/encoding-docs.factor new file mode 100644 index 0000000000..5ba94ea1bc --- /dev/null +++ b/basis/urls/encoding/encoding-docs.factor @@ -0,0 +1,57 @@ +IN: urls.encoding +USING: strings help.markup help.syntax assocs multiline ; + +HELP: url-decode +{ $values { "str" string } { "decoded" string } } +{ $description "Decodes a URL-encoded string." } ; + +HELP: url-encode +{ $values { "str" string } { "encoded" string } } +{ $description "URL-encodes a string." } ; + +HELP: url-quotable? +{ $values { "ch" "a character" } { "?" "a boolean" } } +{ $description "Tests if a character be used without URL-encoding in a URL." } ; + +HELP: assoc>query +{ $values { "assoc" assoc } { "str" string } } +{ $description "Converts an assoc of query parameters into a query string, performing URL encoding." } +{ $notes "This word is used by the implementation of " { $link "urls" } ". It is also used by the HTTP client to encode POST requests." } +{ $examples + { $example + "USING: io urls ;" + "{ { \"from\" \"Lead\" } { \"to\" \"Gold, please\" } }" + "assoc>query print" + "from=Lead&to=Gold%2c+please" + } +} ; + +HELP: query>assoc +{ $values { "query" string } { "assoc" assoc } } +{ $description "Parses a URL query string and URL-decodes each component." } +{ $notes "This word is used by the implementation of " { $link "urls" } ". It is also used by the HTTP server to parse POST requests." } +{ $examples + { $unchecked-example + "USING: prettyprint urls ;" + "\"gender=female&agefrom=22&ageto=28&location=Omaha+NE\"" + "query>assoc ." + <" H{ + { "gender" "female" } + { "agefrom" "22" } + { "ageto" "28" } + { "location" "Omaha NE" } +}"> + } +} ; + +ARTICLE: "url-encoding" "URL encoding and decoding" +"URL encoding and decoding strings:" +{ $subsection url-encode } +{ $subsection url-decode } +{ $subsection url-quotable? } +"Encoding and decoding queries:" +{ $subsection assoc>query } +{ $subsection query>assoc } +"See " { $url "http://en.wikipedia.org/wiki/Percent-encoding" } " for a description of URL encoding." ; + +ABOUT: "url-encoding" diff --git a/basis/urls/encoding/encoding-tests.factor b/basis/urls/encoding/encoding-tests.factor new file mode 100644 index 0000000000..2217ec8a28 --- /dev/null +++ b/basis/urls/encoding/encoding-tests.factor @@ -0,0 +1,26 @@ +IN: urls.encoding.tests +USING: urls.encoding tools.test arrays kernel assocs present accessors ; + +[ "~hello world" ] [ "%7ehello world" url-decode ] unit-test +[ f ] [ "%XX%XX%XX" url-decode ] unit-test +[ f ] [ "%XX%XX%X" url-decode ] unit-test + +[ "hello world" ] [ "hello%20world" url-decode ] unit-test +[ " ! " ] [ "%20%21%20" url-decode ] unit-test +[ "hello world" ] [ "hello world%" url-decode ] unit-test +[ "hello world" ] [ "hello world%x" url-decode ] unit-test +[ "hello%20world" ] [ "hello world" url-encode ] unit-test + +[ "hello world" ] [ "hello+world" query-decode ] unit-test + +[ "\u001234hi\u002045" ] [ "\u001234hi\u002045" url-encode url-decode ] unit-test + +[ "a=b&a=c" ] [ { { "a" { "b" "c" } } } assoc>query ] unit-test + +[ H{ { "a" "b" } } ] [ "a=b" query>assoc ] unit-test + +[ H{ { "a" { "b" "c" } } } ] [ "a=b&a=c" query>assoc ] unit-test + +[ H{ { "text" "hello world" } } ] [ "text=hello+world" query>assoc ] unit-test + +[ "a=3" ] [ { { "a" 3 } } assoc>query ] unit-test diff --git a/basis/urls/encoding/encoding.factor b/basis/urls/encoding/encoding.factor new file mode 100644 index 0000000000..2f89084488 --- /dev/null +++ b/basis/urls/encoding/encoding.factor @@ -0,0 +1,96 @@ +! Copyright (C) 2008 Slava Pestov. +! See http://factorcode.org/license.txt for BSD license. +USING: kernel ascii combinators combinators.short-circuit +sequences splitting fry namespaces make assocs arrays strings +io.sockets io.sockets.secure io.encodings.string +io.encodings.utf8 math math.parser accessors hashtables present ; +IN: urls.encoding + +: url-quotable? ( ch -- ? ) + { + [ letter? ] + [ LETTER? ] + [ digit? ] + [ "/_-.:" member? ] + } 1|| ; foldable + +hex 2 CHAR: 0 pad-left % ] each ; + +PRIVATE> + +: url-encode ( str -- encoded ) + [ + [ dup url-quotable? [ , ] [ push-utf8 ] if ] each + ] "" make ; + += [ + 2drop + ] [ + [ 1+ dup 2 + ] dip subseq hex> [ , ] when* + ] if ; + +: url-decode-% ( index str -- index str ) + 2dup url-decode-hex ; + +: url-decode-iter ( index str -- ) + 2dup length >= [ + 2drop + ] [ + 2dup nth dup CHAR: % = [ + drop url-decode-% [ 3 + ] dip + ] [ + , [ 1+ ] dip + ] if url-decode-iter + ] if ; + +PRIVATE> + +: url-decode ( str -- decoded ) + [ 0 swap url-decode-iter ] "" make utf8 decode ; + +: query-decode ( str -- decoded ) + [ dup CHAR: + = [ drop "%20" ] [ 1string ] if ] { } map-as + concat url-decode ; + + + +: query>assoc ( query -- assoc ) + dup [ + "&" split H{ } clone [ + [ + [ "=" split1 [ dup [ query-decode ] when ] bi@ swap ] dip + add-query-param + ] curry each + ] keep + ] when ; + +: assoc>query ( assoc -- str ) + [ + dup array? [ [ present ] map ] [ present 1array ] if + ] assoc-map + [ + [ + [ url-encode ] dip + [ url-encode "=" swap 3append , ] with each + ] assoc-each + ] { } make "&" join ; diff --git a/basis/urls/encoding/summary.txt b/basis/urls/encoding/summary.txt new file mode 100644 index 0000000000..d156e44c56 --- /dev/null +++ b/basis/urls/encoding/summary.txt @@ -0,0 +1 @@ +URL and form encoding/decoding diff --git a/basis/urls/encoding/tags.txt b/basis/urls/encoding/tags.txt new file mode 100644 index 0000000000..c0772185a0 --- /dev/null +++ b/basis/urls/encoding/tags.txt @@ -0,0 +1 @@ +web diff --git a/basis/urls/urls-docs.factor b/basis/urls/urls-docs.factor index 166ad9d586..03ffaded05 100644 --- a/basis/urls/urls-docs.factor +++ b/basis/urls/urls-docs.factor @@ -46,37 +46,6 @@ HELP: URL" } } ; -HELP: assoc>query -{ $values { "assoc" assoc } { "str" string } } -{ $description "Converts an assoc of query parameters into a query string, performing URL encoding." } -{ $notes "This word is used to implement the " { $link present } " method on URLs; it is also used by the HTTP client to encode POST requests." } -{ $examples - { $example - "USING: io urls ;" - "{ { \"from\" \"Lead\" } { \"to\" \"Gold, please\" } }" - "assoc>query print" - "from=Lead&to=Gold%2c+please" - } -} ; - -HELP: query>assoc -{ $values { "query" string } { "assoc" assoc } } -{ $description "Parses a URL query string and URL-decodes each component." } -{ $notes "This word is used to implement " { $link >url } ". It is also used by the HTTP server to parse POST requests." } -{ $examples - { $unchecked-example - "USING: prettyprint urls ;" - "\"gender=female&agefrom=22&ageto=28&location=Omaha+NE\"" - "query>assoc ." - <" H{ - { "gender" "female" } - { "agefrom" "22" } - { "ageto" "28" } - { "location" "Omaha NE" } -}"> - } -} ; - HELP: derive-url { $values { "base" url } { "url" url } { "url'" url } } { $description "Builds a URL by filling in missing components of " { $snippet "url" } " from " { $snippet "base" } "." } @@ -192,28 +161,7 @@ HELP: url-append-path { $values { "path1" string } { "path2" string } { "path" string } } { $description "Like " { $link append-path } ", but intended for use with URL paths and not filesystem paths." } ; -HELP: url-decode -{ $values { "str" string } { "decoded" string } } -{ $description "Decodes a URL-encoded string." } ; - -HELP: url-encode -{ $values { "str" string } { "encoded" string } } -{ $description "URL-encodes a string." } ; - -HELP: url-quotable? -{ $values { "ch" "a character" } { "?" "a boolean" } } -{ $description "Tests if a character be used without URL-encoding in a URL." } ; - -ARTICLE: "url-encoding" "URL encoding and decoding" -"URL encoding and decoding strings:" -{ $subsection url-encode } -{ $subsection url-decode } -{ $subsection url-quotable? } -"The URL implemention encodes and decodes components of " { $link url } " instances automatically, but sometimes it is required for non-URL strings. See " { $url "http://en.wikipedia.org/wiki/Percent-encoding" } " for a description of URL encoding." ; - ARTICLE: "url-utilities" "URL implementation utilities" -{ $subsection assoc>query } -{ $subsection query>assoc } { $subsection parse-host } { $subsection secure-protocol? } { $subsection url-append-path } ; @@ -240,8 +188,9 @@ $nl { $subsection set-query-param } "Creating " { $link "network-addressing" } " from URLs:" { $subsection url-addr } -"Additional topics:" -{ $subsection "url-utilities" } -{ $subsection "url-encoding" } ; +"The URL implemention encodes and decodes components of " { $link url } " instances automatically, but sometimes this functionality is needed for non-URL strings." +{ $subsection "url-encoding" } +"Utility words used by the URL implementation:" +{ $subsection "url-utilities" } ; ABOUT: "urls" diff --git a/basis/urls/urls-tests.factor b/basis/urls/urls-tests.factor index b0bf950178..cac206bf3c 100644 --- a/basis/urls/urls-tests.factor +++ b/basis/urls/urls-tests.factor @@ -2,30 +2,6 @@ IN: urls.tests USING: urls urls.private tools.test arrays kernel assocs present accessors ; -[ "hello+world" ] [ "hello world" url-encode ] unit-test -[ "hello world" ] [ "hello%20world" url-decode ] unit-test -[ "~hello world" ] [ "%7ehello+world" url-decode ] unit-test -[ f ] [ "%XX%XX%XX" url-decode ] unit-test -[ f ] [ "%XX%XX%X" url-decode ] unit-test - -[ "hello world" ] [ "hello+world" url-decode ] unit-test -[ "hello world" ] [ "hello%20world" url-decode ] unit-test -[ " ! " ] [ "%20%21%20" url-decode ] unit-test -[ "hello world" ] [ "hello world%" url-decode ] unit-test -[ "hello world" ] [ "hello world%x" url-decode ] unit-test -[ "hello+world" ] [ "hello world" url-encode ] unit-test -[ "+%21+" ] [ " ! " url-encode ] unit-test - -[ "\u001234hi\u002045" ] [ "\u001234hi\u002045" url-encode url-decode ] unit-test - -[ "a=b&a=c" ] [ { { "a" { "b" "c" } } } assoc>query ] unit-test - -[ H{ { "a" "b" } } ] [ "a=b" query>assoc ] unit-test - -[ H{ { "a" { "b" "c" } } } ] [ "a=b&a=c" query>assoc ] unit-test - -[ "a=3" ] [ { { "a" 3 } } assoc>query ] unit-test - : urls { { diff --git a/basis/urls/urls.factor b/basis/urls/urls.factor index 5fe9bbb5a0..5ebcabede8 100644 --- a/basis/urls/urls.factor +++ b/basis/urls/urls.factor @@ -5,99 +5,9 @@ sequences splitting fry namespaces make assocs arrays strings io.sockets io.sockets.secure io.encodings.string io.encodings.utf8 math math.parser accessors parser strings.parser lexer prettyprint.backend hashtables present -peg.ebnf ; +peg.ebnf urls.encoding ; IN: urls -: url-quotable? ( ch -- ? ) - { - [ letter? ] - [ LETTER? ] - [ digit? ] - [ "/_-.:" member? ] - } 1|| ; foldable - -hex 2 CHAR: 0 pad-left % ] each - ] if ; - -PRIVATE> - -: url-encode ( str -- encoded ) - [ - [ dup url-quotable? [ , ] [ push-utf8 ] if ] each - ] "" make ; - -= [ - 2drop - ] [ - [ 1+ dup 2 + ] dip subseq hex> [ , ] when* - ] if ; - -: url-decode-% ( index str -- index str ) - 2dup url-decode-hex [ 3 + ] dip ; - -: url-decode-+-or-other ( index str ch -- index str ) - dup CHAR: + = [ drop CHAR: \s ] when , [ 1+ ] dip ; - -: url-decode-iter ( index str -- ) - 2dup length >= [ - 2drop - ] [ - 2dup nth dup CHAR: % = [ - drop url-decode-% - ] [ - url-decode-+-or-other - ] if url-decode-iter - ] if ; - -PRIVATE> - -: url-decode ( str -- decoded ) - [ 0 swap url-decode-iter ] "" make utf8 decode ; - - - -: query>assoc ( query -- assoc ) - dup [ - "&" split H{ } clone [ - [ - [ "=" split1 [ dup [ url-decode ] when ] bi@ swap ] dip - add-query-param - ] curry each - ] keep - ] when ; - -: assoc>query ( assoc -- str ) - [ - dup array? [ [ present ] map ] [ present 1array ] if - ] assoc-map - [ - [ - [ url-encode ] dip - [ url-encode "=" swap 3append , ] with each - ] assoc-each - ] { } make "&" join ; - TUPLE: url protocol username password host port path query anchor ; : ( -- url ) url new ;