diff --git a/extra/urls/authors.txt b/extra/urls/authors.txt new file mode 100644 index 0000000000..1901f27a24 --- /dev/null +++ b/extra/urls/authors.txt @@ -0,0 +1 @@ +Slava Pestov diff --git a/extra/urls/summary.txt b/extra/urls/summary.txt new file mode 100644 index 0000000000..caeda3d21a --- /dev/null +++ b/extra/urls/summary.txt @@ -0,0 +1 @@ +Tools for working with URLs (uniform resource locators) diff --git a/extra/urls/tags.txt b/extra/urls/tags.txt new file mode 100644 index 0000000000..93e65ae758 --- /dev/null +++ b/extra/urls/tags.txt @@ -0,0 +1,2 @@ +web +network diff --git a/extra/urls/urls-tests.factor b/extra/urls/urls-tests.factor new file mode 100644 index 0000000000..dd319a1e65 --- /dev/null +++ b/extra/urls/urls-tests.factor @@ -0,0 +1,162 @@ +IN: urls.tests +USING: urls tools.test tuple-syntax arrays kernel assocs ; + +[ "hello%20world" ] [ "hello world" url-encode ] unit-test +[ "hello world" ] [ "hello%20world" url-decode ] unit-test +[ "~hello world" ] [ "%7ehello+world" url-decode ] unit-test +[ f ] [ "%XX%XX%XX" url-decode ] unit-test +[ f ] [ "%XX%XX%X" url-decode ] unit-test + +[ "hello world" ] [ "hello+world" url-decode ] unit-test +[ "hello world" ] [ "hello%20world" url-decode ] unit-test +[ " ! " ] [ "%20%21%20" url-decode ] unit-test +[ "hello world" ] [ "hello world%" url-decode ] unit-test +[ "hello world" ] [ "hello world%x" url-decode ] unit-test +[ "hello%20world" ] [ "hello world" url-encode ] unit-test +[ "%20%21%20" ] [ " ! " url-encode ] unit-test + +[ "\u001234hi\u002045" ] [ "\u001234hi\u002045" url-encode url-decode ] unit-test + +[ "a=b&a=c" ] [ { { "a" { "b" "c" } } } assoc>query ] unit-test + +[ H{ { "a" "b" } } ] [ "a=b" query>assoc ] unit-test + +[ H{ { "a" { "b" "c" } } } ] [ "a=b&a=c" query>assoc ] unit-test + +[ "a=3" ] [ { { "a" 3 } } assoc>query ] unit-test + +: urls + { + { + TUPLE{ url + protocol: "http" + host: "www.apple.com" + port: 1234 + path: "/a/path" + query: H{ { "a" "b" } } + anchor: "foo" + } + "http://www.apple.com:1234/a/path?a=b#foo" + } + { + TUPLE{ url + protocol: "http" + host: "www.apple.com" + path: "/a/path" + query: H{ { "a" "b" } } + anchor: "foo" + } + "http://www.apple.com/a/path?a=b#foo" + } + { + TUPLE{ url + protocol: "http" + host: "www.apple.com" + port: 1234 + path: "/another/fine/path" + anchor: "foo" + } + "http://www.apple.com:1234/another/fine/path#foo" + } + { + TUPLE{ url + path: "/a/relative/path" + anchor: "foo" + } + "/a/relative/path#foo" + } + { + TUPLE{ url + path: "/a/relative/path" + } + "/a/relative/path" + } + { + TUPLE{ url + path: "a/relative/path" + } + "a/relative/path" + } + } ; + +urls [ + [ 1array ] [ [ string>url ] curry ] bi* unit-test +] assoc-each + +urls [ + swap [ 1array ] [ [ url>string ] curry ] bi* unit-test +] assoc-each + +[ + TUPLE{ url + protocol: "http" + host: "www.apple.com" + port: 1234 + path: "/a/path" + } +] [ + TUPLE{ url + path: "/a/path" + } + + TUPLE{ url + protocol: "http" + host: "www.apple.com" + port: 1234 + path: "/foo" + } + + derive-url +] unit-test + +[ + TUPLE{ url + protocol: "http" + host: "www.apple.com" + port: 1234 + path: "/a/path/relative/path" + query: H{ { "a" "b" } } + anchor: "foo" + } +] [ + TUPLE{ url + path: "relative/path" + query: H{ { "a" "b" } } + anchor: "foo" + } + + TUPLE{ url + protocol: "http" + host: "www.apple.com" + port: 1234 + path: "/a/path" + } + + derive-url +] unit-test + +[ + TUPLE{ url + protocol: "http" + host: "www.apple.com" + port: 1234 + path: "/a/path/relative/path" + query: H{ { "a" "b" } } + anchor: "foo" + } +] [ + TUPLE{ url + path: "relative/path" + query: H{ { "a" "b" } } + anchor: "foo" + } + + TUPLE{ url + protocol: "http" + host: "www.apple.com" + port: 1234 + path: "/a/path/" + } + + derive-url +] unit-test diff --git a/extra/urls/urls.factor b/extra/urls/urls.factor new file mode 100644 index 0000000000..86f3de651d --- /dev/null +++ b/extra/urls/urls.factor @@ -0,0 +1,143 @@ +USING: kernel unicode.categories combinators sequences splitting +fry namespaces assocs arrays strings mirrors +io.encodings.string io.encodings.utf8 +math math.parser accessors namespaces.lib ; +IN: urls + +: url-quotable? ( ch -- ? ) + #! In a URL, can this character be used without + #! URL-encoding? + { + { [ dup letter? ] [ t ] } + { [ dup LETTER? ] [ t ] } + { [ dup digit? ] [ t ] } + { [ dup "/_-.:" member? ] [ t ] } + [ f ] + } cond nip ; foldable + +: push-utf8 ( ch -- ) + 1string utf8 encode + [ CHAR: % , >hex 2 CHAR: 0 pad-left % ] each ; + +: url-encode ( str -- str ) + [ + [ dup url-quotable? [ , ] [ push-utf8 ] if ] each + ] "" make ; + +: url-decode-hex ( index str -- ) + 2dup length 2 - >= [ + 2drop + ] [ + [ 1+ dup 2 + ] dip subseq hex> [ , ] when* + ] if ; + +: url-decode-% ( index str -- index str ) + 2dup url-decode-hex [ 3 + ] dip ; + +: url-decode-+-or-other ( index str ch -- index str ) + dup CHAR: + = [ drop CHAR: \s ] when , [ 1+ ] dip ; + +: url-decode-iter ( index str -- ) + 2dup length >= [ + 2drop + ] [ + 2dup nth dup CHAR: % = [ + drop url-decode-% + ] [ + url-decode-+-or-other + ] if url-decode-iter + ] if ; + +: url-decode ( str -- str ) + [ 0 swap url-decode-iter ] "" make utf8 decode ; + +: add-query-param ( value key assoc -- ) + [ + at [ + { + { [ dup string? ] [ swap 2array ] } + { [ dup array? ] [ swap suffix ] } + { [ dup not ] [ drop ] } + } cond + ] when* + ] 2keep set-at ; + +: query>assoc ( query -- assoc ) + dup [ + "&" split H{ } clone [ + [ + [ "=" split1 [ dup [ url-decode ] when ] bi@ swap ] dip + add-query-param + ] curry each + ] keep + ] when ; + +: assoc>query ( hash -- str ) + [ + { + { [ dup number? ] [ number>string 1array ] } + { [ dup string? ] [ 1array ] } + { [ dup sequence? ] [ ] } + } cond + ] assoc-map + [ + [ + [ url-encode ] dip + [ url-encode "=" swap 3append , ] with each + ] assoc-each + ] { } make "&" join ; + +TUPLE: url protocol host port path query anchor ; + +: parse-host-part ( protocol rest -- string' ) + [ "protocol" set ] [ + "//" ?head [ "Invalid URL" throw ] unless + "/" split1 [ + ":" split1 + [ url-decode "host" set ] [ + dup [ + string>number + dup [ "Invalid port" throw ] unless + ] when "port" set + ] bi* + ] [ "/" prepend ] bi* + ] bi* ; + +: string>url ( string -- url ) + [ + ":" split1 [ parse-host-part ] when* + "#" split1 [ + "?" split1 [ query>assoc "query" set ] when* + url-decode "path" set + ] [ + url-decode "anchor" set + ] bi* + ] url make-object ; + +: unparse-host-part ( protocol -- ) + % + "://" % + "host" get url-encode % + "port" get [ ":" % # ] when* + "path" get "/" head? [ "Invalid URL" throw ] unless ; + +: url>string ( url -- string ) + [ + [ + "protocol" get [ unparse-host-part ] when* + "path" get url-encode % + "query" get [ "?" % assoc>query % ] when* + "anchor" get [ "#" % url-encode % ] when* + ] bind + ] "" make ; + +: fix-relative-path ( url base -- url base ) + over path>> '[ + "/" ?tail drop "/" , 3append + ] change-path + [ f >>path ] dip ; inline + +: derive-url ( url base -- url' ) + clone + over path>> "/" head? [ fix-relative-path ] unless + [ swap [ nip ] assoc-filter update ] keep ;