URLs library, abstracted out from http.server

db4
Slava Pestov 2008-05-31 23:20:24 -05:00
parent 6f45cf60ce
commit 3ab71b00a9
5 changed files with 309 additions and 0 deletions

1
extra/urls/authors.txt Normal file
View File

@ -0,0 +1 @@
Slava Pestov

1
extra/urls/summary.txt Normal file
View File

@ -0,0 +1 @@
Tools for working with URLs (uniform resource locators)

2
extra/urls/tags.txt Normal file
View File

@ -0,0 +1,2 @@
web
network

View File

@ -0,0 +1,162 @@
IN: urls.tests
USING: urls tools.test tuple-syntax arrays kernel assocs ;
[ "hello%20world" ] [ "hello world" url-encode ] unit-test
[ "hello world" ] [ "hello%20world" url-decode ] unit-test
[ "~hello world" ] [ "%7ehello+world" url-decode ] unit-test
[ f ] [ "%XX%XX%XX" url-decode ] unit-test
[ f ] [ "%XX%XX%X" url-decode ] unit-test
[ "hello world" ] [ "hello+world" url-decode ] unit-test
[ "hello world" ] [ "hello%20world" url-decode ] unit-test
[ " ! " ] [ "%20%21%20" url-decode ] unit-test
[ "hello world" ] [ "hello world%" url-decode ] unit-test
[ "hello world" ] [ "hello world%x" url-decode ] unit-test
[ "hello%20world" ] [ "hello world" url-encode ] unit-test
[ "%20%21%20" ] [ " ! " url-encode ] unit-test
[ "\u001234hi\u002045" ] [ "\u001234hi\u002045" url-encode url-decode ] unit-test
[ "a=b&a=c" ] [ { { "a" { "b" "c" } } } assoc>query ] unit-test
[ H{ { "a" "b" } } ] [ "a=b" query>assoc ] unit-test
[ H{ { "a" { "b" "c" } } } ] [ "a=b&a=c" query>assoc ] unit-test
[ "a=3" ] [ { { "a" 3 } } assoc>query ] unit-test
: urls
{
{
TUPLE{ url
protocol: "http"
host: "www.apple.com"
port: 1234
path: "/a/path"
query: H{ { "a" "b" } }
anchor: "foo"
}
"http://www.apple.com:1234/a/path?a=b#foo"
}
{
TUPLE{ url
protocol: "http"
host: "www.apple.com"
path: "/a/path"
query: H{ { "a" "b" } }
anchor: "foo"
}
"http://www.apple.com/a/path?a=b#foo"
}
{
TUPLE{ url
protocol: "http"
host: "www.apple.com"
port: 1234
path: "/another/fine/path"
anchor: "foo"
}
"http://www.apple.com:1234/another/fine/path#foo"
}
{
TUPLE{ url
path: "/a/relative/path"
anchor: "foo"
}
"/a/relative/path#foo"
}
{
TUPLE{ url
path: "/a/relative/path"
}
"/a/relative/path"
}
{
TUPLE{ url
path: "a/relative/path"
}
"a/relative/path"
}
} ;
urls [
[ 1array ] [ [ string>url ] curry ] bi* unit-test
] assoc-each
urls [
swap [ 1array ] [ [ url>string ] curry ] bi* unit-test
] assoc-each
[
TUPLE{ url
protocol: "http"
host: "www.apple.com"
port: 1234
path: "/a/path"
}
] [
TUPLE{ url
path: "/a/path"
}
TUPLE{ url
protocol: "http"
host: "www.apple.com"
port: 1234
path: "/foo"
}
derive-url
] unit-test
[
TUPLE{ url
protocol: "http"
host: "www.apple.com"
port: 1234
path: "/a/path/relative/path"
query: H{ { "a" "b" } }
anchor: "foo"
}
] [
TUPLE{ url
path: "relative/path"
query: H{ { "a" "b" } }
anchor: "foo"
}
TUPLE{ url
protocol: "http"
host: "www.apple.com"
port: 1234
path: "/a/path"
}
derive-url
] unit-test
[
TUPLE{ url
protocol: "http"
host: "www.apple.com"
port: 1234
path: "/a/path/relative/path"
query: H{ { "a" "b" } }
anchor: "foo"
}
] [
TUPLE{ url
path: "relative/path"
query: H{ { "a" "b" } }
anchor: "foo"
}
TUPLE{ url
protocol: "http"
host: "www.apple.com"
port: 1234
path: "/a/path/"
}
derive-url
] unit-test

143
extra/urls/urls.factor Normal file
View File

@ -0,0 +1,143 @@
USING: kernel unicode.categories combinators sequences splitting
fry namespaces assocs arrays strings mirrors
io.encodings.string io.encodings.utf8
math math.parser accessors namespaces.lib ;
IN: urls
: url-quotable? ( ch -- ? )
#! In a URL, can this character be used without
#! URL-encoding?
{
{ [ dup letter? ] [ t ] }
{ [ dup LETTER? ] [ t ] }
{ [ dup digit? ] [ t ] }
{ [ dup "/_-.:" member? ] [ t ] }
[ f ]
} cond nip ; foldable
: push-utf8 ( ch -- )
1string utf8 encode
[ CHAR: % , >hex 2 CHAR: 0 pad-left % ] each ;
: url-encode ( str -- str )
[
[ dup url-quotable? [ , ] [ push-utf8 ] if ] each
] "" make ;
: url-decode-hex ( index str -- )
2dup length 2 - >= [
2drop
] [
[ 1+ dup 2 + ] dip subseq hex> [ , ] when*
] if ;
: url-decode-% ( index str -- index str )
2dup url-decode-hex [ 3 + ] dip ;
: url-decode-+-or-other ( index str ch -- index str )
dup CHAR: + = [ drop CHAR: \s ] when , [ 1+ ] dip ;
: url-decode-iter ( index str -- )
2dup length >= [
2drop
] [
2dup nth dup CHAR: % = [
drop url-decode-%
] [
url-decode-+-or-other
] if url-decode-iter
] if ;
: url-decode ( str -- str )
[ 0 swap url-decode-iter ] "" make utf8 decode ;
: add-query-param ( value key assoc -- )
[
at [
{
{ [ dup string? ] [ swap 2array ] }
{ [ dup array? ] [ swap suffix ] }
{ [ dup not ] [ drop ] }
} cond
] when*
] 2keep set-at ;
: query>assoc ( query -- assoc )
dup [
"&" split H{ } clone [
[
[ "=" split1 [ dup [ url-decode ] when ] bi@ swap ] dip
add-query-param
] curry each
] keep
] when ;
: assoc>query ( hash -- str )
[
{
{ [ dup number? ] [ number>string 1array ] }
{ [ dup string? ] [ 1array ] }
{ [ dup sequence? ] [ ] }
} cond
] assoc-map
[
[
[ url-encode ] dip
[ url-encode "=" swap 3append , ] with each
] assoc-each
] { } make "&" join ;
TUPLE: url protocol host port path query anchor ;
: parse-host-part ( protocol rest -- string' )
[ "protocol" set ] [
"//" ?head [ "Invalid URL" throw ] unless
"/" split1 [
":" split1
[ url-decode "host" set ] [
dup [
string>number
dup [ "Invalid port" throw ] unless
] when "port" set
] bi*
] [ "/" prepend ] bi*
] bi* ;
: string>url ( string -- url )
[
":" split1 [ parse-host-part ] when*
"#" split1 [
"?" split1 [ query>assoc "query" set ] when*
url-decode "path" set
] [
url-decode "anchor" set
] bi*
] url make-object ;
: unparse-host-part ( protocol -- )
%
"://" %
"host" get url-encode %
"port" get [ ":" % # ] when*
"path" get "/" head? [ "Invalid URL" throw ] unless ;
: url>string ( url -- string )
[
<mirror> [
"protocol" get [ unparse-host-part ] when*
"path" get url-encode %
"query" get [ "?" % assoc>query % ] when*
"anchor" get [ "#" % url-encode % ] when*
] bind
] "" make ;
: fix-relative-path ( url base -- url base )
over path>> '[
"/" ?tail drop "/" , 3append
] change-path
[ f >>path ] dip ; inline
: derive-url ( url base -- url' )
clone
over path>> "/" head? [ fix-relative-path ] unless
[ <mirror> swap <mirror> [ nip ] assoc-filter update ] keep ;