urls: better parsing of the scheme component of urls

normalize to lowercase and a few more characters are allowed in the
protocol part.
char-rename
Björn Lindqvist 2016-07-31 02:46:39 +02:00
parent ab66a73744
commit 97d3c42091
2 changed files with 123 additions and 91 deletions

View File

@ -1,95 +1,121 @@
USING: accessors arrays assocs io.sockets io.sockets.secure USING: accessors arrays assocs io.sockets io.sockets.secure kernel
kernel linked-assocs present prettyprint tools.test urls ; linked-assocs present prettyprint sequences tools.test urls ;
IN: urls.tests IN: urls.tests
CONSTANT: urls CONSTANT: urls {
{ {
{ T{ url
T{ url { protocol "http" }
{ protocol "http" } { host "www.apple.com" }
{ host "www.apple.com" } { port 1234 }
{ port 1234 } { path "/a/path" }
{ path "/a/path" } { query LH{ { "a" "b" } } }
{ query LH{ { "a" "b" } } } { anchor "foo" }
{ anchor "foo" } }
} "http://www.apple.com:1234/a/path?a=b#foo"
"http://www.apple.com:1234/a/path?a=b#foo"
}
{
T{ url
{ protocol "http" }
{ host "www.apple.com" }
{ path "/a/path" }
{ query LH{ { "a" "b" } } }
{ anchor "foo" }
}
"http://www.apple.com/a/path?a=b#foo"
}
{
T{ url
{ protocol "http" }
{ host "www.apple.com" }
{ port 1234 }
{ path "/another/fine/path" }
{ anchor "foo" }
}
"http://www.apple.com:1234/another/fine/path#foo"
}
{
T{ url
{ path "/a/relative/path" }
{ anchor "foo" }
}
"/a/relative/path#foo"
}
{
T{ url
{ path "/a/relative/path" }
}
"/a/relative/path"
}
{
T{ url
{ path "a/relative/path" }
}
"a/relative/path"
}
{
T{ url
{ path "bar" }
{ query LH{ { "a" "b" } } }
}
"bar?a=b"
}
{
T{ url
{ protocol "ftp" }
{ host "ftp.kernel.org" }
{ username "slava" }
{ path "/" }
}
"ftp://slava@ftp.kernel.org/"
}
{
T{ url
{ protocol "ftp" }
{ host "ftp.kernel.org" }
{ username "slava" }
{ password "secret" }
{ path "/" }
}
"ftp://slava:secret@ftp.kernel.org/"
}
{
T{ url
{ protocol "http" }
{ host "foo.com" }
{ path "/" }
{ query LH{ { "a" f } } }
}
"http://foo.com/?a"
}
} }
{
T{ url
{ protocol "http" }
{ host "www.apple.com" }
{ path "/a/path" }
{ query LH{ { "a" "b" } } }
{ anchor "foo" }
}
"http://www.apple.com/a/path?a=b#foo"
}
{
T{ url
{ protocol "http" }
{ host "www.apple.com" }
{ port 1234 }
{ path "/another/fine/path" }
{ anchor "foo" }
}
"http://www.apple.com:1234/another/fine/path#foo"
}
{
T{ url
{ path "/a/relative/path" }
{ anchor "foo" }
}
"/a/relative/path#foo"
}
{
T{ url
{ path "/a/relative/path" }
}
"/a/relative/path"
}
{
T{ url
{ path "a/relative/path" }
}
"a/relative/path"
}
{
T{ url
{ path "bar" }
{ query LH{ { "a" "b" } } }
}
"bar?a=b"
}
{
T{ url
{ protocol "ftp" }
{ host "ftp.kernel.org" }
{ username "slava" }
{ path "/" }
}
"ftp://slava@ftp.kernel.org/"
}
{
T{ url
{ protocol "ftp" }
{ host "ftp.kernel.org" }
{ username "slava" }
{ password "secret" }
{ path "/" }
}
"ftp://slava:secret@ftp.kernel.org/"
}
{
T{ url
{ protocol "http" }
{ host "foo.com" }
{ path "/" }
{ query LH{ { "a" f } } }
}
"http://foo.com/?a"
}
! Capital letters, digits, hyphen, plus and period are allowed
! characters in the scheme
! part. https://tools.ietf.org/html/rfc1738#section-5
{
T{ url
{ protocol "foo.bar" }
{ host "www.google.com" }
{ path "/" }
}
"foo.bar://www.google.com/"
}
{
T{ url
{ protocol "foo.-bar" }
{ host "www.google.com" }
{ path "/" }
}
"foo.-bar://www.google.com/"
}
{
T{ url
{ protocol "t1000" }
{ host "www.google.com" }
{ path "/" }
}
"t1000://www.google.com/"
}
}
urls [ urls [
[ 1array ] [ [ >url ] curry ] bi* unit-test [ 1array ] [ [ >url ] curry ] bi* unit-test
@ -293,3 +319,9 @@ urls [
"baz" "baz" set-query-param "baz" "baz" set-query-param
present present
] unit-test ] unit-test
! Scheme characters are
! case-insensitive. https://tools.ietf.org/html/rfc3986#section-3.1
{ URL" http://www.google.com/" } [
URL" http://www.google.com/"
] unit-test

View File

@ -1,6 +1,6 @@
! Copyright (C) 2008, 2011 Slava Pestov. ! Copyright (C) 2008, 2011 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays assocs combinators fry io.pathnames USING: accessors arrays ascii assocs combinators fry io.pathnames
io.sockets io.sockets.secure kernel lexer linked-assocs make io.sockets io.sockets.secure kernel lexer linked-assocs make
math.parser namespaces peg.ebnf present sequences splitting math.parser namespaces peg.ebnf present sequences splitting
strings strings.parser urls.encoding vocabs.loader ; strings strings.parser urls.encoding vocabs.loader ;
@ -37,7 +37,7 @@ M: url >url ;
EBNF: parse-url EBNF: parse-url
protocol = [a-z+]+ => [[ url-decode ]] protocol = [a-zA-Z0-9.+-]+ => [[ url-decode ]]
username = [^/:@#?]+ => [[ url-decode ]] username = [^/:@#?]+ => [[ url-decode ]]
password = [^/:@#?]+ => [[ url-decode ]] password = [^/:@#?]+ => [[ url-decode ]]
pathname = [^#?]+ => [[ url-decode ]] pathname = [^#?]+ => [[ url-decode ]]
@ -66,7 +66,7 @@ M: string >url
parse-url { parse-url {
[ [
first [ first [
[ first >>protocol ] [ first >lower >>protocol ]
[ [
second second
[ first [ first2 [ >>username ] [ >>password ] bi* ] when* ] [ first [ first2 [ >>username ] [ >>password ] bi* ] when* ]