New, more correct URL parser
parent
943ac501a2
commit
35b5bd9898
|
@ -227,3 +227,27 @@ urls [
|
||||||
[ "foo#3" ] [ URL" foo" clone 3 >>anchor present ] unit-test
|
[ "foo#3" ] [ URL" foo" clone 3 >>anchor present ] unit-test
|
||||||
|
|
||||||
[ "http://www.foo.com/" ] [ "http://www.foo.com:80" >url present ] unit-test
|
[ "http://www.foo.com/" ] [ "http://www.foo.com:80" >url present ] unit-test
|
||||||
|
|
||||||
|
[ f ] [ URL" /gp/redirect.html/002-7009742-0004012?location=http://advantage.amazon.com/gp/vendor/public/join%26token%3d77E3769AB3A5B6CF611699E150DC33010761CE12" protocol>> ] unit-test
|
||||||
|
|
||||||
|
[
|
||||||
|
T{ url
|
||||||
|
{ protocol "http" }
|
||||||
|
{ host "localhost" }
|
||||||
|
{ query H{ { "foo" "bar" } } }
|
||||||
|
{ path "/" }
|
||||||
|
}
|
||||||
|
]
|
||||||
|
[ "http://localhost?foo=bar" >url ] unit-test
|
||||||
|
|
||||||
|
[
|
||||||
|
T{ url
|
||||||
|
{ protocol "http" }
|
||||||
|
{ host "localhost" }
|
||||||
|
{ query H{ { "foo" "bar" } } }
|
||||||
|
{ path "/" }
|
||||||
|
}
|
||||||
|
]
|
||||||
|
[ "http://localhost/?foo=bar" >url ] unit-test
|
||||||
|
|
||||||
|
[ "/" ] [ "http://www.jedit.org" >url path>> ] unit-test
|
||||||
|
|
|
@ -4,7 +4,8 @@ USING: kernel ascii combinators combinators.short-circuit
|
||||||
sequences splitting fry namespaces make assocs arrays strings
|
sequences splitting fry namespaces make assocs arrays strings
|
||||||
io.sockets io.sockets.secure io.encodings.string
|
io.sockets io.sockets.secure io.encodings.string
|
||||||
io.encodings.utf8 math math.parser accessors parser
|
io.encodings.utf8 math math.parser accessors parser
|
||||||
strings.parser lexer prettyprint.backend hashtables present ;
|
strings.parser lexer prettyprint.backend hashtables present
|
||||||
|
peg.ebnf ;
|
||||||
IN: urls
|
IN: urls
|
||||||
|
|
||||||
: url-quotable? ( ch -- ? )
|
: url-quotable? ( ch -- ? )
|
||||||
|
@ -122,38 +123,57 @@ TUPLE: url protocol username password host port path query anchor ;
|
||||||
] when
|
] when
|
||||||
] bi* ;
|
] bi* ;
|
||||||
|
|
||||||
<PRIVATE
|
|
||||||
|
|
||||||
: parse-host-part ( url protocol rest -- url string' )
|
|
||||||
[ >>protocol ] [
|
|
||||||
"//" ?head [ "Invalid URL" throw ] unless
|
|
||||||
"@" split1 [
|
|
||||||
[
|
|
||||||
":" split1 [ >>username ] [ >>password ] bi*
|
|
||||||
] dip
|
|
||||||
] when*
|
|
||||||
"/" split1 [
|
|
||||||
parse-host [ >>host ] [ >>port ] bi*
|
|
||||||
] [ "/" prepend ] bi*
|
|
||||||
] bi* ;
|
|
||||||
|
|
||||||
PRIVATE>
|
|
||||||
|
|
||||||
GENERIC: >url ( obj -- url )
|
GENERIC: >url ( obj -- url )
|
||||||
|
|
||||||
M: f >url drop <url> ;
|
M: f >url drop <url> ;
|
||||||
|
|
||||||
M: url >url ;
|
M: url >url ;
|
||||||
|
|
||||||
|
<PRIVATE
|
||||||
|
|
||||||
|
EBNF: parse-url
|
||||||
|
|
||||||
|
protocol = [a-z]+ => [[ url-decode ]]
|
||||||
|
username = [^/:@#?]+ => [[ url-decode ]]
|
||||||
|
password = [^/:@#?]+ => [[ url-decode ]]
|
||||||
|
pathname = [^#?]+ => [[ url-decode ]]
|
||||||
|
query = [^#]+ => [[ query>assoc ]]
|
||||||
|
anchor = .+ => [[ url-decode ]]
|
||||||
|
|
||||||
|
hostname = [^/#?]+ => [[ url-decode ]]
|
||||||
|
|
||||||
|
hostname-spec = hostname ("/"|!(.)) => [[ first ]]
|
||||||
|
|
||||||
|
auth = (username (":" password => [[ second ]])? "@"
|
||||||
|
=> [[ first2 2array ]])?
|
||||||
|
|
||||||
|
url = ((protocol "://") => [[ first ]] auth hostname)?
|
||||||
|
(pathname)?
|
||||||
|
("?" query => [[ second ]])?
|
||||||
|
("#" anchor => [[ second ]])?
|
||||||
|
|
||||||
|
;EBNF
|
||||||
|
|
||||||
|
PRIVATE>
|
||||||
|
|
||||||
M: string >url
|
M: string >url
|
||||||
<url> swap
|
parse-url {
|
||||||
":" split1 [ parse-host-part ] when*
|
[
|
||||||
"#" split1 [
|
first [
|
||||||
"?" split1
|
[ first ] ! protocol
|
||||||
[ url-decode >>path ]
|
[
|
||||||
[ [ query>assoc >>query ] when* ] bi*
|
second
|
||||||
]
|
[ first [ first2 ] [ f f ] if* ] ! username, password
|
||||||
[ url-decode >>anchor ] bi* ;
|
[ second parse-host ] ! host, port
|
||||||
|
bi
|
||||||
|
] bi
|
||||||
|
] [ f f f f f ] if*
|
||||||
|
]
|
||||||
|
[ second ] ! pathname
|
||||||
|
[ third ] ! query
|
||||||
|
[ fourth ] ! anchor
|
||||||
|
} cleave url boa
|
||||||
|
dup host>> [ [ "/" or ] change-path ] when ;
|
||||||
|
|
||||||
: protocol-port ( protocol -- port )
|
: protocol-port ( protocol -- port )
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue