factor/basis/urls/urls.factor

203 lines
5.1 KiB
Factor
Raw Normal View History

! Copyright (C) 2008, 2011 Slava Pestov.
2008-06-01 01:59:06 -04:00
! See http://factorcode.org/license.txt for BSD license.
2012-10-22 23:16:19 -04:00
USING: accessors arrays assocs combinators fry hashtables
io.pathnames io.sockets kernel lexer make math.parser
namespaces peg.ebnf present sequences splitting strings
strings.parser urls.encoding vocabs.loader ;
IN: urls
TUPLE: url protocol username password host port path query anchor ;
: <url> ( -- url ) url new ;
2008-06-02 16:00:03 -04:00
: query-param ( url key -- value )
2008-06-01 01:59:06 -04:00
swap query>> at ;
2008-09-25 03:46:37 -04:00
: delete-query-param ( url key -- url )
over query>> delete-at ;
2008-06-02 16:00:03 -04:00
: set-query-param ( url value key -- url )
2008-09-25 03:46:37 -04:00
over [
'[ [ _ _ ] dip ?set-at ] change-query
] [
nip delete-query-param
] if ;
2008-06-01 01:59:06 -04:00
ERROR: malformed-port ;
: parse-host ( string -- host/f port/f )
[
":" split1-last [ url-decode ]
[ dup [ string>number [ malformed-port ] unless* ] when ] bi*
] [ f f ] if* ;
2008-06-01 01:59:06 -04:00
2008-09-26 19:24:58 -04:00
GENERIC: >url ( obj -- url )
M: f >url drop <url> ;
M: url >url ;
<PRIVATE
2008-09-26 19:24:58 -04:00
EBNF: parse-url
2015-08-17 23:32:28 -04:00
protocol = [a-z+]+ => [[ url-decode ]]
2008-09-26 19:24:58 -04:00
username = [^/:@#?]+ => [[ url-decode ]]
password = [^/:@#?]+ => [[ url-decode ]]
pathname = [^#?]+ => [[ url-decode ]]
2008-09-29 23:54:10 -04:00
query = [^#]+ => [[ query>assoc ]]
2008-09-26 19:24:58 -04:00
anchor = .+ => [[ url-decode ]]
2008-09-26 19:24:58 -04:00
hostname = [^/#?]+ => [[ url-decode ]]
2008-09-26 19:24:58 -04:00
hostname-spec = hostname ("/"|!(.)) => [[ first ]]
2008-09-26 19:24:58 -04:00
auth = (username (":" password => [[ second ]])? "@"
=> [[ first2 2array ]])?
url = (((protocol "://") => [[ first ]] auth hostname)
| (("//") => [[ f ]] auth hostname))?
2008-09-26 19:24:58 -04:00
(pathname)?
("?" query => [[ second ]])?
("#" anchor => [[ second ]])?
;EBNF
PRIVATE>
M: string >url
[ <url> ] dip
2008-09-26 19:24:58 -04:00
parse-url {
[
first [
[ first >>protocol ]
2008-09-26 19:24:58 -04:00
[
second
[ first [ first2 [ >>username ] [ >>password ] bi* ] when* ]
[ second parse-host [ >>host ] [ >>port ] bi* ] bi
2008-09-26 19:24:58 -04:00
] bi
] when*
2008-09-26 19:24:58 -04:00
]
[ second >>path ]
[ third >>query ]
[ fourth >>anchor ]
} cleave
dup host>> [ [ "/" or ] change-path ] when ;
2012-10-22 23:16:19 -04:00
M: pathname >url string>> >url ;
: relative-url ( url -- url' )
clone
f >>protocol
f >>host
f >>port ;
: relative-url? ( url -- ? ) protocol>> not ;
2008-09-23 15:17:02 -04:00
<PRIVATE
2008-09-23 17:11:11 -04:00
: unparse-username-password ( url -- )
dup username>> dup [
% password>> [ ":" % % ] when* "@" %
] [ 2drop ] if ;
2008-09-23 15:17:02 -04:00
: url-port ( url -- port/f )
[ port>> ] [ port>> ] [ protocol>> protocol-port ] tri =
2008-09-23 15:17:02 -04:00
[ drop f ] when ;
: unparse-host-part ( url -- )
{
[ unparse-username-password ]
[ host>> url-encode % ]
2008-09-23 15:17:02 -04:00
[ url-port [ ":" % # ] when* ]
[ path>> "/" head? [ "/" % ] unless ]
} cleave ;
! URL" //foo.com" takes on the protocol of the url it's derived from
: unparse-protocol ( url -- )
dup protocol>> [
% "://" % unparse-host-part
] [
dup host>> [
"//" % unparse-host-part
] [
drop
] if
] if* ;
M: url present
[
{
[ unparse-protocol ]
[ path>> url-encode % ]
[ query>> dup assoc-empty? [ drop ] [ "?" % assoc>query % ] if ]
2008-06-06 19:18:05 -04:00
[ anchor>> [ "#" % present url-encode % ] when* ]
} cleave
] "" make ;
PRIVATE>
2008-06-01 01:59:06 -04:00
: url-append-path ( path1 path2 -- path )
{
{ [ dup "/" head? ] [ nip ] }
{ [ dup empty? ] [ drop ] }
{ [ over "/" tail? ] [ append ] }
{ [ "/" pick start not ] [ nip ] }
2008-11-22 21:00:37 -05:00
[ [ "/" split1-last drop "/" ] dip 3append ]
2008-06-01 01:59:06 -04:00
} cond ;
<PRIVATE
: derive-port ( url base -- url' )
over relative-url? [ [ port>> ] either? ] [ drop port>> ] if ;
: derive-path ( url base -- url' )
[ path>> ] bi@ swap url-append-path ;
PRIVATE>
2008-06-01 01:59:06 -04:00
: derive-url ( base url -- url' )
2008-07-02 22:52:28 -04:00
[ clone ] dip over {
2008-09-29 23:54:10 -04:00
[ [ protocol>> ] either? >>protocol ]
[ [ username>> ] either? >>username ]
[ [ password>> ] either? >>password ]
[ [ host>> ] either? >>host ]
[ derive-port >>port ]
[ derive-path >>path ]
2008-09-29 23:54:10 -04:00
[ [ query>> ] either? >>query ]
[ [ anchor>> ] either? >>anchor ]
2008-07-02 22:52:28 -04:00
} 2cleave ;
2008-06-01 01:59:06 -04:00
! Half-baked stuff follows
: secure-protocol? ( protocol -- ? )
"https" = ;
<PRIVATE
GENERIC: >secure-addr ( addrspec -- addrspec' )
PRIVATE>
: url-addr ( url -- addr )
2008-09-24 22:19:27 -04:00
[
[ host>> ]
[ port>> ]
[ protocol>> protocol-port ]
tri or <inet>
2008-09-24 22:19:27 -04:00
] [ protocol>> ] bi
secure-protocol? [ >secure-addr ] when ;
: set-url-addr ( url addr -- url )
[ host>> >>host ] [ port>> >>port ] bi ;
: ensure-port ( url -- url' )
clone dup protocol>> '[ _ protocol-port or ] change-port ;
! Literal syntax
SYNTAX: URL" lexer get skip-blank parse-short-string >url suffix! ;
{ "urls" "prettyprint" } "urls.prettyprint" require-when
{ "urls" "io.sockets.secure" } "urls.secure" require-when