203 lines
		
	
	
		
			5.1 KiB
		
	
	
	
		
			Factor
		
	
	
			
		
		
	
	
			203 lines
		
	
	
		
			5.1 KiB
		
	
	
	
		
			Factor
		
	
	
! Copyright (C) 2008, 2011 Slava Pestov.
 | 
						|
! See http://factorcode.org/license.txt for BSD license.
 | 
						|
 | 
						|
USING: accessors arrays assocs combinators fry hashtables
 | 
						|
io.pathnames io.sockets kernel lexer make math.parser
 | 
						|
namespaces peg.ebnf present sequences splitting strings
 | 
						|
strings.parser urls.encoding vocabs vocabs.loader ;
 | 
						|
 | 
						|
IN: urls
 | 
						|
 | 
						|
TUPLE: url protocol username password host port path query anchor ;
 | 
						|
 | 
						|
: <url> ( -- url ) url new ;
 | 
						|
 | 
						|
: query-param ( url key -- value )
 | 
						|
    swap query>> at ;
 | 
						|
 | 
						|
: delete-query-param ( url key -- url )
 | 
						|
    over query>> delete-at ;
 | 
						|
 | 
						|
: set-query-param ( url value key -- url )
 | 
						|
    over [
 | 
						|
        '[ [ _ _ ] dip ?set-at ] change-query
 | 
						|
    ] [
 | 
						|
        nip delete-query-param
 | 
						|
    ] if ;
 | 
						|
 | 
						|
ERROR: malformed-port ;
 | 
						|
 | 
						|
: parse-host ( string -- host/f port/f )
 | 
						|
    [
 | 
						|
        ":" split1-last [ url-decode ]
 | 
						|
        [ dup [ string>number [ malformed-port ] unless* ] when ] bi*
 | 
						|
    ] [ f f ] if* ;
 | 
						|
 | 
						|
GENERIC: >url ( obj -- url )
 | 
						|
 | 
						|
M: f >url drop <url> ;
 | 
						|
 | 
						|
M: url >url ;
 | 
						|
 | 
						|
<PRIVATE
 | 
						|
 | 
						|
EBNF: parse-url
 | 
						|
 | 
						|
protocol = [a-z+]+                  => [[ url-decode ]]
 | 
						|
username = [^/:@#?]+                => [[ url-decode ]]
 | 
						|
password = [^/:@#?]+                => [[ url-decode ]]
 | 
						|
pathname = [^#?]+                   => [[ url-decode ]]
 | 
						|
query    = [^#]+                    => [[ query>assoc ]]
 | 
						|
anchor   = .+                       => [[ url-decode ]]
 | 
						|
 | 
						|
hostname = [^/#?]+                  => [[ url-decode ]]
 | 
						|
 | 
						|
hostname-spec = hostname ("/"|!(.)) => [[ first ]]
 | 
						|
 | 
						|
auth     = (username (":" password  => [[ second ]])? "@"
 | 
						|
                                    => [[ first2 2array ]])?
 | 
						|
 | 
						|
url      = (((protocol "://") => [[ first ]] auth hostname)
 | 
						|
                    | (("//") => [[ f ]] auth hostname))?
 | 
						|
           (pathname)?
 | 
						|
           ("?" query               => [[ second ]])?
 | 
						|
           ("#" anchor              => [[ second ]])?
 | 
						|
 | 
						|
;EBNF
 | 
						|
 | 
						|
PRIVATE>
 | 
						|
 | 
						|
M: string >url
 | 
						|
    [ <url> ] dip
 | 
						|
    parse-url {
 | 
						|
        [
 | 
						|
            first [
 | 
						|
                [ first >>protocol ]
 | 
						|
                [
 | 
						|
                    second
 | 
						|
                    [ first [ first2 [ >>username ] [ >>password ] bi* ] when* ]
 | 
						|
                    [ second parse-host [ >>host ] [ >>port ] bi* ] bi
 | 
						|
                ] bi
 | 
						|
            ] when*
 | 
						|
        ]
 | 
						|
        [ second >>path ]
 | 
						|
        [ third >>query ]
 | 
						|
        [ fourth >>anchor ]
 | 
						|
    } cleave
 | 
						|
    dup host>> [ [ "/" or ] change-path ] when ;
 | 
						|
 | 
						|
M: pathname >url string>> >url ;
 | 
						|
 | 
						|
: relative-url ( url -- url' )
 | 
						|
    clone
 | 
						|
        f >>protocol
 | 
						|
        f >>host
 | 
						|
        f >>port ;
 | 
						|
 | 
						|
: relative-url? ( url -- ? ) protocol>> not ;
 | 
						|
 | 
						|
<PRIVATE
 | 
						|
 | 
						|
: unparse-username-password ( url -- )
 | 
						|
    dup username>> dup [
 | 
						|
        % password>> [ ":" % % ] when* "@" %
 | 
						|
    ] [ 2drop ] if ;
 | 
						|
 | 
						|
: url-port ( url -- port/f )
 | 
						|
    [ port>> ] [ port>> ] [ protocol>> protocol-port ] tri =
 | 
						|
    [ drop f ] when ;
 | 
						|
 | 
						|
: unparse-host-part ( url -- )
 | 
						|
    {
 | 
						|
        [ unparse-username-password ]
 | 
						|
        [ host>> url-encode % ]
 | 
						|
        [ url-port [ ":" % # ] when* ]
 | 
						|
        [ path>> "/" head? [ "/" % ] unless ]
 | 
						|
    } cleave ;
 | 
						|
 | 
						|
! URL" //foo.com" takes on the protocol of the url it's derived from
 | 
						|
: unparse-protocol ( url -- )
 | 
						|
    dup protocol>> [
 | 
						|
        % "://" % unparse-host-part
 | 
						|
    ] [
 | 
						|
        dup host>> [
 | 
						|
            "//" % unparse-host-part
 | 
						|
        ] [
 | 
						|
            drop
 | 
						|
        ] if
 | 
						|
    ] if* ;
 | 
						|
 | 
						|
M: url present
 | 
						|
    [
 | 
						|
        {
 | 
						|
            [ unparse-protocol ]
 | 
						|
            [ path>> url-encode % ]
 | 
						|
            [ query>> dup assoc-empty? [ drop ] [ "?" % assoc>query % ] if ]
 | 
						|
            [ anchor>> [ "#" % present url-encode % ] when* ]
 | 
						|
        } cleave
 | 
						|
    ] "" make ;
 | 
						|
 | 
						|
PRIVATE>
 | 
						|
 | 
						|
: url-append-path ( path1 path2 -- path )
 | 
						|
    {
 | 
						|
        { [ dup "/" head? ] [ nip ] }
 | 
						|
        { [ dup empty? ] [ drop ] }
 | 
						|
        { [ over "/" tail? ] [ append ] }
 | 
						|
        { [ "/" pick start not ] [ nip ] }
 | 
						|
        [ [ "/" split1-last drop "/" ] dip 3append ]
 | 
						|
    } cond ;
 | 
						|
 | 
						|
<PRIVATE
 | 
						|
 | 
						|
: derive-port ( url base -- url' )
 | 
						|
    over relative-url? [ [ port>> ] either? ] [ drop port>> ] if ;
 | 
						|
 | 
						|
: derive-path ( url base -- url' )
 | 
						|
    [ path>> ] bi@ swap url-append-path ;
 | 
						|
 | 
						|
PRIVATE>
 | 
						|
 | 
						|
: derive-url ( base url -- url' )
 | 
						|
    [ clone ] dip over {
 | 
						|
        [ [ protocol>>  ] either? >>protocol ]
 | 
						|
        [ [ username>>  ] either? >>username ]
 | 
						|
        [ [ password>>  ] either? >>password ]
 | 
						|
        [ [ host>>      ] either? >>host ]
 | 
						|
        [ derive-port             >>port ]
 | 
						|
        [ derive-path             >>path ]
 | 
						|
        [ [ query>>     ] either? >>query ]
 | 
						|
        [ [ anchor>>    ] either? >>anchor ]
 | 
						|
    } 2cleave ;
 | 
						|
 | 
						|
! Half-baked stuff follows
 | 
						|
: secure-protocol? ( protocol -- ? )
 | 
						|
    "https" = ;
 | 
						|
 | 
						|
<PRIVATE
 | 
						|
 | 
						|
GENERIC: >secure-addr ( addrspec -- addrspec' )
 | 
						|
 | 
						|
PRIVATE>
 | 
						|
 | 
						|
: url-addr ( url -- addr )
 | 
						|
    [
 | 
						|
        [ host>> ]
 | 
						|
        [ port>> ]
 | 
						|
        [ protocol>> protocol-port ]
 | 
						|
        tri or <inet>
 | 
						|
    ] [ protocol>> ] bi
 | 
						|
    secure-protocol? [ "urls.secure" ensure-vocab-loaded >secure-addr ] when ;
 | 
						|
 | 
						|
: set-url-addr ( url addr -- url )
 | 
						|
    [ host>> >>host ] [ port>> >>port ] bi ;
 | 
						|
 | 
						|
: ensure-port ( url -- url' )
 | 
						|
    clone dup protocol>> '[ _ protocol-port or ] change-port ;
 | 
						|
 | 
						|
! Literal syntax
 | 
						|
SYNTAX: URL" lexer get skip-blank parse-short-string >url suffix! ;
 | 
						|
 | 
						|
{ "urls" "prettyprint" } "urls.prettyprint" require-when
 | 
						|
{ "urls" "io.sockets.secure" } "urls.secure" require-when
 |