! Copyright (C) 2008 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: kernel ascii combinators combinators.short-circuit sequences splitting fry namespaces make assocs arrays strings io.sockets io.sockets.secure io.encodings.string io.encodings.utf8 math math.parser accessors parser strings.parser lexer prettyprint.backend hashtables present ; IN: urls : url-quotable? ( ch -- ? ) { [ letter? ] [ LETTER? ] [ digit? ] [ "/_-.:" member? ] } 1|| ; foldable hex 2 CHAR: 0 pad-left % ] each ] if ; PRIVATE> : url-encode ( str -- encoded ) [ [ dup url-quotable? [ , ] [ push-utf8 ] if ] each ] "" make ; = [ 2drop ] [ [ 1+ dup 2 + ] dip subseq hex> [ , ] when* ] if ; : url-decode-% ( index str -- index str ) 2dup url-decode-hex [ 3 + ] dip ; : url-decode-+-or-other ( index str ch -- index str ) dup CHAR: + = [ drop CHAR: \s ] when , [ 1+ ] dip ; : url-decode-iter ( index str -- ) 2dup length >= [ 2drop ] [ 2dup nth dup CHAR: % = [ drop url-decode-% ] [ url-decode-+-or-other ] if url-decode-iter ] if ; PRIVATE> : url-decode ( str -- decoded ) [ 0 swap url-decode-iter ] "" make utf8 decode ; : query>assoc ( query -- assoc ) dup [ "&" split H{ } clone [ [ [ "=" split1 [ dup [ url-decode ] when ] bi@ swap ] dip add-query-param ] curry each ] keep ] when ; : assoc>query ( assoc -- str ) [ dup array? [ [ present ] map ] [ present 1array ] if ] assoc-map [ [ [ url-encode ] dip [ url-encode "=" swap 3append , ] with each ] assoc-each ] { } make "&" join ; TUPLE: url protocol username password host port path query anchor ; : ( -- url ) url new ; : query-param ( url key -- value ) swap query>> at ; : set-query-param ( url value key -- url ) '[ [ _ _ ] dip ?set-at ] change-query ; : parse-host ( string -- host port ) ":" split1 [ url-decode ] [ dup [ string>number dup [ "Invalid port" throw ] unless ] when ] bi* ; >protocol ] [ "//" ?head [ "Invalid URL" throw ] unless "@" split1 [ [ ":" split1 [ >>username ] [ >>password ] bi* ] dip ] when* "/" split1 [ parse-host [ >>host ] [ >>port ] bi* ] [ "/" prepend ] bi* ] bi* ; PRIVATE> GENERIC: >url ( obj -- url ) M: f >url drop ; M: url >url ; M: string >url swap ":" split1 [ parse-host-part ] when* "#" split1 [ "?" split1 [ url-decode >>path ] [ [ query>assoc >>query ] when* ] bi* ] [ url-decode >>anchor ] bi* ; : protocol-port ( protocol -- port ) { { "http" [ 80 ] } { "https" [ 443 ] } { "ftp" [ 21 ] } [ drop f ] } case ; > dup [ % password>> [ ":" % % ] when* "@" % ] [ 2drop ] if ; : url-port ( url -- port/f ) [ port>> ] [ port>> ] [ protocol>> protocol-port ] tri = [ drop f ] when ; : unparse-host-part ( url protocol -- ) % "://" % { [ unparse-username-password ] [ host>> url-encode % ] [ url-port [ ":" % # ] when* ] [ path>> "/" head? [ "/" % ] unless ] } cleave ; PRIVATE> M: url present [ { [ dup protocol>> dup [ unparse-host-part ] [ 2drop ] if ] [ path>> url-encode % ] [ query>> dup assoc-empty? [ drop ] [ "?" % assoc>query % ] if ] [ anchor>> [ "#" % present url-encode % ] when* ] } cleave ] "" make ; : url-append-path ( path1 path2 -- path ) { { [ dup "/" head? ] [ nip ] } { [ dup empty? ] [ drop ] } { [ over "/" tail? ] [ append ] } { [ "/" pick start not ] [ nip ] } [ [ "/" last-split1 drop "/" ] dip 3append ] } cond ; PRIVATE> : derive-url ( base url -- url' ) [ clone ] dip over { [ [ protocol>> ] either? >>protocol ] [ [ username>> ] either? >>username ] [ [ password>> ] either? >>password ] [ [ host>> ] either? >>host ] [ [ port>> ] either? >>port ] [ [ path>> ] bi@ swap url-append-path >>path ] [ [ query>> ] either? >>query ] [ [ anchor>> ] either? >>anchor ] } 2cleave ; : relative-url ( url -- url' ) clone f >>protocol f >>host f >>port ; ! Half-baked stuff follows : secure-protocol? ( protocol -- ? ) "https" = ; : url-addr ( url -- addr ) [ [ host>> ] [ port>> ] [ protocol>> protocol-port ] tri or ] [ protocol>> ] bi secure-protocol? [ ] when ; : ensure-port ( url -- url ) dup protocol>> '[ _ protocol-port or ] change-port ; ! Literal syntax : URL" lexer get skip-blank parse-string >url parsed ; parsing M: url pprint* dup present "URL\" " "\"" pprint-string ;