229 lines
		
	
	
		
			5.4 KiB
		
	
	
	
		
			Factor
		
	
	
		
		
			
		
	
	
			229 lines
		
	
	
		
			5.4 KiB
		
	
	
	
		
			Factor
		
	
	
|  | ! Copyright (C) 2008 Slava Pestov. | ||
|  | ! See http://factorcode.org/license.txt for BSD license. | ||
|  | USING: kernel ascii combinators combinators.short-circuit | ||
|  | sequences splitting fry namespaces assocs arrays strings | ||
|  | io.sockets io.sockets.secure io.encodings.string | ||
|  | io.encodings.utf8 math math.parser accessors parser | ||
|  | strings.parser lexer prettyprint.backend hashtables present ;
 | ||
|  | IN: urls | ||
|  | 
 | ||
|  | : url-quotable? ( ch -- ? )
 | ||
|  |     #! In a URL, can this character be used without | ||
|  |     #! URL-encoding? | ||
|  |     { | ||
|  |         [ letter? ] | ||
|  |         [ LETTER? ] | ||
|  |         [ digit? ] | ||
|  |         [ "/_-." member? ] | ||
|  |     } 1|| ; foldable
 | ||
|  | 
 | ||
|  | <PRIVATE
 | ||
|  | 
 | ||
|  | : push-utf8 ( ch -- )
 | ||
|  |     1string utf8 encode | ||
|  |     [ CHAR: % , >hex 2 CHAR: 0 pad-left % ] each ;
 | ||
|  | 
 | ||
|  | PRIVATE>
 | ||
|  | 
 | ||
|  | : url-encode ( str -- str )
 | ||
|  |     [ | ||
|  |         [ dup url-quotable? [ , ] [ push-utf8 ] if ] each
 | ||
|  |     ] "" make ;
 | ||
|  | 
 | ||
|  | <PRIVATE
 | ||
|  | 
 | ||
|  | : url-decode-hex ( index str -- )
 | ||
|  |     2dup length 2 - >= [ | ||
|  |         2drop
 | ||
|  |     ] [ | ||
|  |         [ 1+ dup 2 + ] dip subseq  hex> [ , ] when*
 | ||
|  |     ] if ;
 | ||
|  | 
 | ||
|  | : url-decode-% ( index str -- index str )
 | ||
|  |     2dup url-decode-hex [ 3 + ] dip ;
 | ||
|  | 
 | ||
|  | : url-decode-+-or-other ( index str ch -- index str )
 | ||
|  |     dup CHAR: + = [ drop CHAR: \s ] when , [ 1+ ] dip ;
 | ||
|  | 
 | ||
|  | : url-decode-iter ( index str -- )
 | ||
|  |     2dup length >= [ | ||
|  |         2drop
 | ||
|  |     ] [ | ||
|  |         2dup nth dup CHAR: % = [ | ||
|  |             drop url-decode-% | ||
|  |         ] [ | ||
|  |             url-decode-+-or-other | ||
|  |         ] if url-decode-iter | ||
|  |     ] if ;
 | ||
|  | 
 | ||
|  | PRIVATE>
 | ||
|  | 
 | ||
|  | : url-decode ( str -- str )
 | ||
|  |     [ 0 swap url-decode-iter ] "" make utf8 decode ;
 | ||
|  | 
 | ||
|  | <PRIVATE
 | ||
|  | 
 | ||
|  | : add-query-param ( value key assoc -- )
 | ||
|  |     [ | ||
|  |         at [ | ||
|  |             { | ||
|  |                 { [ dup string? ] [ swap 2array ] } | ||
|  |                 { [ dup array? ] [ swap suffix ] } | ||
|  |                 { [ dup not ] [ drop ] } | ||
|  |             } cond
 | ||
|  |         ] when*
 | ||
|  |     ] 2keep set-at ;
 | ||
|  | 
 | ||
|  | PRIVATE>
 | ||
|  | 
 | ||
|  | : query>assoc ( query -- assoc )
 | ||
|  |     dup [ | ||
|  |         "&" split H{ } clone [ | ||
|  |             [ | ||
|  |                 [ "=" split1 [ dup [ url-decode ] when ] bi@ swap ] dip
 | ||
|  |                 add-query-param | ||
|  |             ] curry each
 | ||
|  |         ] keep
 | ||
|  |     ] when ;
 | ||
|  | 
 | ||
|  | : assoc>query ( hash -- str )
 | ||
|  |     [ | ||
|  |         dup array? [ [ present ] map ] [ present 1array ] if
 | ||
|  |     ] assoc-map
 | ||
|  |     [ | ||
|  |         [ | ||
|  |             [ url-encode ] dip
 | ||
|  |             [ url-encode "=" swap 3append , ] with each
 | ||
|  |         ] assoc-each
 | ||
|  |     ] { } make "&" join ;
 | ||
|  | 
 | ||
|  | TUPLE: url protocol username password host port path query anchor ;
 | ||
|  | 
 | ||
|  | : <url> ( -- url ) url new ;
 | ||
|  | 
 | ||
|  | : query-param ( url key -- value )
 | ||
|  |     swap query>> at ;
 | ||
|  | 
 | ||
|  | : set-query-param ( url value key -- url )
 | ||
|  |     '[ , , _ ?set-at ] change-query ;
 | ||
|  | 
 | ||
|  | : parse-host ( string -- host port )
 | ||
|  |     ":" split1 [ url-decode ] [ | ||
|  |         dup [ | ||
|  |             string>number | ||
|  |             dup [ "Invalid port" throw ] unless
 | ||
|  |         ] when
 | ||
|  |     ] bi* ;
 | ||
|  | 
 | ||
|  | <PRIVATE
 | ||
|  | 
 | ||
|  | : parse-host-part ( url protocol rest -- url string' )
 | ||
|  |     [ >>protocol ] [ | ||
|  |         "//" ?head [ "Invalid URL" throw ] unless
 | ||
|  |         "@" split1 [ | ||
|  |             [ | ||
|  |                 ":" split1 [ >>username ] [ >>password ] bi*
 | ||
|  |             ] dip
 | ||
|  |         ] when*
 | ||
|  |         "/" split1 [ | ||
|  |             parse-host [ >>host ] [ >>port ] bi*
 | ||
|  |         ] [ "/" prepend ] bi*
 | ||
|  |     ] bi* ;
 | ||
|  | 
 | ||
|  | PRIVATE>
 | ||
|  | 
 | ||
|  | GENERIC: >url ( obj -- url )
 | ||
|  | 
 | ||
|  | M: f >url drop <url> ;
 | ||
|  | 
 | ||
|  | M: url >url ;
 | ||
|  | 
 | ||
|  | M: string >url | ||
|  |     <url> swap
 | ||
|  |     ":" split1 [ parse-host-part ] when*
 | ||
|  |     "#" split1 [ | ||
|  |         "?" split1 | ||
|  |         [ url-decode >>path ] | ||
|  |         [ [ query>assoc >>query ] when* ] bi*
 | ||
|  |     ] | ||
|  |     [ url-decode >>anchor ] bi* ;
 | ||
|  | 
 | ||
|  | <PRIVATE
 | ||
|  | 
 | ||
|  | : unparse-username-password ( url -- )
 | ||
|  |     dup username>> dup [ | ||
|  |         % password>> [ ":" % % ] when* "@" % | ||
|  |     ] [ 2drop ] if ;
 | ||
|  | 
 | ||
|  | : unparse-host-part ( url protocol -- )
 | ||
|  |     % | ||
|  |     "://" % | ||
|  |     { | ||
|  |         [ unparse-username-password ] | ||
|  |         [ host>> url-encode % ] | ||
|  |         [ port>> [ ":" % # ] when* ] | ||
|  |         [ path>> "/" head? [ "/" % ] unless ] | ||
|  |     } cleave ;
 | ||
|  | 
 | ||
|  | M: url present | ||
|  |     [ | ||
|  |         { | ||
|  |             [ dup protocol>> dup [ unparse-host-part ] [ 2drop ] if ] | ||
|  |             [ path>> url-encode % ] | ||
|  |             [ query>> dup assoc-empty? [ drop ] [ "?" % assoc>query % ] if ] | ||
|  |             [ anchor>> [ "#" % present url-encode % ] when* ] | ||
|  |         } cleave
 | ||
|  |     ] "" make ;
 | ||
|  | 
 | ||
|  | : url-append-path ( path1 path2 -- path )
 | ||
|  |     { | ||
|  |         { [ dup "/" head? ] [ nip ] } | ||
|  |         { [ dup empty? ] [ drop ] } | ||
|  |         { [ over "/" tail? ] [ append ] } | ||
|  |         { [ "/" pick start not ] [ nip ] } | ||
|  |         [ [ "/" last-split1 drop "/" ] dip 3append ] | ||
|  |     } cond ;
 | ||
|  | 
 | ||
|  | PRIVATE>
 | ||
|  | 
 | ||
|  | : derive-url ( base url -- url' )
 | ||
|  |     [ clone ] dip over { | ||
|  |         [ [ protocol>> ] either? >>protocol ] | ||
|  |         [ [ username>> ] either? >>username ] | ||
|  |         [ [ password>> ] either? >>password ] | ||
|  |         [ [ host>>     ] either? >>host ] | ||
|  |         [ [ port>>     ] either? >>port ] | ||
|  |         [ [ path>>     ] bi@ swap url-append-path >>path ] | ||
|  |         [ [ query>>    ] either? >>query ] | ||
|  |         [ [ anchor>>   ] either? >>anchor ] | ||
|  |     } 2cleave ;
 | ||
|  | 
 | ||
|  | : relative-url ( url -- url' )
 | ||
|  |     clone
 | ||
|  |         f >>protocol | ||
|  |         f >>host | ||
|  |         f >>port ;
 | ||
|  | 
 | ||
|  | ! Half-baked stuff follows | ||
|  | : secure-protocol? ( protocol -- ? )
 | ||
|  |     "https" = ;
 | ||
|  | 
 | ||
|  | : url-addr ( url -- addr )
 | ||
|  |     [ [ host>> ] [ port>> ] bi <inet> ] [ protocol>> ] bi
 | ||
|  |     secure-protocol? [ <secure> ] when ;
 | ||
|  | 
 | ||
|  | : protocol-port ( protocol -- port )
 | ||
|  |     { | ||
|  |         { "http" [ 80 ] } | ||
|  |         { "https" [ 443 ] } | ||
|  |         { "ftp" [ 21 ] } | ||
|  |     } case ;
 | ||
|  | 
 | ||
|  | : ensure-port ( url -- url' )
 | ||
|  |     dup protocol>> '[ , protocol-port or ] change-port ;
 | ||
|  | 
 | ||
|  | ! Literal syntax | ||
|  | : URL" lexer get skip-blank parse-string >url parsed ; parsing | ||
|  | 
 | ||
|  | M: url pprint* dup present "URL\" " "\"" pprint-string ;
 |