| 
									
										
										
										
											2008-06-01 01:59:06 -04:00
										 |  |  | ! Copyright (C) 2008 Slava Pestov. | 
					
						
							|  |  |  | ! See http://factorcode.org/license.txt for BSD license. | 
					
						
							| 
									
										
										
										
											2008-07-02 22:52:28 -04:00
										 |  |  | USING: kernel ascii combinators combinators.short-circuit | 
					
						
							| 
									
										
										
										
											2008-09-10 23:11:40 -04:00
										 |  |  | sequences splitting fry namespaces make assocs arrays strings | 
					
						
							| 
									
										
										
										
											2008-12-08 15:58:00 -05:00
										 |  |  | io.sockets io.encodings.string io.encodings.utf8 math | 
					
						
							|  |  |  | math.parser accessors parser strings.parser lexer | 
					
						
							| 
									
										
										
										
											2008-12-08 21:04:13 -05:00
										 |  |  | hashtables present peg.ebnf urls.encoding ;
 | 
					
						
							| 
									
										
										
										
											2008-06-01 00:20:24 -04:00
										 |  |  | IN: urls | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-09-29 23:54:10 -04:00
										 |  |  | TUPLE: url protocol username password host port path query anchor ;
 | 
					
						
							| 
									
										
										
										
											2008-06-01 00:20:24 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-06-01 18:22:39 -04:00
										 |  |  | : <url> ( -- url ) url new ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-06-02 16:00:03 -04:00
										 |  |  | : query-param ( url key -- value )
 | 
					
						
							| 
									
										
										
										
											2008-06-01 01:59:06 -04:00
										 |  |  |     swap query>> at ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-09-25 03:46:37 -04:00
										 |  |  | : delete-query-param ( url key -- url )
 | 
					
						
							|  |  |  |     over query>> delete-at ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-06-02 16:00:03 -04:00
										 |  |  | : set-query-param ( url value key -- url )
 | 
					
						
							| 
									
										
										
										
											2008-09-25 03:46:37 -04:00
										 |  |  |     over [ | 
					
						
							|  |  |  |         '[ [ _ _ ] dip ?set-at ] change-query | 
					
						
							|  |  |  |     ] [ | 
					
						
							|  |  |  |         nip delete-query-param | 
					
						
							|  |  |  |     ] if ;
 | 
					
						
							| 
									
										
										
										
											2008-06-01 01:59:06 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : parse-host ( string -- host port )
 | 
					
						
							|  |  |  |     ":" split1 [ url-decode ] [ | 
					
						
							|  |  |  |         dup [ | 
					
						
							|  |  |  |             string>number | 
					
						
							|  |  |  |             dup [ "Invalid port" throw ] unless
 | 
					
						
							|  |  |  |         ] when
 | 
					
						
							|  |  |  |     ] bi* ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-09-26 19:24:58 -04:00
										 |  |  | GENERIC: >url ( obj -- url )
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | M: f >url drop <url> ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | M: url >url ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-06-05 01:18:36 -04:00
										 |  |  | <PRIVATE
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-09-26 19:24:58 -04:00
										 |  |  | EBNF: parse-url | 
					
						
							| 
									
										
										
										
											2008-06-01 00:20:24 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-09-26 19:24:58 -04:00
										 |  |  | protocol = [a-z]+                   => [[ url-decode ]] | 
					
						
							|  |  |  | username = [^/:@#?]+                => [[ url-decode ]] | 
					
						
							|  |  |  | password = [^/:@#?]+                => [[ url-decode ]] | 
					
						
							|  |  |  | pathname = [^#?]+                   => [[ url-decode ]] | 
					
						
							| 
									
										
										
										
											2008-09-29 23:54:10 -04:00
										 |  |  | query    = [^#]+                    => [[ query>assoc ]] | 
					
						
							| 
									
										
										
										
											2008-09-26 19:24:58 -04:00
										 |  |  | anchor   = .+                       => [[ url-decode ]] | 
					
						
							| 
									
										
										
										
											2008-06-05 01:18:36 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-09-26 19:24:58 -04:00
										 |  |  | hostname = [^/#?]+                  => [[ url-decode ]] | 
					
						
							| 
									
										
										
										
											2008-06-01 18:22:39 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-09-26 19:24:58 -04:00
										 |  |  | hostname-spec = hostname ("/"|!(.)) => [[ first ]] | 
					
						
							| 
									
										
										
										
											2008-06-13 23:05:41 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-09-26 19:24:58 -04:00
										 |  |  | auth     = (username (":" password  => [[ second ]])? "@" | 
					
						
							|  |  |  |                                     => [[ first2 2array ]])? | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | url      = ((protocol "://")        => [[ first ]] auth hostname)? | 
					
						
							|  |  |  |            (pathname)? | 
					
						
							|  |  |  |            ("?" query               => [[ second ]])? | 
					
						
							|  |  |  |            ("#" anchor              => [[ second ]])? | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | ;EBNF | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | PRIVATE>
 | 
					
						
							| 
									
										
										
										
											2008-06-01 00:20:24 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-06-01 18:22:39 -04:00
										 |  |  | M: string >url | 
					
						
							| 
									
										
										
										
											2008-09-26 19:24:58 -04:00
										 |  |  |     parse-url { | 
					
						
							|  |  |  |         [ | 
					
						
							|  |  |  |             first [ | 
					
						
							|  |  |  |                 [ first ] ! protocol | 
					
						
							|  |  |  |                 [ | 
					
						
							|  |  |  |                     second
 | 
					
						
							|  |  |  |                     [ first [ first2 ] [ f f ] if* ] ! username, password | 
					
						
							|  |  |  |                     [ second parse-host ] ! host, port | 
					
						
							|  |  |  |                     bi
 | 
					
						
							|  |  |  |                 ] bi
 | 
					
						
							|  |  |  |             ] [ f f f f f ] if*
 | 
					
						
							|  |  |  |         ] | 
					
						
							|  |  |  |         [ second ] ! pathname | 
					
						
							| 
									
										
										
										
											2008-09-29 23:54:10 -04:00
										 |  |  |         [ third ] ! query | 
					
						
							| 
									
										
										
										
											2008-09-26 19:24:58 -04:00
										 |  |  |         [ fourth ] ! anchor | 
					
						
							|  |  |  |     } cleave url boa
 | 
					
						
							|  |  |  |     dup host>> [ [ "/" or ] change-path ] when ;
 | 
					
						
							| 
									
										
										
										
											2008-06-01 18:22:39 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-09-23 15:17:02 -04:00
										 |  |  | : protocol-port ( protocol -- port )
 | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         { "http" [ 80 ] } | 
					
						
							|  |  |  |         { "https" [ 443 ] } | 
					
						
							|  |  |  |         { "ftp" [ 21 ] } | 
					
						
							|  |  |  |         [ drop f ] | 
					
						
							|  |  |  |     } case ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | <PRIVATE
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-09-23 17:11:11 -04:00
										 |  |  | : unparse-username-password ( url -- )
 | 
					
						
							|  |  |  |     dup username>> dup [ | 
					
						
							|  |  |  |         % password>> [ ":" % % ] when* "@" % | 
					
						
							|  |  |  |     ] [ 2drop ] if ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-09-23 15:17:02 -04:00
										 |  |  | : url-port ( url -- port/f )
 | 
					
						
							|  |  |  |     [ port>> ] [ port>> ] [ protocol>> protocol-port ] tri =
 | 
					
						
							|  |  |  |     [ drop f ] when ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-06-01 18:22:39 -04:00
										 |  |  | : unparse-host-part ( url protocol -- )
 | 
					
						
							| 
									
										
										
										
											2008-06-01 00:20:24 -04:00
										 |  |  |     % | 
					
						
							|  |  |  |     "://" % | 
					
						
							| 
									
										
										
										
											2008-06-02 14:27:00 -04:00
										 |  |  |     { | 
					
						
							|  |  |  |         [ unparse-username-password ] | 
					
						
							|  |  |  |         [ host>> url-encode % ] | 
					
						
							| 
									
										
										
										
											2008-09-23 15:17:02 -04:00
										 |  |  |         [ url-port [ ":" % # ] when* ] | 
					
						
							| 
									
										
										
										
											2008-06-02 14:27:00 -04:00
										 |  |  |         [ path>> "/" head? [ "/" % ] unless ] | 
					
						
							|  |  |  |     } cleave ;
 | 
					
						
							| 
									
										
										
										
											2008-06-01 00:20:24 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-09-24 22:19:27 -04:00
										 |  |  | PRIVATE>
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-06-05 01:18:36 -04:00
										 |  |  | M: url present | 
					
						
							| 
									
										
										
										
											2008-06-01 00:20:24 -04:00
										 |  |  |     [ | 
					
						
							| 
									
										
										
										
											2008-06-01 18:22:39 -04:00
										 |  |  |         { | 
					
						
							|  |  |  |             [ dup protocol>> dup [ unparse-host-part ] [ 2drop ] if ] | 
					
						
							|  |  |  |             [ path>> url-encode % ] | 
					
						
							|  |  |  |             [ query>> dup assoc-empty? [ drop ] [ "?" % assoc>query % ] if ] | 
					
						
							| 
									
										
										
										
											2008-06-06 19:18:05 -04:00
										 |  |  |             [ anchor>> [ "#" % present url-encode % ] when* ] | 
					
						
							| 
									
										
										
										
											2008-06-01 18:22:39 -04:00
										 |  |  |         } cleave
 | 
					
						
							| 
									
										
										
										
											2008-06-01 00:20:24 -04:00
										 |  |  |     ] "" make ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-06-01 01:59:06 -04:00
										 |  |  | : url-append-path ( path1 path2 -- path )
 | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         { [ dup "/" head? ] [ nip ] } | 
					
						
							|  |  |  |         { [ dup empty? ] [ drop ] } | 
					
						
							|  |  |  |         { [ over "/" tail? ] [ append ] } | 
					
						
							|  |  |  |         { [ "/" pick start not ] [ nip ] } | 
					
						
							| 
									
										
										
										
											2008-11-22 21:00:37 -05:00
										 |  |  |         [ [ "/" split1-last drop "/" ] dip 3append ] | 
					
						
							| 
									
										
										
										
											2008-06-01 01:59:06 -04:00
										 |  |  |     } cond ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-06-05 01:18:36 -04:00
										 |  |  | PRIVATE>
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-06-01 01:59:06 -04:00
										 |  |  | : derive-url ( base url -- url' )
 | 
					
						
							| 
									
										
										
										
											2008-07-02 22:52:28 -04:00
										 |  |  |     [ clone ] dip over { | 
					
						
							| 
									
										
										
										
											2008-09-29 23:54:10 -04:00
										 |  |  |         [ [ protocol>>  ] either? >>protocol ] | 
					
						
							|  |  |  |         [ [ username>>  ] either? >>username ] | 
					
						
							|  |  |  |         [ [ password>>  ] either? >>password ] | 
					
						
							|  |  |  |         [ [ host>>      ] either? >>host ] | 
					
						
							|  |  |  |         [ [ port>>      ] either? >>port ] | 
					
						
							|  |  |  |         [ [ path>>      ] bi@ swap url-append-path >>path ] | 
					
						
							|  |  |  |         [ [ query>>     ] either? >>query ] | 
					
						
							|  |  |  |         [ [ anchor>>    ] either? >>anchor ] | 
					
						
							| 
									
										
										
										
											2008-07-02 22:52:28 -04:00
										 |  |  |     } 2cleave ;
 | 
					
						
							| 
									
										
										
										
											2008-06-01 01:59:06 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | : relative-url ( url -- url' )
 | 
					
						
							| 
									
										
										
										
											2008-07-02 22:52:28 -04:00
										 |  |  |     clone
 | 
					
						
							|  |  |  |         f >>protocol | 
					
						
							|  |  |  |         f >>host | 
					
						
							|  |  |  |         f >>port ;
 | 
					
						
							| 
									
										
										
										
											2008-06-01 18:22:39 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-10-04 14:26:17 -04:00
										 |  |  | : relative-url? ( url -- ? ) protocol>> not ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-06-02 14:27:00 -04:00
										 |  |  | ! Half-baked stuff follows | 
					
						
							|  |  |  | : secure-protocol? ( protocol -- ? )
 | 
					
						
							|  |  |  |     "https" = ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-10-02 06:13:22 -04:00
										 |  |  | <PRIVATE
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | GENERIC: >secure-addr ( addrspec -- addrspec' )
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | PRIVATE>
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-06-02 14:27:00 -04:00
										 |  |  | : url-addr ( url -- addr )
 | 
					
						
							| 
									
										
										
										
											2008-09-24 22:19:27 -04:00
										 |  |  |     [ | 
					
						
							|  |  |  |         [ host>> ] | 
					
						
							|  |  |  |         [ port>> ] | 
					
						
							|  |  |  |         [ protocol>> protocol-port ] | 
					
						
							|  |  |  |         tri or <inet> | 
					
						
							|  |  |  |     ] [ protocol>> ] bi
 | 
					
						
							| 
									
										
										
										
											2008-10-02 06:13:22 -04:00
										 |  |  |     secure-protocol? [ >secure-addr ] when ;
 | 
					
						
							| 
									
										
										
										
											2008-06-02 14:27:00 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-09-24 22:19:27 -04:00
										 |  |  | : ensure-port ( url -- url )
 | 
					
						
							| 
									
										
										
										
											2008-09-10 23:11:40 -04:00
										 |  |  |     dup protocol>> '[ _ protocol-port or ] change-port ;
 | 
					
						
							| 
									
										
										
										
											2008-06-02 14:27:00 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | ! Literal syntax | 
					
						
							| 
									
										
										
										
											2008-06-01 18:22:39 -04:00
										 |  |  | : URL" lexer get skip-blank parse-string >url parsed ; parsing | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-12-08 21:04:13 -05:00
										 |  |  | USING: vocabs vocabs.loader ;
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | "prettyprint" vocab [ | 
					
						
							|  |  |  |     "urls.prettyprint" require | 
					
						
							|  |  |  | ] when
 |