3 # Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007 Python Software
4 # Foundation; All Rights Reserved
6 """A HTTPSConnection/Handler with additional proxy and cert validation features.
8 In particular, monkey patches in Python r74203 to provide support for CONNECT
9 proxies and adds SSL cert validation if the ssl module is present.
12 __author__ = "{frew,nick.johnson}@google.com (Fred Wulff and Nick Johnson)"
21 from urllib import splittype
22 from urllib import splituser
23 from urllib import splitpasswd
25 class InvalidCertificateException(httplib.HTTPException):
26 """Raised when a certificate is provided with an invalid hostname."""
28 def __init__(self, host, cert, reason):
32 host: The hostname the connection was made to.
33 cert: The SSL certificate (as a dictionary) the host returned.
35 httplib.HTTPException.__init__(self)
41 return ('Host %s returned an invalid certificate (%s): %s\n'
43 'http://code.google.com/appengine/kb/general.html#rpcssl' %
44 (self.host, self.reason, self.cert))
46 def can_validate_certs():
47 """Return True if we have the SSL package and can validate certificates."""
54 def _create_fancy_connection(tunnel_host=None, key_file=None,
55 cert_file=None, ca_certs=None):
56 # This abomination brought to you by the fact that
57 # the HTTPHandler creates the connection instance in the middle
58 # of do_open so we need to add the tunnel host to the class.
60 class PresetProxyHTTPSConnection(httplib.HTTPSConnection):
61 """An HTTPS connection that uses a proxy defined by the enclosing scope."""
63 def __init__(self, *args, **kwargs):
64 httplib.HTTPSConnection.__init__(self, *args, **kwargs)
66 self._tunnel_host = tunnel_host
68 logging.debug("Creating preset proxy https conn: %s", tunnel_host)
70 self.key_file = key_file
71 self.cert_file = cert_file
72 self.ca_certs = ca_certs
76 self.cert_reqs = ssl.CERT_REQUIRED
78 self.cert_reqs = ssl.CERT_NONE
83 self._set_hostport(self._tunnel_host, None)
84 logging.info("Connecting through tunnel to: %s:%d",
86 self.send("CONNECT %s:%d HTTP/1.0\r\n\r\n" % (self.host, self.port))
87 response = self.response_class(self.sock, strict=self.strict,
89 (_, code, message) = response._read_status()
93 raise socket.error, "Tunnel connection failed: %d %s" % (
94 code, message.strip())
97 line = response.fp.readline()
101 def _get_valid_hosts_for_cert(self, cert):
102 """Returns a list of valid host globs for an SSL certificate.
105 cert: A dictionary representing an SSL certificate.
107 list: A list of valid host globs.
109 if 'subjectAltName' in cert:
110 return [x[1] for x in cert['subjectAltName'] if x[0].lower() == 'dns']
112 # Return a list of commonName fields
113 return [x[0][1] for x in cert['subject']
114 if x[0][0].lower() == 'commonname']
116 def _validate_certificate_hostname(self, cert, hostname):
117 """Validates that a given hostname is valid for an SSL certificate.
120 cert: A dictionary representing an SSL certificate.
121 hostname: The hostname to test.
123 bool: Whether or not the hostname is valid for this certificate.
125 hosts = self._get_valid_hosts_for_cert(cert)
127 # Convert the glob-style hostname expression (eg, '*.google.com') into a
128 # valid regular expression.
129 host_re = host.replace('.', '\.').replace('*', '[^.]*')
130 if re.search('^%s$' % (host_re,), hostname, re.I):
# NOTE(review): fragment of the connection-setup method (presumably connect();
# its def line and several interior lines are not visible here). The visible
# statements open the socket with _create_connection and then wrap it for SSL,
# either through ssl.wrap_socket or through socket.ssl + httplib.FakeSocket;
# the condition choosing between the two is not visible.
136 # TODO(frew): When we drop support for <2.6 (in the far distant future),
137 # change this to socket.create_connection.
138 self.sock = _create_connection((self.host, self.port))
140 if self._tunnel_host:
143 # ssl and FakeSocket got deprecated. Try for the new hotness of wrap_ssl,
147 self.sock = ssl.wrap_socket(self.sock,
148 keyfile=self.key_file,
149 certfile=self.cert_file,
150 ca_certs=self.ca_certs,
151 cert_reqs=self.cert_reqs)
# When certificate verification is required, also check that the peer
# certificate is valid for the host we connected to.
153 if self.cert_reqs & ssl.CERT_REQUIRED:
154 cert = self.sock.getpeercert()
# NOTE(review): split(':', 0) performs zero splits, so [0] is the whole of
# self.host and a ':port' suffix, if present, stays in hostname. Presumably
# split(':', 1) was intended -- confirm before changing.
155 hostname = self.host.split(':', 0)[0]
156 if not self._validate_certificate_hostname(cert, hostname):
157 raise InvalidCertificateException(hostname, cert,
160 ssl = socket.ssl(self.sock,
161 keyfile=self.key_file,
162 certfile=self.cert_file)
163 self.sock = httplib.FakeSocket(self.sock, ssl)
165 return PresetProxyHTTPSConnection
168 # Here to end of _create_connection copied wholesale from Python 2.6's socket.py
169 _GLOBAL_DEFAULT_TIMEOUT = object()
172 def _create_connection(address, timeout=_GLOBAL_DEFAULT_TIMEOUT):
173 """Connect to *address* and return the socket object.
175 Convenience function. Connect to *address* (a 2-tuple ``(host,
176 port)``) and return the socket object. Passing the optional
177 *timeout* parameter will set the timeout on the socket instance
178 before attempting to connect. If no *timeout* is supplied, the
179 global default timeout setting returned by :func:`getdefaulttimeout`
183 msg = "getaddrinfo returns an empty list"
185 for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
186 af, socktype, proto, canonname, sa = res
189 sock = socket.socket(af, socktype, proto)
190 if timeout is not _GLOBAL_DEFAULT_TIMEOUT:
191 sock.settimeout(timeout)
195 except socket.error, msg:
199 raise socket.error, msg
202 class FancyRequest(urllib2.Request):
203 """A request that allows the use of a CONNECT proxy."""
205 def __init__(self, *args, **kwargs):
206 urllib2.Request.__init__(self, *args, **kwargs)
207 self._tunnel_host = None
208 self._key_file = None
209 self._cert_file = None
210 self._ca_certs = None
212 def set_proxy(self, host, type):
215 if self.get_type() == "https" and not self._tunnel_host:
216 self._tunnel_host = self.get_host()
217 saved_type = self.get_type()
218 urllib2.Request.set_proxy(self, host, type)
221 # Don't set self.type, we want to preserve the
222 # type for tunneling.
223 self.type = saved_type
225 def set_ssl_info(self, key_file=None, cert_file=None, ca_certs=None):
226 self._key_file = key_file
227 self._cert_file = cert_file
228 self._ca_certs = ca_certs
231 class FancyProxyHandler(urllib2.ProxyHandler):
232 """A ProxyHandler that works with CONNECT-enabled proxies."""
234 # Taken verbatim from /usr/lib/python2.5/urllib2.py
235 def _parse_proxy(self, proxy):
236 """Return (scheme, user, password, host/port) given a URL or an authority.
238 If a URL is supplied, it must have an authority (host:port) component.
239 According to RFC 3986, having an authority component means the URL must
240 have two slashes after the scheme:
242 >>> _parse_proxy('file:/ftp.example.com/')
243 Traceback (most recent call last):
244 ValueError: proxy URL with no authority: 'file:/ftp.example.com/'
246 The first three items of the returned tuple may be None.
248 Examples of authority parsing:
250 >>> _parse_proxy('proxy.example.com')
251 (None, None, None, 'proxy.example.com')
252 >>> _parse_proxy('proxy.example.com:3128')
253 (None, None, None, 'proxy.example.com:3128')
255 The authority component may optionally include userinfo (assumed to be
258 >>> _parse_proxy('joe:password@proxy.example.com')
259 (None, 'joe', 'password', 'proxy.example.com')
260 >>> _parse_proxy('joe:password@proxy.example.com:3128')
261 (None, 'joe', 'password', 'proxy.example.com:3128')
263 Same examples, but with URLs instead:
265 >>> _parse_proxy('http://proxy.example.com/')
266 ('http', None, None, 'proxy.example.com')
267 >>> _parse_proxy('http://proxy.example.com:3128/')
268 ('http', None, None, 'proxy.example.com:3128')
269 >>> _parse_proxy('http://joe:password@proxy.example.com/')
270 ('http', 'joe', 'password', 'proxy.example.com')
271 >>> _parse_proxy('http://joe:password@proxy.example.com:3128')
272 ('http', 'joe', 'password', 'proxy.example.com:3128')
274 Everything after the authority is ignored:
276 >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128')
277 ('ftp', 'joe', 'password', 'proxy.example.com')
279 Test for no trailing '/' case:
281 >>> _parse_proxy('http://joe:password@proxy.example.com')
282 ('http', 'joe', 'password', 'proxy.example.com')
285 scheme, r_scheme = splittype(proxy)
286 if not r_scheme.startswith("/"):
292 if not r_scheme.startswith("//"):
293 raise ValueError("proxy URL with no authority: %r" % proxy)
294 # We have an authority, so for RFC 3986-compliant URLs (by ss 3.
295 # and 3.3.), path is empty or starts with '/'
296 end = r_scheme.find("/", 2)
299 authority = r_scheme[2:end]
300 userinfo, hostport = splituser(authority)
301 if userinfo is not None:
302 user, password = splitpasswd(userinfo)
304 user = password = None
305 return scheme, user, password, hostport
307 def proxy_open(self, req, proxy, type):
308 # This block is copied wholesale from Python2.6 urllib2.
309 # It is idempotent, so the superclass method call executes as normal
311 orig_type = req.get_type()
312 proxy_type, user, password, hostport = self._parse_proxy(proxy)
313 if proxy_type is None:
314 proxy_type = orig_type
315 if user and password:
316 user_pass = "%s:%s" % (urllib2.unquote(user), urllib2.unquote(password))
317 creds = base64.b64encode(user_pass).strip()
318 # Later calls overwrite earlier calls for the same header
319 req.add_header("Proxy-authorization", "Basic " + creds)
320 hostport = urllib2.unquote(hostport)
321 req.set_proxy(hostport, proxy_type)
322 # This condition is the change
323 if orig_type == "https":
326 return urllib2.ProxyHandler.proxy_open(self, req, proxy, type)
329 class FancyHTTPSHandler(urllib2.HTTPSHandler):
330 """An HTTPSHandler that works with CONNECT-enabled proxies."""
332 def do_open(self, http_class, req):
333 # Intentionally very specific so as to opt for false negatives
334 # rather than false positives.
336 return urllib2.HTTPSHandler.do_open(
338 _create_fancy_connection(req._tunnel_host,
343 except urllib2.URLError, url_error:
346 if (type(url_error.reason) == ssl.SSLError and
347 url_error.reason.args[0] == 1):
348 # Display the reason to the user. Need to use args for python2.5
350 raise InvalidCertificateException(req.host, '',
351 url_error.reason.args[1])
358 # We have to implement this so that we persist the tunneling behavior
360 class FancyRedirectHandler(urllib2.HTTPRedirectHandler):
361 """A redirect handler that persists CONNECT-enabled proxy information."""
363 def redirect_request(self, req, *args, **kwargs):
364 new_req = urllib2.HTTPRedirectHandler.redirect_request(
365 self, req, *args, **kwargs)
366 # Same thing as in our set_proxy implementation, but in this case
367 # we've only got a Request to work with, so it was this or copy
368 # everything over piecemeal.
370 # Note that we do not persist tunneling behavior from an http request
371 # to an https request, because an http request does not set _tunnel_host.
373 # Also note that in Python < 2.6, you will get an error in
374 # FancyHTTPSHandler.do_open() on an https urllib2.Request that uses an http
375 # proxy, since the proxy type will be set to http instead of https.
376 # (FancyRequest, and urllib2.Request in Python >= 2.6 set the proxy type to
377 # https.) Such a urllib2.Request could result from this redirect
378 # if you are redirecting from an http request (since an http request
379 # does not have _tunnel_host set, and thus you will not set the proxy
380 # in the code below), and if you have defined a proxy for https in, say,
381 # FancyProxyHandler, and that proxy has type http.
382 if hasattr(req, "_tunnel_host") and isinstance(new_req, urllib2.Request):
383 if new_req.get_type() == "https":
385 # req is proxied, so copy the proxy info.
386 new_req._tunnel_host = new_req.get_host()
387 new_req.set_proxy(req.host, "https")
389 # req is not proxied, so just make sure _tunnel_host is defined.
390 new_req._tunnel_host = None
391 new_req.type = "https"
392 if hasattr(req, "_key_file") and isinstance(new_req, urllib2.Request):
393 # Copy the auxiliary data in case this or any further redirect is https
394 new_req._key_file = req._key_file
395 new_req._cert_file = req._cert_file
396 new_req._ca_certs = req._ca_certs