1 # Copyright (c) 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
6 An HTTP client with support for HTTPS connections with certificate verification.
8 The verification is based on http://tools.ietf.org/html/rfc6125#section-6.4.3
9 and the code is from Lib/ssl.py in python3:
10 http://hg.python.org/cpython/file/4dac45f88d45/Lib/ssl.py
12 One use case is to download the Chromium DEPS file in a secure way:
13 https://src.chromium.org/chrome/trunk/src/DEPS
15 Notice: python 2.7 or newer is required.
31 _SCRIPT_DIR = os.path.dirname(__file__)
32 _TRUSTED_ROOT_CERTS = os.path.join(_SCRIPT_DIR, 'cacert.pem')
35 class CertificateError(ValueError):
39 def _DNSNameMatch(dn, hostname, max_wildcards=1):
40 """Matching according to RFC 6125, section 6.4.3
42 http://tools.ietf.org/html/rfc6125#section-6.4.3
48 parts = dn.split(r'.')
52 wildcards = leftmost.count('*')
53 if wildcards > max_wildcards:
54 # Issue #17980: avoid denials of service by refusing more
55 # than one wildcard per fragment. A survey of established
56 # policy among SSL implementations showed it to be a
58 raise CertificateError(
59 'too many wildcards in certificate DNS name: ' + repr(dn))
61 # speed up common case w/o wildcards
63 return dn.lower() == hostname.lower()
65 # RFC 6125, section 6.4.3, subitem 1.
66 # The client SHOULD NOT attempt to match a presented identifier in which
67 # the wildcard character comprises a label other than the left-most label.
69 # When '*' is a fragment by itself, it matches a non-empty dotless
72 elif leftmost.startswith('xn--') or hostname.startswith('xn--'):
73 # RFC 6125, section 6.4.3, subitem 3.
74 # The client SHOULD NOT attempt to match a presented identifier
75 # where the wildcard character is embedded within an A-label or
76 # U-label of an internationalized domain name.
77 pats.append(re.escape(leftmost))
79 # Otherwise, '*' matches any dotless string, e.g. www*
80 pats.append(re.escape(leftmost).replace(r'\*', '[^.]*'))
82 # add the remaining fragments, ignore any wildcards
83 for frag in remainder:
84 pats.append(re.escape(frag))
86 pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
87 return pat.match(hostname)
90 def _MatchHostname(cert, hostname):
91 """Verify that *cert* (in decoded format as returned by
92 SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125
93 rules are followed, but IP addresses are not accepted for *hostname*.
95 CertificateError is raised on failure. On success, the function
99 raise ValueError('empty or no certificate, match_hostname needs a '
100 'SSL socket or SSL context with either '
101 'CERT_OPTIONAL or CERT_REQUIRED')
103 san = cert.get('subjectAltName', ())
104 for key, value in san:
106 if _DNSNameMatch(value, hostname):
108 dnsnames.append(value)
110 # The subject is only checked when there is no dNSName entry
112 for sub in cert.get('subject', ()):
113 for key, value in sub:
114 # XXX according to RFC 2818, the most specific Common Name
116 if key == 'commonName':
117 if _DNSNameMatch(value, hostname):
119 dnsnames.append(value)
120 if len(dnsnames) > 1:
121 raise CertificateError('hostname %r doesn\'t match either of %s'
122 % (hostname, ', '.join(map(repr, dnsnames))))
123 elif len(dnsnames) == 1:
124 raise CertificateError('hostname %r doesn\'t match %r'
125 % (hostname, dnsnames[0]))
127 raise CertificateError('no appropriate commonName or '
128 'subjectAltName fields were found')
131 class HTTPSConnection(httplib.HTTPSConnection):
133 def __init__(self, host, root_certs=_TRUSTED_ROOT_CERTS, **kwargs):
134 self.root_certs = root_certs
135 httplib.HTTPSConnection.__init__(self, host, **kwargs)
138 # Overrides for certificate verification.
139 args = [(self.host, self.port), self.timeout,]
140 if self.source_address:
141 args.append(self.source_address)
142 sock = socket.create_connection(*args)
144 if self._tunnel_host:
148 # Wrap the socket for verification with the root certs.
150 if self.root_certs is not None:
151 kwargs.update(cert_reqs=ssl.CERT_REQUIRED, ca_certs=self.root_certs)
152 self.sock = ssl.wrap_socket(sock, **kwargs)
156 _MatchHostname(self.sock.getpeercert(), self.host)
157 except CertificateError:
158 self.sock.shutdown(socket.SHUT_RDWR)
163 class HTTPSHandler(urllib2.HTTPSHandler):
165 def __init__(self, root_certs=_TRUSTED_ROOT_CERTS):
166 urllib2.HTTPSHandler.__init__(self)
167 self.root_certs = root_certs
169 def https_open(self, req):
170 # Pass a reference to the function below so that verification against
171 # trusted root certs could be injected.
172 return self.do_open(self.GetConnection, req)
174 def GetConnection(self, host, **kwargs):
175 params = dict(root_certs=self.root_certs)
176 params.update(kwargs)
177 return HTTPSConnection(host, **params)
180 def _SendRequest(url, timeout=None):
181 """Send request to the given https url, and return the server response.
184 url: The https url to send request to.
187 An integer: http code of the response.
188 A string: content of the response.
191 CertificateError: Certificate verification fails.
197 if url.startswith('https://'):
198 # HTTPSHandler has to go first, because we don't want to send secure cookies
199 # to a man in the middle.
200 handlers.append(HTTPSHandler())
203 cookie_file = os.environ.get('COOKIE_FILE')
204 if cookie_file and os.path.exists(cookie_file):
206 urllib2.HTTPCookieProcessor(cookielib.MozillaCookieJar(cookie_file)))
208 url_opener = urllib2.build_opener(*handlers)
214 response = url_opener.open(url, timeout=timeout)
216 status_code = response.code
217 content = response.read()
218 except urllib2.HTTPError as e:
221 except (ssl.SSLError, httplib.BadStatusLine, IOError):
225 return status_code, content
228 class HttpClientLocal(http_client.HttpClient):
229 """This http client is used locally in a workstation, GCE VMs, etc."""
232 def Get(url, params={}, timeout=120, retries=5, retry_interval=0.5,
235 url = '%s?%s' % (url, urllib.urlencode(params))
241 status_code, content = _SendRequest(url, timeout=timeout)
242 if status_code == 200:
243 return status_code, content
244 if retry_if_not and status_code == retry_if_not:
245 return status_code, content
248 time.sleep(retry_interval)
250 return status_code, content
252 # Should never be reached.
253 return status_code, content