__author__ = 'Andrey Petrov (andrey.petrov@shazow.net)'
__license__ = 'MIT'
-__version__ = 'dev'
+__version__ = '1.8'
from .connectionpool import (
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
-from collections import MutableMapping
+from collections import Mapping, MutableMapping
try:
from threading import RLock
except ImportError: # Platform-specific: No threads available
from collections import OrderedDict
except ImportError:
from .packages.ordered_dict import OrderedDict
+from .packages.six import itervalues
-__all__ = ['RecentlyUsedContainer']
+__all__ = ['RecentlyUsedContainer', 'HTTPHeaderDict']
_Null = object()
def keys(self):
with self.lock:
return self._container.keys()
+
+
+class HTTPHeaderDict(MutableMapping):
+ """
+ :param headers:
+ An iterable of field-value pairs. Must not contain multiple field names
+ when compared case-insensitively.
+
+ :param kwargs:
+ Additional field-value pairs to pass in to ``dict.update``.
+
+ A ``dict`` like container for storing HTTP Headers.
+
+ Field names are stored and compared case-insensitively in compliance with
+ RFC 2616. Iteration provides the first case-sensitive key seen for each
+ case-insensitive pair.
+
+ Using ``__setitem__`` syntax overwrites fields that compare equal
+ case-insensitively in order to maintain ``dict``'s api. For fields that
+ compare equal, instead create a new ``HTTPHeaderDict`` and use ``.add``
+ in a loop.
+
+ If multiple fields that are equal case-insensitively are passed to the
+ constructor or ``.update``, the behavior is undefined and some will be
+ lost.
+
+ >>> headers = HTTPHeaderDict()
+ >>> headers.add('Set-Cookie', 'foo=bar')
+ >>> headers.add('set-cookie', 'baz=quxx')
+ >>> headers['content-length'] = '7'
+ >>> headers['SET-cookie']
+ 'foo=bar, baz=quxx'
+ >>> headers['Content-Length']
+ '7'
+
+ If you want to access the raw headers with their original casing
+ for debugging purposes you can access the private ``._data`` attribute
+ which is a normal python ``dict`` that maps the case-insensitive key to a
+ list of tuples stored as (case-sensitive-original-name, value). Using the
+ structure from above as our example:
+
+ >>> headers._data
+ {'set-cookie': [('Set-Cookie', 'foo=bar'), ('set-cookie', 'baz=quxx')],
+ 'content-length': [('content-length', '7')]}
+ """
+
+ def __init__(self, headers=None, **kwargs):
+ self._data = {}
+ if headers is None:
+ headers = {}
+ self.update(headers, **kwargs)
+
+ def add(self, key, value):
+ """Adds a (name, value) pair, doesn't overwrite the value if it already
+ exists.
+
+ >>> headers = HTTPHeaderDict(foo='bar')
+ >>> headers.add('Foo', 'baz')
+ >>> headers['foo']
+ 'bar, baz'
+ """
+ self._data.setdefault(key.lower(), []).append((key, value))
+
+ def getlist(self, key):
+ """Returns a list of all the values for the named field. Returns an
+ empty list if the key doesn't exist."""
+ return self[key].split(', ') if key in self else []
+
+ def copy(self):
+ h = HTTPHeaderDict()
+ for key in self._data:
+ for rawkey, value in self._data[key]:
+ h.add(rawkey, value)
+ return h
+
+ def __eq__(self, other):
+ if not isinstance(other, Mapping):
+ return False
+ other = HTTPHeaderDict(other)
+ return dict((k1, self[k1]) for k1 in self._data) == \
+ dict((k2, other[k2]) for k2 in other._data)
+
+ def __getitem__(self, key):
+ values = self._data[key.lower()]
+ return ', '.join(value[1] for value in values)
+
+ def __setitem__(self, key, value):
+ self._data[key.lower()] = [(key, value)]
+
+ def __delitem__(self, key):
+ del self._data[key.lower()]
+
+ def __len__(self):
+ return len(self._data)
+
+ def __iter__(self):
+ for headers in itervalues(self._data):
+ yield headers[0][0]
+
+ def __repr__(self):
+ return '%s(%r)' % (self.__class__.__name__, dict(self.items()))
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
+import sys
import socket
from socket import timeout as SocketTimeout
ConnectTimeoutError,
)
from .packages.ssl_match_hostname import match_hostname
+from .packages import six
from .util import (
assert_fingerprint,
resolve_cert_reqs,
class HTTPConnection(_HTTPConnection, object):
+ """
+ Based on httplib.HTTPConnection but provides an extra constructor
+ backwards-compatibility layer between older and newer Pythons.
+ """
+
default_port = port_by_scheme['http']
# By default, disable Nagle's Algorithm.
tcp_nodelay = 1
+ def __init__(self, *args, **kw):
+ if six.PY3: # Python 3
+ kw.pop('strict', None)
+
+ if sys.version_info < (2, 7): # Python 2.6 and earlier
+ kw.pop('source_address', None)
+ self.source_address = None
+
+ _HTTPConnection.__init__(self, *args, **kw)
+
def _new_conn(self):
""" Establish a socket connection and set nodelay settings on it
:return: a new socket connection
"""
- try:
- conn = socket.create_connection(
- (self.host, self.port),
- self.timeout,
- self.source_address,
- )
- except AttributeError: # Python 2.6
- conn = socket.create_connection(
- (self.host, self.port),
- self.timeout,
- )
+ extra_args = []
+ if self.source_address: # Python 2.7+
+ extra_args.append(self.source_address)
+
+ conn = socket.create_connection(
+ (self.host, self.port),
+ self.timeout,
+ *extra_args
+ )
conn.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY,
self.tcp_nodelay)
return conn
def __init__(self, host, port=None, key_file=None, cert_file=None,
strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
source_address=None):
- try:
- HTTPConnection.__init__(self, host, port, strict, timeout, source_address)
- except TypeError: # Python 2.6
- HTTPConnection.__init__(self, host, port, strict, timeout)
+
+ HTTPConnection.__init__(self, host, port,
+ strict=strict,
+ timeout=timeout,
+ source_address=source_address)
+
self.key_file = key_file
self.cert_file = cert_file
from .exceptions import (
ClosedPoolError,
+ ConnectionError,
ConnectTimeoutError,
EmptyPoolError,
HostChangedError,
log.info("Starting new HTTP connection (%d): %s" %
(self.num_connections, self.host))
- extra_params = {}
- if not six.PY3: # Python 2
- extra_params['strict'] = self.strict
-
conn = self.ConnectionCls(host=self.host, port=self.port,
timeout=self.timeout.connect_timeout,
- **extra_params)
+ strict=self.strict)
if self.proxy is not None:
# Enable Nagle's algorithm for proxies, to avoid packet
# fragmentation.
pass
except Full:
# This should never happen if self.block == True
- log.warning("HttpConnectionPool is full, discarding connection: %s"
- % self.host)
+ log.warning(
+ "Connection pool is full, discarding connection: %s" %
+ self.host)
# Connection never got put back into the pool, close it.
if conn:
:param retries:
Number of retries to allow before raising a MaxRetryError exception.
+ If `False`, then retries are disabled and any exception is raised
+ immediately.
:param redirect:
If True, automatically handle redirects (status codes 301, 302,
- 303, 307, 308). Each redirect counts as a retry.
+ 303, 307, 308). Each redirect counts as a retry. Disabling retries
+ will disable redirect, too.
:param assert_same_host:
If ``True``, will make sure that the host of the pool requests is
if headers is None:
headers = self.headers
- if retries < 0:
+ if retries < 0 and retries is not False:
raise MaxRetryError(self, url)
if release_conn is None:
headers = headers.copy()
headers.update(self.proxy_headers)
+ # Must keep the exception bound to a separate variable or else Python 3
+ # complains about UnboundLocalError.
+ err = None
+
try:
# Request a connection from the queue
conn = self._get_conn(timeout=pool_timeout)
# ``response.read()``)
except Empty:
- # Timed out by queue
+ # Timed out by queue.
raise EmptyPoolError(self, "No pool connections are available.")
- except BaseSSLError as e:
+ except (BaseSSLError, CertificateError) as e:
+ # Release connection unconditionally because there is no way to
+ # close it externally in case of exception.
+ release_conn = True
raise SSLError(e)
- except CertificateError as e:
- # Name mismatch
- raise SSLError(e)
+ except (TimeoutError, HTTPException, SocketError) as e:
+ if conn:
+ # Discard the connection for these exceptions. It will be
+ # be replaced during the next _get_conn() call.
+ conn.close()
+ conn = None
- except TimeoutError as e:
- # Connection broken, discard.
- conn = None
- # Save the error off for retry logic.
- err = e
+ if not retries:
+ if isinstance(e, TimeoutError):
+ # TimeoutError is exempt from MaxRetryError-wrapping.
+ # FIXME: ... Not sure why. Add a reason here.
+ raise
- if retries == 0:
- raise
+ # Wrap unexpected exceptions with the most appropriate
+ # module-level exception and re-raise.
+ if isinstance(e, SocketError) and self.proxy:
+ raise ProxyError('Cannot connect to proxy.', e)
- except (HTTPException, SocketError) as e:
- # Connection broken, discard. It will be replaced next _get_conn().
- conn = None
- # This is necessary so we can access e below
- err = e
+ if retries is False:
+ raise ConnectionError('Connection failed.', e)
+
+ raise MaxRetryError(self, url, e)
- if retries == 0:
- if isinstance(e, SocketError) and self.proxy is not None:
- raise ProxyError('Cannot connect to proxy. '
- 'Socket error: %s.' % e)
- else:
- raise MaxRetryError(self, url, e)
+ # Keep track of the error for the retry warning.
+ err = e
finally:
if release_conn:
if not conn:
# Try again
- log.warn("Retrying (%d attempts remain) after connection "
- "broken by '%r': %s" % (retries, err, url))
+ log.warning("Retrying (%d attempts remain) after connection "
+ "broken by '%r': %s" % (retries, err, url))
return self.urlopen(method, url, body, headers, retries - 1,
redirect, assert_same_host,
timeout=timeout, pool_timeout=pool_timeout,
# Handle redirect?
redirect_location = redirect and response.get_redirect_location()
- if redirect_location:
+ if redirect_location and retries is not False:
if response.status == 303:
method = 'GET'
log.info("Redirecting %s -> %s" % (url, redirect_location))
----------------
:var DEFAULT_SSL_CIPHER_LIST: The list of supported SSL/TLS cipher suites.
- Default: ``EECDH+ECDSA+AESGCM EECDH+aRSA+AESGCM EECDH+ECDSA+SHA256
- EECDH+aRSA+SHA256 EECDH+aRSA+RC4 EDH+aRSA EECDH RC4 !aNULL !eNULL !LOW !3DES
- !MD5 !EXP !PSK !SRP !DSS'``
+ Default: ``ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES:
+ ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+3DES:!aNULL:!MD5:!DSS``
.. _sni: https://en.wikipedia.org/wiki/Server_Name_Indication
.. _crime attack: https://en.wikipedia.org/wiki/CRIME_(security_exploit)
import OpenSSL.SSL
from pyasn1.codec.der import decoder as der_decoder
from pyasn1.type import univ, constraint
-from socket import _fileobject
+from socket import _fileobject, timeout
import ssl
import select
from cStringIO import StringIO
+ OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT,
}
-# Default SSL/TLS cipher list.
-# Recommendation by https://community.qualys.com/blogs/securitylabs/2013/08/05/
-# configuring-apache-nginx-and-openssl-for-forward-secrecy
-DEFAULT_SSL_CIPHER_LIST = 'EECDH+ECDSA+AESGCM EECDH+aRSA+AESGCM ' + \
- 'EECDH+ECDSA+SHA256 EECDH+aRSA+SHA256 EECDH+aRSA+RC4 EDH+aRSA ' + \
- 'EECDH RC4 !aNULL !eNULL !LOW !3DES !MD5 !EXP !PSK !SRP !DSS'
+# A secure default.
+# Sources for more information on TLS ciphers:
+#
+# - https://wiki.mozilla.org/Security/Server_Side_TLS
+# - https://www.ssllabs.com/projects/best-practices/index.html
+# - https://hynek.me/articles/hardening-your-web-servers-ssl-ciphers/
+#
+# The general intent is:
+# - Prefer cipher suites that offer perfect forward secrecy (DHE/ECDHE),
+# - prefer ECDHE over DHE for better performance,
+# - prefer any AES-GCM over any AES-CBC for better performance and security,
+# - use 3DES as fallback which is secure but slow,
+# - disable NULL authentication, MD5 MACs and DSS for security reasons.
+DEFAULT_SSL_CIPHER_LIST = "ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:" + \
+ "ECDH+AES128:DH+AES:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+3DES:" + \
+ "!aNULL:!MD5:!DSS"
orig_util_HAS_SNI = util.HAS_SNI
class fileobject(_fileobject):
+ def _wait_for_sock(self):
+ rd, wd, ed = select.select([self._sock], [], [],
+ self._sock.gettimeout())
+ if not rd:
+ raise timeout()
+
+
def read(self, size=-1):
# Use max, disallow tiny reads in a loop as they are very inefficient.
# We never leave read() with any leftover data from a new recv() call
try:
data = self._sock.recv(rbufsize)
except OpenSSL.SSL.WantReadError:
+ self._wait_for_sock()
continue
if not data:
break
try:
data = self._sock.recv(left)
except OpenSSL.SSL.WantReadError:
+ self._wait_for_sock()
continue
if not data:
break
break
buffers.append(data)
except OpenSSL.SSL.WantReadError:
+ self._wait_for_sock()
continue
break
return "".join(buffers)
try:
data = self._sock.recv(self._rbufsize)
except OpenSSL.SSL.WantReadError:
+ self._wait_for_sock()
continue
if not data:
break
try:
data = self._sock.recv(self._rbufsize)
except OpenSSL.SSL.WantReadError:
- continue
+ self._wait_for_sock()
+ continue
if not data:
break
left = size - buf_len
ctx.load_verify_locations(ca_certs, None)
except OpenSSL.SSL.Error as e:
raise ssl.SSLError('bad ca_certs: %r' % ca_certs, e)
+ else:
+ ctx.set_default_verify_paths()
# Disable TLS compression to migitate CRIME attack (issue #309)
OP_NO_COMPRESSION = 0x20000
pass
+class ConnectionError(HTTPError):
+ "Raised when a normal connection fails."
+ pass
+
+
class DecodeError(HTTPError):
"Raised when automatic decoding based on Content-Type fails."
pass
from backports.ssl_match_hostname import CertificateError, match_hostname
except ImportError:
# Our vendored copy
- from _implementation import CertificateError, match_hostname
+ from ._implementation import CertificateError, match_hostname
# Not needed, but documenting what we provide.
__all__ = ('CertificateError', 'match_hostname')
import zlib
import io
+from ._collections import HTTPHeaderDict
from .exceptions import DecodeError
from .packages.six import string_types as basestring, binary_type
from .util import is_fp_closed
def __init__(self, body='', headers=None, status=0, version=0, reason=None,
strict=0, preload_content=True, decode_content=True,
original_response=None, pool=None, connection=None):
- self.headers = headers or {}
+
+ self.headers = HTTPHeaderDict()
+ if headers:
+ self.headers.update(headers)
self.status = status
self.version = version
self.reason = reason
with ``original_response=r``.
"""
- # Normalize headers between different versions of Python
- headers = {}
+ headers = HTTPHeaderDict()
for k, v in r.getheaders():
- # Python 3: Header keys are returned capitalised
- k = k.lower()
-
- has_value = headers.get(k)
- if has_value: # Python 3: Repeating header keys are unmerged.
- v = ', '.join([has_value, v])
-
- headers[k] = v
+ headers.add(k, v)
# HTTPResponse objects in Python 3 don't have a .strict attribute
strict = getattr(r, 'strict', 0)