-# urllib3/__init__.py
-# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
-#
-# This module is part of urllib3 and is released under
-# the MIT License: http://www.opensource.org/licenses/mit-license.php
-
"""
urllib3 - Thread-safe connection pooling and re-using.
"""
from .filepost import encode_multipart_formdata
from .poolmanager import PoolManager, ProxyManager, proxy_from_url
from .response import HTTPResponse
-from .util import make_headers, get_host, Timeout
+from .util.request import make_headers
+from .util.url import get_host
+from .util.timeout import Timeout
+from .util.retry import Retry
# Set default logging handler to avoid "No handler found" warnings.
handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
logger.addHandler(handler)
logger.setLevel(level)
- logger.debug('Added an stderr logging handler to logger: %s' % __name__)
+ logger.debug('Added a stderr logging handler to logger: %s' % __name__)
return handler
# ... Clean up.
del NullHandler
+
+
+# Set security warning to only go off once by default.
+import warnings
+warnings.simplefilter('module', exceptions.InsecureRequestWarning)
+
+def disable_warnings(category=exceptions.HTTPWarning):
+ """
+ Helper for quickly disabling all urllib3 warnings.
+ """
+ warnings.simplefilter('ignore', category)
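+
+# Illustrative usage sketch (not part of the module): silence the
+# InsecureRequestWarning emitted for unverified HTTPS requests.
+#
+#     import urllib3
+#     urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)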
-# urllib3/_collections.py
-# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
-#
-# This module is part of urllib3 and is released under
-# the MIT License: http://www.opensource.org/licenses/mit-license.php
-
from collections import Mapping, MutableMapping
try:
from threading import RLock
A ``dict`` like container for storing HTTP Headers.
Field names are stored and compared case-insensitively in compliance with
- RFC 2616. Iteration provides the first case-sensitive key seen for each
+ RFC 7230. Iteration provides the first case-sensitive key seen for each
case-insensitive pair.
Using ``__setitem__`` syntax overwrites fields that compare equal
-# urllib3/connection.py
-# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
-#
-# This module is part of urllib3 and is released under
-# the MIT License: http://www.opensource.org/licenses/mit-license.php
-
import sys
import socket
from socket import timeout as SocketTimeout
-try: # Python 3
+try: # Python 3
from http.client import HTTPConnection as _HTTPConnection, HTTPException
except ImportError:
from httplib import HTTPConnection as _HTTPConnection, HTTPException
+
class DummyConnection(object):
"Used to detect a failed ConnectionCls import."
pass
-try: # Compiled with SSL?
- ssl = None
+
+try: # Compiled with SSL?
HTTPSConnection = DummyConnection
+ import ssl
+ BaseSSLError = ssl.SSLError
+except (ImportError, AttributeError): # Platform-specific: No SSL.
+ ssl = None
class BaseSSLError(BaseException):
pass
- try: # Python 3
- from http.client import HTTPSConnection as _HTTPSConnection
- except ImportError:
- from httplib import HTTPSConnection as _HTTPSConnection
-
- import ssl
- BaseSSLError = ssl.SSLError
-
-except (ImportError, AttributeError): # Platform-specific: No SSL.
- pass
from .exceptions import (
ConnectTimeoutError,
)
from .packages.ssl_match_hostname import match_hostname
from .packages import six
-from .util import (
- assert_fingerprint,
+
+from .util.ssl_ import (
resolve_cert_reqs,
resolve_ssl_version,
ssl_wrap_socket,
+ assert_fingerprint,
)
+from .util import connection
+
port_by_scheme = {
'http': 80,
"""
Based on httplib.HTTPConnection but provides an extra constructor
backwards-compatibility layer between older and newer Pythons.
+
+ Additional keyword parameters are used to configure attributes of the connection.
+ Accepted parameters include:
+
+ - ``strict``: See the documentation on :class:`urllib3.connectionpool.HTTPConnectionPool`
+ - ``source_address``: Set the source address for the current connection.
+
+ .. note:: This is ignored for Python 2.6. It is only applied for 2.7 and 3.x
+
+ - ``socket_options``: Set specific options on the underlying socket. If not specified, then
+ defaults are loaded from ``HTTPConnection.default_socket_options`` which includes disabling
+ Nagle's algorithm (sets TCP_NODELAY to 1) unless the connection is behind a proxy.
+
+ For example, if you wish to enable TCP Keep Alive in addition to the defaults,
+ you might pass::
+
+ HTTPConnection.default_socket_options + [
+ (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1),
+ ]
+
+ Or you may want to disable the defaults by passing an empty list (e.g., ``[]``).
"""
default_port = port_by_scheme['http']
- # By default, disable Nagle's Algorithm.
- tcp_nodelay = 1
+ #: Disable Nagle's algorithm by default.
+ #: ``[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]``
+ default_socket_options = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]
+
+ #: Whether this connection verifies the host's certificate.
+ is_verified = False
def __init__(self, *args, **kw):
if six.PY3: # Python 3
kw.pop('strict', None)
- if sys.version_info < (2, 7): # Python 2.6 and older
- kw.pop('source_address', None)
# Pre-set source_address in case we have an older Python like 2.6.
self.source_address = kw.get('source_address')
+ if sys.version_info < (2, 7): # Python 2.6
+ # _HTTPConnection on Python 2.6 will balk at this keyword arg, but
+ # not newer versions. We can still use it when creating a
+ # connection though, so we pop it *after* we have saved it as
+ # self.source_address.
+ kw.pop('source_address', None)
+
+ #: The socket options provided by the user. If no options are
+ #: provided, we use the default options.
+ self.socket_options = kw.pop('socket_options', self.default_socket_options)
+
# Superclass also sets self.source_address in Python 2.7+.
- _HTTPConnection.__init__(self, *args, **kw)
+ _HTTPConnection.__init__(self, *args, **kw)
def _new_conn(self):
""" Establish a socket connection and set nodelay settings on it.
- :return: a new socket connection
+ :return: New socket connection.
"""
- extra_args = []
- if self.source_address: # Python 2.7+
- extra_args.append(self.source_address)
+ extra_kw = {}
+ if self.source_address:
+ extra_kw['source_address'] = self.source_address
- conn = socket.create_connection(
- (self.host, self.port), self.timeout, *extra_args)
- conn.setsockopt(
- socket.IPPROTO_TCP, socket.TCP_NODELAY, self.tcp_nodelay)
+ if self.socket_options:
+ extra_kw['socket_options'] = self.socket_options
+
+ try:
+ conn = connection.create_connection(
+ (self.host, self.port), self.timeout, **extra_kw)
+
+ except SocketTimeout:
+ raise ConnectTimeoutError(
+ self, "Connection to %s timed out. (connect timeout=%s)" %
+ (self.host, self.timeout))
return conn
if getattr(self, '_tunnel_host', None):
# TODO: Fix tunnel so it doesn't depend on self.sock state.
self._tunnel()
+ # Mark this connection as not reusable
+ self.auto_open = 0
def connect(self):
conn = self._new_conn()
cert_reqs = None
ca_certs = None
ssl_version = None
- conn_kw = {}
def set_cert(self, key_file=None, cert_file=None,
cert_reqs=None, ca_certs=None,
def connect(self):
# Add certificate verification
-
- try:
- sock = socket.create_connection(
- address=(self.host, self.port), timeout=self.timeout,
- **self.conn_kw)
- except SocketTimeout:
- raise ConnectTimeoutError(
- self, "Connection to %s timed out. (connect timeout=%s)" %
- (self.host, self.timeout))
-
- sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY,
- self.tcp_nodelay)
+ conn = self._new_conn()
resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs)
resolved_ssl_version = resolve_ssl_version(self.ssl_version)
# _tunnel_host was added in Python 2.6.3
# (See: http://hg.python.org/cpython/rev/0f57b30a152f)
- self.sock = sock
+ self.sock = conn
# Calls self._set_hostport(), so self.host is
# self._tunnel_host below.
self._tunnel()
+ # Mark this connection as not reusable
+ self.auto_open = 0
# Override the host with the one we're requesting data from.
hostname = self._tunnel_host
# Wrap socket using verification with the root certs in
# trusted_root_certs
- self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file,
+ self.sock = ssl_wrap_socket(conn, self.key_file, self.cert_file,
cert_reqs=resolved_cert_reqs,
ca_certs=self.ca_certs,
server_hostname=hostname,
match_hostname(self.sock.getpeercert(),
self.assert_hostname or hostname)
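+ # ``is_verified`` is read by HTTPSConnectionPool._validate_conn, which
+ # emits an InsecureRequestWarning when a request is made over an
+ # unverified connection.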
+ self.is_verified = resolved_cert_reqs == ssl.CERT_REQUIRED
+
if ssl:
# Make a copy for testing.
-# urllib3/connectionpool.py
-# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
-#
-# This module is part of urllib3 and is released under
-# the MIT License: http://www.opensource.org/licenses/mit-license.php
-
-import sys
import errno
import logging
+import sys
+import warnings
from socket import error as SocketError, timeout as SocketTimeout
import socket
-try: # Python 3
+try: # Python 3
from queue import LifoQueue, Empty, Full
except ImportError:
from Queue import LifoQueue, Empty, Full
from .exceptions import (
ClosedPoolError,
- ConnectionError,
- ConnectTimeoutError,
+ ProtocolError,
EmptyPoolError,
HostChangedError,
- LocationParseError,
+ LocationValueError,
MaxRetryError,
+ ProxyError,
+ ReadTimeoutError,
SSLError,
TimeoutError,
- ReadTimeoutError,
- ProxyError,
+ InsecureRequestWarning,
)
from .packages.ssl_match_hostname import CertificateError
from .packages import six
)
from .request import RequestMethods
from .response import HTTPResponse
-from .util import (
- get_host,
- is_connection_dropped,
- Timeout,
-)
+
+from .util.connection import is_connection_dropped
+from .util.retry import Retry
+from .util.timeout import Timeout
+from .util.url import get_host
xrange = six.moves.xrange
_Default = object()
-## Pool objects
+## Pool objects
class ConnectionPool(object):
"""
Base class for all connection pools, such as
QueueCls = LifoQueue
def __init__(self, host, port=None):
- if host is None:
- raise LocationParseError(host)
+ if not host:
+ raise LocationValueError("No host specified.")
# httplib doesn't like it when we include brackets in ipv6 addresses
- host = host.strip('[]')
-
- self.host = host
+ self.host = host.strip('[]')
self.port = port
def __str__(self):
# This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252
_blocking_errnos = set([errno.EAGAIN, errno.EWOULDBLOCK])
+
class HTTPConnectionPool(ConnectionPool, RequestMethods):
"""
Thread-safe connection pool for one host.
Headers to include with all requests, unless other headers are given
explicitly.
+ :param retries:
+ Retry configuration to use by default with requests in this pool.
+
:param _proxy:
Parsed proxy URL, should not be used directly, instead, see
:class:`urllib3.connectionpool.ProxyManager`
:param _proxy_headers:
A dictionary with proxy headers, should not be used directly,
instead, see :class:`urllib3.connectionpool.ProxyManager`
+
+ :param \**conn_kw:
+ Additional parameters are used to create fresh :class:`urllib3.connection.HTTPConnection`,
+ :class:`urllib3.connection.HTTPSConnection` instances.
"""
scheme = 'http'
def __init__(self, host, port=None, strict=False,
timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False,
- headers=None, _proxy=None, _proxy_headers=None, **conn_kw):
+ headers=None, retries=None,
+ _proxy=None, _proxy_headers=None,
+ **conn_kw):
ConnectionPool.__init__(self, host, port)
RequestMethods.__init__(self, headers)
self.strict = strict
- # This is for backwards compatibility and can be removed once a timeout
- # can only be set to a Timeout object
if not isinstance(timeout, Timeout):
timeout = Timeout.from_float(timeout)
+ if retries is None:
+ retries = Retry.DEFAULT
+
self.timeout = timeout
+ self.retries = retries
self.pool = self.QueueCls(maxsize)
self.block = block
# These are mostly for testing and debugging purposes.
self.num_connections = 0
self.num_requests = 0
-
- if sys.version_info < (2, 7): # Python 2.6 and older
- conn_kw.pop('source_address', None)
self.conn_kw = conn_kw
+ if self.proxy:
+ # Enable Nagle's algorithm for proxies, to avoid packet fragmentation.
+ # We cannot know if the user has added default socket options, so we cannot replace the
+ # list.
+ self.conn_kw.setdefault('socket_options', [])
+
def _new_conn(self):
"""
Return a fresh :class:`HTTPConnection`.
conn = self.ConnectionCls(host=self.host, port=self.port,
timeout=self.timeout.connect_timeout,
strict=self.strict, **self.conn_kw)
- if self.proxy is not None:
- # Enable Nagle's algorithm for proxies, to avoid packet
- # fragmentation.
- conn.tcp_nodelay = 0
return conn
def _get_conn(self, timeout=None):
try:
conn = self.pool.get(block=self.block, timeout=timeout)
- except AttributeError: # self.pool is None
+ except AttributeError: # self.pool is None
raise ClosedPoolError(self, "Pool is closed.")
except Empty:
if conn and is_connection_dropped(conn):
log.info("Resetting dropped connection: %s" % self.host)
conn.close()
+ if getattr(conn, 'auto_open', 1) == 0:
+ # This is a proxied connection that has been mutated by
+ # httplib._tunnel() and cannot be reused (since it would
+ # attempt to bypass the proxy)
+ conn = None
return conn or self._new_conn()
"""
try:
self.pool.put(conn, block=False)
- return # Everything is dandy, done.
+ return # Everything is dandy, done.
except AttributeError:
# self.pool is None.
pass
if conn:
conn.close()
+ def _validate_conn(self, conn):
+ """
+ Called right before a request is made, after the socket is created.
+ """
+ pass
+
def _get_timeout(self, timeout):
""" Helper that always returns a :class:`urllib3.util.Timeout` """
if timeout is _Default:
self.num_requests += 1
timeout_obj = self._get_timeout(timeout)
+ timeout_obj.start_connect()
+ conn.timeout = timeout_obj.connect_timeout
- try:
- timeout_obj.start_connect()
- conn.timeout = timeout_obj.connect_timeout
- # conn.request() calls httplib.*.request, not the method in
- # urllib3.request. It also calls makefile (recv) on the socket.
- conn.request(method, url, **httplib_request_kw)
- except SocketTimeout:
- raise ConnectTimeoutError(
- self, "Connection to %s timed out. (connect timeout=%s)" %
- (self.host, timeout_obj.connect_timeout))
+ # Trigger any extra validation we need to do.
+ self._validate_conn(conn)
+
+ # conn.request() calls httplib.*.request, not the method in
+ # urllib3.request. It also calls makefile (recv) on the socket.
+ conn.request(method, url, **httplib_request_kw)
# Reset the timeout for the recv() on the socket
read_timeout = timeout_obj.read_timeout
# App Engine doesn't have a sock attr
- if hasattr(conn, 'sock'):
+ if getattr(conn, 'sock', None):
# In Python 3 socket.py will catch EAGAIN and return None when you
# try and read into the file pointer created by http.client, which
# instead raises a BadStatusLine exception. Instead of catching
# timeouts, check for a zero timeout before making the request.
if read_timeout == 0:
raise ReadTimeoutError(
- self, url,
- "Read timed out. (read timeout=%s)" % read_timeout)
+ self, url, "Read timed out. (read timeout=%s)" % read_timeout)
if read_timeout is Timeout.DEFAULT_TIMEOUT:
conn.sock.settimeout(socket.getdefaulttimeout())
- else: # None or a value
+ else: # None or a value
conn.sock.settimeout(read_timeout)
# Receive the response from the server
try:
- try: # Python 2.7+, use buffering of HTTP responses
+ try: # Python 2.7+, use buffering of HTTP responses
httplib_response = conn.getresponse(buffering=True)
- except TypeError: # Python 2.6 and older
+ except TypeError: # Python 2.6 and older
httplib_response = conn.getresponse()
except SocketTimeout:
raise ReadTimeoutError(
# http://bugs.python.org/issue10272
if 'timed out' in str(e) or \
'did not complete (read)' in str(e): # Python 2.6
- raise ReadTimeoutError(self, url, "Read timed out.")
+ raise ReadTimeoutError(
+ self, url, "Read timed out. (read timeout=%s)" % read_timeout)
raise
- except SocketError as e: # Platform-specific: Python 2
+ except SocketError as e: # Platform-specific: Python 2
# See the above comment about EAGAIN in Python 3. In Python 2 we
# have to specifically catch it and throw the timeout error
if e.errno in _blocking_errnos:
raise ReadTimeoutError(
- self, url,
- "Read timed out. (read timeout=%s)" % read_timeout)
+ self, url, "Read timed out. (read timeout=%s)" % read_timeout)
raise
conn.close()
except Empty:
- pass # Done.
+ pass # Done.
def is_same_host(self, url):
"""
return (scheme, host, port) == (self.scheme, self.host, self.port)
- def urlopen(self, method, url, body=None, headers=None, retries=3,
+ def urlopen(self, method, url, body=None, headers=None, retries=None,
redirect=True, assert_same_host=True, timeout=_Default,
pool_timeout=None, release_conn=None, **response_kw):
"""
these headers completely replace any pool-specific headers.
:param retries:
- Number of retries to allow before raising a MaxRetryError exception.
- If `False`, then retries are disabled and any exception is raised
- immediately.
+ Configure the number of retries to allow before raising a
+ :class:`~urllib3.exceptions.MaxRetryError` exception.
+
+ Pass ``None`` to retry until you receive a response. Pass a
+ :class:`~urllib3.util.retry.Retry` object for fine-grained control
+ over different types of retries.
+ Pass an integer number to retry connection errors that many times,
+ but no other types of errors. Pass zero to never retry.
+
+ If ``False``, then retries are disabled and any exception is raised
+ immediately. Also, instead of raising a MaxRetryError on redirects,
+ the redirect response will be returned.
+
+ :type retries: :class:`~urllib3.util.retry.Retry`, False, or an int.
:param redirect:
If True, automatically handle redirects (status codes 301, 302,
if headers is None:
headers = self.headers
- if retries < 0 and retries is not False:
- raise MaxRetryError(self, url)
+ if not isinstance(retries, Retry):
+ retries = Retry.from_int(retries, redirect=redirect, default=self.retries)
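+ # From here on, ``retries`` is always a Retry instance; plain integers,
+ # None and False from the caller were normalized by from_int() above.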
if release_conn is None:
release_conn = response_kw.get('preload_content', True)
# Check host
if assert_same_host and not self.is_same_host(url):
- raise HostChangedError(self, url, retries - 1)
+ raise HostChangedError(self, url, retries)
conn = None
err = None
try:
- # Request a connection from the queue
+ # Request a connection from the queue.
conn = self._get_conn(timeout=pool_timeout)
- # Make the request on the httplib connection object
+ # Make the request on the httplib connection object.
httplib_response = self._make_request(conn, method, url,
timeout=timeout,
body=body, headers=headers)
conn.close()
conn = None
- if not retries:
- if isinstance(e, TimeoutError):
- # TimeoutError is exempt from MaxRetryError-wrapping.
- # FIXME: ... Not sure why. Add a reason here.
- raise
-
- # Wrap unexpected exceptions with the most appropriate
- # module-level exception and re-raise.
- if isinstance(e, SocketError) and self.proxy:
- raise ProxyError('Cannot connect to proxy.', e)
+ stacktrace = sys.exc_info()[2]
+ if isinstance(e, SocketError) and self.proxy:
+ e = ProxyError('Cannot connect to proxy.', e)
+ elif isinstance(e, (SocketError, HTTPException)):
+ e = ProtocolError('Connection aborted.', e)
- if retries is False:
- raise ConnectionError('Connection failed.', e)
-
- raise MaxRetryError(self, url, e)
+ retries = retries.increment(method, url, error=e,
+ _pool=self, _stacktrace=stacktrace)
+ retries.sleep()
# Keep track of the error for the retry warning.
err = e
if not conn:
# Try again
- log.warning("Retrying (%d attempts remain) after connection "
+ log.warning("Retrying (%r) after connection "
"broken by '%r': %s" % (retries, err, url))
- return self.urlopen(method, url, body, headers, retries - 1,
+ return self.urlopen(method, url, body, headers, retries,
redirect, assert_same_host,
timeout=timeout, pool_timeout=pool_timeout,
release_conn=release_conn, **response_kw)
# Handle redirect?
redirect_location = redirect and response.get_redirect_location()
- if redirect_location and retries is not False:
+ if redirect_location:
if response.status == 303:
method = 'GET'
+
+ try:
+ retries = retries.increment(method, url, response=response, _pool=self)
+ except MaxRetryError:
+ if retries.raise_on_redirect:
+ raise
+ return response
+
log.info("Redirecting %s -> %s" % (url, redirect_location))
return self.urlopen(method, redirect_location, body, headers,
- retries - 1, redirect, assert_same_host,
- timeout=timeout, pool_timeout=pool_timeout,
- release_conn=release_conn, **response_kw)
+ retries=retries, redirect=redirect,
+ assert_same_host=assert_same_host,
+ timeout=timeout, pool_timeout=pool_timeout,
+ release_conn=release_conn, **response_kw)
+
+ # Check if we should retry the HTTP response.
+ if retries.is_forced_retry(method, status_code=response.status):
+ retries = retries.increment(method, url, response=response, _pool=self)
+ retries.sleep()
+ log.info("Forced retry: %s" % url)
+ return self.urlopen(method, url, body, headers,
+ retries=retries, redirect=redirect,
+ assert_same_host=assert_same_host,
+ timeout=timeout, pool_timeout=pool_timeout,
+ release_conn=release_conn, **response_kw)
return response
ConnectionCls = HTTPSConnection
def __init__(self, host, port=None,
- strict=False, timeout=None, maxsize=1,
- block=False, headers=None,
+ strict=False, timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1,
+ block=False, headers=None, retries=None,
_proxy=None, _proxy_headers=None,
key_file=None, cert_file=None, cert_reqs=None,
ca_certs=None, ssl_version=None,
assert_hostname=None, assert_fingerprint=None,
**conn_kw):
- if sys.version_info < (2, 7): # Python 2.6 or older
- conn_kw.pop('source_address', None)
-
HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize,
- block, headers, _proxy, _proxy_headers, **conn_kw)
+ block, headers, retries, _proxy, _proxy_headers,
+ **conn_kw)
self.key_file = key_file
self.cert_file = cert_file
self.cert_reqs = cert_reqs
self.ssl_version = ssl_version
self.assert_hostname = assert_hostname
self.assert_fingerprint = assert_fingerprint
- self.conn_kw = conn_kw
def _prepare_conn(self, conn):
"""
assert_hostname=self.assert_hostname,
assert_fingerprint=self.assert_fingerprint)
conn.ssl_version = self.ssl_version
- conn.conn_kw = self.conn_kw
if self.proxy is not None:
# Python 2.7+
set_tunnel = conn.set_tunnel
except AttributeError: # Platform-specific: Python 2.6
set_tunnel = conn._set_tunnel
- set_tunnel(self.host, self.port, self.proxy_headers)
+
+ if sys.version_info <= (2, 6, 4) and not self.proxy_headers: # Python 2.6.4 and older
+ set_tunnel(self.host, self.port)
+ else:
+ set_tunnel(self.host, self.port, self.proxy_headers)
+
# Establish tunnel connection early, because otherwise httplib
# would improperly set Host: header to proxy's IP:port.
conn.connect()
actual_host = self.proxy.host
actual_port = self.proxy.port
- extra_params = {}
- if not six.PY3: # Python 2
- extra_params['strict'] = self.strict
- extra_params.update(self.conn_kw)
-
conn = self.ConnectionCls(host=actual_host, port=actual_port,
timeout=self.timeout.connect_timeout,
- **extra_params)
- if self.proxy is not None:
- # Enable Nagle's algorithm for proxies, to avoid packet
- # fragmentation.
- conn.tcp_nodelay = 0
+ strict=self.strict, **self.conn_kw)
return self._prepare_conn(conn)
+ def _validate_conn(self, conn):
+ """
+ Called right before a request is made, after the socket is created.
+ """
+ super(HTTPSConnectionPool, self)._validate_conn(conn)
+
+ # Force connect early to allow us to validate the connection.
+ if not conn.sock:
+ conn.connect()
+
+ if not conn.is_verified:
+ warnings.warn((
+ 'Unverified HTTPS request is being made. '
+ 'Adding certificate verification is strongly advised. See: '
+ 'https://urllib3.readthedocs.org/en/latest/security.html '
+ '(This warning will only appear once by default.)'),
+ InsecureRequestWarning)
+
def connection_from_url(url, **kw):
"""
:class:`.ConnectionPool`. Useful for specifying things like
timeout, maxsize, headers, etc.
- Example: ::
+ Example::
>>> conn = connection_from_url('http://google.com/')
>>> r = conn.request('GET', '/')
-# urllib3/contrib/ntlmpool.py
-# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
-#
-# This module is part of urllib3 and is released under
-# the MIT License: http://www.opensource.org/licenses/mit-license.php
-
"""
NTLM authenticating pool, contributed by erikcederstran
from socket import _fileobject, timeout
import ssl
import select
-from cStringIO import StringIO
from .. import connection
from .. import util
return dns_name
-class fileobject(_fileobject):
-
- def _wait_for_sock(self):
- rd, wd, ed = select.select([self._sock], [], [],
- self._sock.gettimeout())
- if not rd:
- raise timeout()
-
-
- def read(self, size=-1):
- # Use max, disallow tiny reads in a loop as they are very inefficient.
- # We never leave read() with any leftover data from a new recv() call
- # in our internal buffer.
- rbufsize = max(self._rbufsize, self.default_bufsize)
- # Our use of StringIO rather than lists of string objects returned by
- # recv() minimizes memory usage and fragmentation that occurs when
- # rbufsize is large compared to the typical return value of recv().
- buf = self._rbuf
- buf.seek(0, 2) # seek end
- if size < 0:
- # Read until EOF
- self._rbuf = StringIO() # reset _rbuf. we consume it via buf.
- while True:
- try:
- data = self._sock.recv(rbufsize)
- except OpenSSL.SSL.WantReadError:
- self._wait_for_sock()
- continue
- if not data:
- break
- buf.write(data)
- return buf.getvalue()
- else:
- # Read until size bytes or EOF seen, whichever comes first
- buf_len = buf.tell()
- if buf_len >= size:
- # Already have size bytes in our buffer? Extract and return.
- buf.seek(0)
- rv = buf.read(size)
- self._rbuf = StringIO()
- self._rbuf.write(buf.read())
- return rv
-
- self._rbuf = StringIO() # reset _rbuf. we consume it via buf.
- while True:
- left = size - buf_len
- # recv() will malloc the amount of memory given as its
- # parameter even though it often returns much less data
- # than that. The returned data string is short lived
- # as we copy it into a StringIO and free it. This avoids
- # fragmentation issues on many platforms.
- try:
- data = self._sock.recv(left)
- except OpenSSL.SSL.WantReadError:
- self._wait_for_sock()
- continue
- if not data:
- break
- n = len(data)
- if n == size and not buf_len:
- # Shortcut. Avoid buffer data copies when:
- # - We have no data in our buffer.
- # AND
- # - Our call to recv returned exactly the
- # number of bytes we were asked to read.
- return data
- if n == left:
- buf.write(data)
- del data # explicit free
- break
- assert n <= left, "recv(%d) returned %d bytes" % (left, n)
- buf.write(data)
- buf_len += n
- del data # explicit free
- #assert buf_len == buf.tell()
- return buf.getvalue()
-
- def readline(self, size=-1):
- buf = self._rbuf
- buf.seek(0, 2) # seek end
- if buf.tell() > 0:
- # check if we already have it in our buffer
- buf.seek(0)
- bline = buf.readline(size)
- if bline.endswith('\n') or len(bline) == size:
- self._rbuf = StringIO()
- self._rbuf.write(buf.read())
- return bline
- del bline
- if size < 0:
- # Read until \n or EOF, whichever comes first
- if self._rbufsize <= 1:
- # Speed up unbuffered case
- buf.seek(0)
- buffers = [buf.read()]
- self._rbuf = StringIO() # reset _rbuf. we consume it via buf.
- data = None
- recv = self._sock.recv
- while True:
- try:
- while data != "\n":
- data = recv(1)
- if not data:
- break
- buffers.append(data)
- except OpenSSL.SSL.WantReadError:
- self._wait_for_sock()
- continue
- break
- return "".join(buffers)
-
- buf.seek(0, 2) # seek end
- self._rbuf = StringIO() # reset _rbuf. we consume it via buf.
- while True:
- try:
- data = self._sock.recv(self._rbufsize)
- except OpenSSL.SSL.WantReadError:
- self._wait_for_sock()
- continue
- if not data:
- break
- nl = data.find('\n')
- if nl >= 0:
- nl += 1
- buf.write(data[:nl])
- self._rbuf.write(data[nl:])
- del data
- break
- buf.write(data)
- return buf.getvalue()
- else:
- # Read until size bytes or \n or EOF seen, whichever comes first
- buf.seek(0, 2) # seek end
- buf_len = buf.tell()
- if buf_len >= size:
- buf.seek(0)
- rv = buf.read(size)
- self._rbuf = StringIO()
- self._rbuf.write(buf.read())
- return rv
- self._rbuf = StringIO() # reset _rbuf. we consume it via buf.
- while True:
- try:
- data = self._sock.recv(self._rbufsize)
- except OpenSSL.SSL.WantReadError:
- self._wait_for_sock()
- continue
- if not data:
- break
- left = size - buf_len
- # did we just receive a newline?
- nl = data.find('\n', 0, left)
- if nl >= 0:
- nl += 1
- # save the excess data to _rbuf
- self._rbuf.write(data[nl:])
- if buf_len:
- buf.write(data[:nl])
- break
- else:
- # Shortcut. Avoid data copy through buf when returning
- # a substring of our first recv().
- return data[:nl]
- n = len(data)
- if n == size and not buf_len:
- # Shortcut. Avoid data copy through buf when
- # returning exactly all of our first recv().
- return data
- if n >= left:
- buf.write(data[:left])
- self._rbuf.write(data[left:])
- break
- buf.write(data)
- buf_len += n
- #assert buf_len == buf.tell()
- return buf.getvalue()
-
-
class WrappedSocket(object):
'''API-compatibility wrapper for Python OpenSSL's Connection-class.'''
- def __init__(self, connection, socket):
+ def __init__(self, connection, socket, suppress_ragged_eofs=True):
self.connection = connection
self.socket = socket
+ self.suppress_ragged_eofs = suppress_ragged_eofs
def fileno(self):
return self.socket.fileno()
def makefile(self, mode, bufsize=-1):
- return fileobject(self.connection, mode, bufsize)
+ return _fileobject(self, mode, bufsize)
+
+ def recv(self, *args, **kwargs):
+ try:
+ data = self.connection.recv(*args, **kwargs)
+ except OpenSSL.SSL.SysCallError as e:
+ if self.suppress_ragged_eofs and e.args == (-1, 'Unexpected EOF'):
+ return b''
+ else:
+ raise
+ except OpenSSL.SSL.WantReadError:
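+ # WantReadError: the SSL layer needs more raw bytes from the network
+ # before it can return decrypted data. Block until the underlying socket
+ # is readable (honoring its timeout), then retry the recv.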
+ rd, wd, ed = select.select(
+ [self.socket], [], [], self.socket.gettimeout())
+ if not rd:
+ raise timeout()
+ else:
+ return self.recv(*args, **kwargs)
+ else:
+ return data
def settimeout(self, timeout):
return self.socket.settimeout(timeout)
-# urllib3/exceptions.py
-# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
-#
-# This module is part of urllib3 and is released under
-# the MIT License: http://www.opensource.org/licenses/mit-license.php
-
## Base Exceptions
"Base exception used by this module."
pass
+class HTTPWarning(Warning):
+ "Base warning used by this module."
+ pass
+
+
class PoolError(HTTPError):
"Base exception for errors caused within a pool."
pass
-class ConnectionError(HTTPError):
- "Raised when a normal connection fails."
+class DecodeError(HTTPError):
+ "Raised when automatic decoding based on Content-Type fails."
pass
-class DecodeError(HTTPError):
- "Raised when automatic decoding based on Content-Type fails."
+class ProtocolError(HTTPError):
+ "Raised when something unexpected happens mid-request/response."
pass
+#: Renamed to ProtocolError but aliased for backwards compatibility.
+ConnectionError = ProtocolError
+
+
## Leaf Exceptions
class MaxRetryError(RequestError):
message = "Max retries exceeded with url: %s" % url
if reason:
- message += " (Caused by %s: %s)" % (type(reason), reason)
+ message += " (Caused by %r)" % reason
else:
message += " (Caused by redirect)"
pass
-class LocationParseError(ValueError, HTTPError):
+class LocationValueError(ValueError, HTTPError):
+ "Raised when there is something wrong with a given URL input."
+ pass
+
+
+class LocationParseError(LocationValueError):
"Raised when get_host or similar fails to parse the URL input."
def __init__(self, location):
HTTPError.__init__(self, message)
self.location = location
+
+
+class InsecureRequestWarning(HTTPWarning):
+ "Warned when making an unverified HTTPS request."
+ pass
-# urllib3/fields.py
-# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
-#
-# This module is part of urllib3 and is released under
-# the MIT License: http://www.opensource.org/licenses/mit-license.php
-
import email.utils
import mimetypes
"""
A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters.
- Supports constructing :class:`~urllib3.fields.RequestField` from parameter
- of key/value strings AND key/filetuple. A filetuple is a (filename, data, MIME type)
- tuple where the MIME type is optional. For example: ::
+ Supports constructing :class:`~urllib3.fields.RequestField` from
+ parameter of key/value strings AND key/filetuple. A filetuple is a
+ (filename, data, MIME type) tuple where the MIME type is optional.
+ For example::
'foo': 'bar',
'fakefile': ('foofile.txt', 'contents of foofile'),
'Content-Disposition' fields.
:param header_parts:
- A sequence of (k, v) typles or a :class:`dict` of (k, v) to format as
- `k1="v1"; k2="v2"; ...`.
+ A sequence of (k, v) tuples or a :class:`dict` of (k, v) to format
+ as `k1="v1"; k2="v2"; ...`.
"""
parts = []
iterable = header_parts
lines.append('\r\n')
return '\r\n'.join(lines)
- def make_multipart(self, content_disposition=None, content_type=None, content_location=None):
+ def make_multipart(self, content_disposition=None, content_type=None,
+ content_location=None):
"""
Makes this request field into a multipart request field.
"""
self.headers['Content-Disposition'] = content_disposition or 'form-data'
- self.headers['Content-Disposition'] += '; '.join(['', self._render_parts((('name', self._name), ('filename', self._filename)))])
+ self.headers['Content-Disposition'] += '; '.join([
+ '', self._render_parts(
+ (('name', self._name), ('filename', self._filename))
+ )
+ ])
self.headers['Content-Type'] = content_type
self.headers['Content-Location'] = content_location
-# urllib3/filepost.py
-# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
-#
-# This module is part of urllib3 and is released under
-# the MIT License: http://www.opensource.org/licenses/mit-license.php
-
import codecs
-import mimetypes
from uuid import uuid4
from io import BytesIO
i = iter(fields)
for field in i:
- if isinstance(field, RequestField):
- yield field
- else:
- yield RequestField.from_tuples(*field)
+ if isinstance(field, RequestField):
+ yield field
+ else:
+ yield RequestField.from_tuples(*field)
def iter_fields(fields):
# Passes Python2.7's test suite and incorporates all the latest updates.
# Copyright 2009 Raymond Hettinger, released under the MIT License.
# http://code.activestate.com/recipes/576693/
-
try:
from thread import get_ident as _get_ident
except ImportError:
-# urllib3/poolmanager.py
-# Copyright 2008-2014 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
-#
-# This module is part of urllib3 and is released under
-# the MIT License: http://www.opensource.org/licenses/mit-license.php
-
import logging
try: # Python 3
from ._collections import RecentlyUsedContainer
from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool
from .connectionpool import port_by_scheme
+from .exceptions import LocationValueError
from .request import RequestMethods
-from .util import parse_url
+from .util.url import parse_url
+from .util.retry import Retry
__all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url']
Additional parameters are used to create fresh
:class:`urllib3.connectionpool.ConnectionPool` instances.
- Example: ::
+ Example::
>>> manager = PoolManager(num_pools=2)
>>> r = manager.request('GET', 'http://google.com/')
``urllib3.connectionpool.port_by_scheme``.
"""
- scheme = scheme or 'http'
+ if not host:
+ raise LocationValueError("No host specified.")
+ scheme = scheme or 'http'
port = port or port_by_scheme.get(scheme, 80)
-
pool_key = (scheme, host, port)
with self.pools.lock:
# Make a fresh ConnectionPool of the desired type
pool = self._new_pool(scheme, host, port)
self.pools[pool_key] = pool
+
return pool
def connection_from_url(self, url):
# Support relative URLs for redirecting.
redirect_location = urljoin(url, redirect_location)
- # RFC 2616, Section 10.3.4
+ # RFC 7231, Section 6.4.4
if response.status == 303:
method = 'GET'
- log.info("Redirecting %s -> %s" % (url, redirect_location))
- kw['retries'] = kw.get('retries', 3) - 1 # Persist retries countdown
+ retries = kw.get('retries')
+ if not isinstance(retries, Retry):
+ retries = Retry.from_int(retries, redirect=redirect)
+
+ kw['retries'] = retries.increment(method, redirect_location)
kw['redirect'] = redirect
+
+ log.info("Redirecting %s -> %s" % (url, redirect_location))
return self.urlopen(method, redirect_location, **kw)
if not proxy.port:
port = port_by_scheme.get(proxy.scheme, 80)
proxy = proxy._replace(port=port)
+
+ assert proxy.scheme in ("http", "https"), \
+ 'Not supported proxy scheme %s' % proxy.scheme
+
self.proxy = proxy
self.proxy_headers = proxy_headers or {}
- assert self.proxy.scheme in ("http", "https"), \
- 'Not supported proxy scheme %s' % self.proxy.scheme
+
connection_pool_kw['_proxy'] = self.proxy
connection_pool_kw['_proxy_headers'] = self.proxy_headers
+
super(ProxyManager, self).__init__(
num_pools, headers, **connection_pool_kw)
# For proxied HTTPS requests, httplib sets the necessary headers
# on the CONNECT to the proxy. For HTTP, we'll definitely
# need to set 'Host' at the very least.
- kw['headers'] = self._set_proxy_headers(url, kw.get('headers',
- self.headers))
+ headers = kw.get('headers', self.headers)
+ kw['headers'] = self._set_proxy_headers(url, headers)
- return super(ProxyManager, self).urlopen(method, url, redirect, **kw)
+ return super(ProxyManager, self).urlopen(method, url, redirect=redirect, **kw)
def proxy_from_url(url, **kw):
-# urllib3/request.py
-# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
-#
-# This module is part of urllib3 and is released under
-# the MIT License: http://www.opensource.org/licenses/mit-license.php
-
try:
from urllib.parse import urlencode
except ImportError:
Specifically,
- :meth:`.request_encode_url` is for sending requests whose fields are encoded
- in the URL (such as GET, HEAD, DELETE).
+ :meth:`.request_encode_url` is for sending requests whose fields are
+ encoded in the URL (such as GET, HEAD, DELETE).
:meth:`.request_encode_body` is for sending requests whose fields are
encoded in the *body* of the request using multipart or www-form-urlencoded
def urlopen(self, method, url, body=None, headers=None,
encode_multipart=True, multipart_boundary=None,
- **kw): # Abstract
+ **kw): # Abstract
raise NotImplementedError("Classes extending RequestMethods must implement "
"their own ``urlopen`` method.")
``fields`` based on the ``method`` used.
This is a convenience method that requires the least amount of manual
- effort. It can be used in most situations, while still having the option
- to drop down to more specific methods when necessary, such as
+ effort. It can be used in most situations, while still having the
+ option to drop down to more specific methods when necessary, such as
:meth:`request_encode_url`, :meth:`request_encode_body`,
or even the lowest level :meth:`urlopen`.
"""
if method in self._encode_url_methods:
return self.request_encode_url(method, url, fields=fields,
- headers=headers,
- **urlopen_kw)
+ headers=headers,
+ **urlopen_kw)
else:
return self.request_encode_body(method, url, fields=fields,
- headers=headers,
- **urlopen_kw)
+ headers=headers,
+ **urlopen_kw)
def request_encode_url(self, method, url, fields=None, **urlopen_kw):
"""
the body. This is useful for request methods like POST, PUT, PATCH, etc.
When ``encode_multipart=True`` (default), then
- :meth:`urllib3.filepost.encode_multipart_formdata` is used to encode the
- payload with the appropriate content type. Otherwise
+ :meth:`urllib3.filepost.encode_multipart_formdata` is used to encode
+ the payload with the appropriate content type. Otherwise
:meth:`urllib.urlencode` is used with the
'application/x-www-form-urlencoded' content type.
Multipart encoding must be used when posting files, and it's reasonably
- safe to use it in other times too. However, it may break request signing,
- such as with OAuth.
+ safe to use it in other times too. However, it may break request
+ signing, such as with OAuth.
Supports an optional ``fields`` parameter of key/value strings AND
key/filetuple. A filetuple is a (filename, data, MIME type) tuple where
- the MIME type is optional. For example: ::
+ the MIME type is optional. For example::
fields = {
'foo': 'bar',
When uploading a file, providing a filename (the first parameter of the
tuple) is optional but recommended to best mimic the behavior of browsers.
- Note that if ``headers`` are supplied, the 'Content-Type' header will be
- overwritten because it depends on the dynamic random boundary string
+ Note that if ``headers`` are supplied, the 'Content-Type' header will
+ be overwritten because it depends on the dynamic random boundary string
which is used to compose the body of the request. The random boundary
string can be explicitly set with the ``multipart_boundary`` parameter.
"""
if encode_multipart:
- body, content_type = encode_multipart_formdata(fields or {},
- boundary=multipart_boundary)
+ body, content_type = encode_multipart_formdata(
+ fields or {}, boundary=multipart_boundary)
else:
body, content_type = (urlencode(fields or {}),
- 'application/x-www-form-urlencoded')
+ 'application/x-www-form-urlencoded')
if headers is None:
headers = self.headers
-# urllib3/response.py
-# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
-#
-# This module is part of urllib3 and is released under
-# the MIT License: http://www.opensource.org/licenses/mit-license.php
-
-
-import logging
import zlib
import io
+from socket import timeout as SocketTimeout
from ._collections import HTTPHeaderDict
-from .exceptions import DecodeError
+from .exceptions import ProtocolError, DecodeError, ReadTimeoutError
from .packages.six import string_types as basestring, binary_type
-from .util import is_fp_closed
-
+from .connection import HTTPException, BaseSSLError
+from .util.response import is_fp_closed
-log = logging.getLogger(__name__)
class DeflateDecoder(object):
self.decode_content = decode_content
self._decoder = None
- self._body = body if body and isinstance(body, basestring) else None
+ self._body = None
self._fp = None
self._original_response = original_response
self._fp_bytes_read = 0
+ if body and isinstance(body, (basestring, binary_type)):
+ self._body = body
+
self._pool = pool
self._connection = connection
after having ``.read()`` the file object. (Overridden if ``amt`` is
set.)
"""
- # Note: content-encoding value should be case-insensitive, per RFC 2616
- # Section 3.5
+ # Note: content-encoding value should be case-insensitive, per RFC 7230
+ # Section 3.2
content_encoding = self.headers.get('content-encoding', '').lower()
if self._decoder is None:
if content_encoding in self.CONTENT_DECODERS:
flush_decoder = False
try:
- if amt is None:
- # cStringIO doesn't like amt=None
- data = self._fp.read()
- flush_decoder = True
- else:
- cache_content = False
- data = self._fp.read(amt)
- if amt != 0 and not data: # Platform-specific: Buggy versions of Python.
- # Close the connection when no data is returned
- #
- # This is redundant to what httplib/http.client _should_
- # already do. However, versions of python released before
- # December 15, 2012 (http://bugs.python.org/issue16298) do not
- # properly close the connection in all cases. There is no harm
- # in redundantly calling close.
- self._fp.close()
+ try:
+ if amt is None:
+ # cStringIO doesn't like amt=None
+ data = self._fp.read()
flush_decoder = True
+ else:
+ cache_content = False
+ data = self._fp.read(amt)
+ if amt != 0 and not data: # Platform-specific: Buggy versions of Python.
+ # Close the connection when no data is returned
+ #
+ # This is redundant to what httplib/http.client _should_
+ # already do. However, versions of python released before
+ # December 15, 2012 (http://bugs.python.org/issue16298) do
+ # not properly close the connection in all cases. There is
+ # no harm in redundantly calling close.
+ self._fp.close()
+ flush_decoder = True
+
+ except SocketTimeout:
+ # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
+ # there is yet no clean way to get at it from this context.
+ raise ReadTimeoutError(self._pool, None, 'Read timed out.')
+
+ except BaseSSLError as e:
+ # FIXME: Is there a better way to differentiate between SSLErrors?
+ if 'read operation timed out' not in str(e): # Defensive:
+ # This shouldn't happen but just in case we're missing an edge
+ # case, let's avoid swallowing SSL errors.
+ raise
+
+ raise ReadTimeoutError(self._pool, None, 'Read timed out.')
+
+ except HTTPException as e:
+ # This includes IncompleteRead.
+ raise ProtocolError('Connection broken: %r' % e, e)
self._fp_bytes_read += len(data)
except (IOError, zlib.error) as e:
raise DecodeError(
"Received response with content-encoding: %s, but "
- "failed to decode it." % content_encoding,
- e)
+ "failed to decode it." % content_encoding, e)
if flush_decoder and decode_content and self._decoder:
buf = self._decoder.decompress(binary_type())
if data:
yield data
-
@classmethod
def from_httplib(ResponseCls, r, **response_kw):
"""
elif hasattr(self._fp, "fileno"):
return self._fp.fileno()
else:
- raise IOError("The file-like object this HTTPResponse is wrapped "
+ raise IOError("The file-like object this HTTPResponse is wrapped "
"around has no file descriptor")
def flush(self):
-# urllib3/util/__init__.py
-# Copyright 2008-2014 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
-#
-# This module is part of urllib3 and is released under
-# the MIT License: http://www.opensource.org/licenses/mit-license.php
-
+# For backwards compatibility, provide imports that used to be here.
from .connection import is_connection_dropped
from .request import make_headers
from .response import is_fp_closed
current_time,
Timeout,
)
+
+from .retry import Retry
from .url import (
get_host,
parse_url,
-from socket import error as SocketError
+import socket
try:
from select import poll, POLLIN
except ImportError: # `poll` doesn't exist on OSX and other platforms
except ImportError: # `select` doesn't exist on AppEngine.
select = False
+
def is_connection_dropped(conn): # Platform-specific
"""
Returns True if the connection is dropped and should be closed.
if sock is False: # Platform-specific: AppEngine
return False
if sock is None: # Connection already closed (such as by httplib).
- return False
+ return True
if not poll:
if not select: # Platform-specific: AppEngine
try:
return select([sock], [], [], 0.0)[0]
- except SocketError:
+ except socket.error:
return True
# This version is better on platforms that support it.
return True
+# This function is copied from socket.py in the Python 2.7 standard
+# library test suite. Added to its signature is only `socket_options`.
+def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
+ source_address=None, socket_options=None):
+ """Connect to *address* and return the socket object.
+
+ Convenience function. Connect to *address* (a 2-tuple ``(host,
+ port)``) and return the socket object. Passing the optional
+ *timeout* parameter will set the timeout on the socket instance
+ before attempting to connect. If no *timeout* is supplied, the
+ global default timeout setting returned by :func:`getdefaulttimeout`
+ is used. If *source_address* is set it must be a tuple of (host, port)
+ for the socket to bind as a source address before making the connection.
+ A host of '' or port 0 tells the OS to use the default.
+ """
+
+ host, port = address
+ err = None
+ for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
+ af, socktype, proto, canonname, sa = res
+ sock = None
+ try:
+ sock = socket.socket(af, socktype, proto)
+
+ # If provided, set socket level options before connecting.
+ # This is the only addition urllib3 makes to this function.
+ _set_socket_options(sock, socket_options)
+
+ if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
+ sock.settimeout(timeout)
+ if source_address:
+ sock.bind(source_address)
+ sock.connect(sa)
+ return sock
+
+ except socket.error as _:
+ err = _
+ if sock is not None:
+ sock.close()
+
+ if err is not None:
+ raise err
+ else:
+ raise socket.error("getaddrinfo returns an empty list")
+
+
+def _set_socket_options(sock, options):
+ if options is None:
+ return
+ for opt in options:
+ sock.setsockopt(*opt)
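+
+# Illustrative only (not part of the module): each option is a
+# (level, optname, value) tuple, applied in order, e.g.
+#
+#     _set_socket_options(sock, [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)])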
from base64 import b64encode
-from ..packages import six
-
+from ..packages.six import b
ACCEPT_ENCODING = 'gzip,deflate'
def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
- basic_auth=None, proxy_basic_auth=None):
+ basic_auth=None, proxy_basic_auth=None, disable_cache=None):
"""
Shortcuts for generating request headers.
Colon-separated username:password string for 'proxy-authorization: basic ...'
auth header.
- Example: ::
+ :param disable_cache:
+ If ``True``, adds 'cache-control: no-cache' header.
+
+ Example::
>>> make_headers(keep_alive=True, user_agent="Batman/1.0")
{'connection': 'keep-alive', 'user-agent': 'Batman/1.0'}
if basic_auth:
headers['authorization'] = 'Basic ' + \
- b64encode(six.b(basic_auth)).decode('utf-8')
+ b64encode(b(basic_auth)).decode('utf-8')
if proxy_basic_auth:
headers['proxy-authorization'] = 'Basic ' + \
- b64encode(six.b(proxy_basic_auth)).decode('utf-8')
-
- return headers
+ b64encode(b(proxy_basic_auth)).decode('utf-8')
+ if disable_cache:
+ headers['cache-control'] = 'no-cache'
+ return headers
--- /dev/null
+import time
+import logging
+
+from ..exceptions import (
+ ProtocolError,
+ ConnectTimeoutError,
+ ReadTimeoutError,
+ MaxRetryError,
+)
+from ..packages import six
+
+
+log = logging.getLogger(__name__)
+
+
+class Retry(object):
+ """ Retry configuration.
+
+ Each retry attempt will create a new Retry object with updated values, so
+ they can be safely reused.
+
+ Retries can be defined as a default for a pool::
+
+ retries = Retry(connect=5, read=2, redirect=5)
+ http = PoolManager(retries=retries)
+ response = http.request('GET', 'http://example.com/')
+
+ Or per-request (which overrides the default for the pool)::
+
+ response = http.request('GET', 'http://example.com/', retries=Retry(10))
+
+ Retries can be disabled by passing ``False``::
+
+ response = http.request('GET', 'http://example.com/', retries=False)
+
+ Errors will be wrapped in :class:`~urllib3.exceptions.MaxRetryError` unless
+ retries are disabled, in which case the causing exception will be raised.
+
+
+ :param int total:
+ Total number of retries to allow. Takes precedence over other counts.
+
+ Set to ``None`` to remove this constraint and fall back on other
+ counts. It's a good idea to set this to some sensibly-high value to
+ account for unexpected edge cases and avoid infinite retry loops.
+
+ Set to ``0`` to fail on the first retry.
+
+ Set to ``False`` to disable and imply ``raise_on_redirect=False``.
+
+ :param int connect:
+ How many connection-related errors to retry on.
+
+ These are errors raised before the request is sent to the remote server,
+ which we assume has not triggered the server to process the request.
+
+ Set to ``0`` to fail on the first retry of this type.
+
+ :param int read:
+ How many times to retry on read errors.
+
+ These errors are raised after the request was sent to the server, so the
+ request may have side-effects.
+
+ Set to ``0`` to fail on the first retry of this type.
+
+ :param int redirect:
+ How many redirects to perform. Limit this to avoid infinite redirect
+ loops.
+
+ A redirect is an HTTP response with a status code 301, 302, 303, 307 or
+ 308.
+
+ Set to ``0`` to fail on the first retry of this type.
+
+ Set to ``False`` to disable and imply ``raise_on_redirect=False``.
+
+ :param iterable method_whitelist:
+ Set of uppercased HTTP method verbs that we should retry on.
+
+ By default, we only retry on methods which are considered to be
+ idempotent (multiple requests with the same parameters end with the
+ same state). See :attr:`Retry.DEFAULT_METHOD_WHITELIST`.
+
+ :param iterable status_forcelist:
+ A set of HTTP status codes that we should force a retry on.
+
+ By default, this is disabled with ``None``.
+
+ :param float backoff_factor:
+ A backoff factor to apply between attempts. urllib3 will sleep for::
+
+ {backoff factor} * (2 ^ ({number of total retries} - 1))
+
+ seconds. If the backoff_factor is 0.1, then :func:`.sleep` will sleep
+ for [0.1s, 0.2s, 0.4s, ...] between retries. It will never be longer
+ than :attr:`Retry.MAX_BACKOFF`.
+
+ By default, backoff is disabled (set to 0).
+
+ :param bool raise_on_redirect: Whether, if the number of redirects is
+ exhausted, to raise a MaxRetryError, or to return a response with a
+ response code in the 3xx range.
+ """
+
+ DEFAULT_METHOD_WHITELIST = frozenset([
+ 'HEAD', 'GET', 'PUT', 'DELETE', 'OPTIONS', 'TRACE'])
+
+ #: Maximum backoff time.
+ BACKOFF_MAX = 120
+
+ def __init__(self, total=10, connect=None, read=None, redirect=None,
+ method_whitelist=DEFAULT_METHOD_WHITELIST, status_forcelist=None,
+ backoff_factor=0, raise_on_redirect=True, _observed_errors=0):
+
+ self.total = total
+ self.connect = connect
+ self.read = read
+
+ if redirect is False or total is False:
+ redirect = 0
+ raise_on_redirect = False
+
+ self.redirect = redirect
+ self.status_forcelist = status_forcelist or set()
+ self.method_whitelist = method_whitelist
+ self.backoff_factor = backoff_factor
+ self.raise_on_redirect = raise_on_redirect
+ self._observed_errors = _observed_errors # TODO: use .history instead?
+
+ def new(self, **kw):
+ params = dict(
+ total=self.total,
+ connect=self.connect, read=self.read, redirect=self.redirect,
+ method_whitelist=self.method_whitelist,
+ status_forcelist=self.status_forcelist,
+ backoff_factor=self.backoff_factor,
+ raise_on_redirect=self.raise_on_redirect,
+ _observed_errors=self._observed_errors,
+ )
+ params.update(kw)
+ return type(self)(**params)
+
+ @classmethod
+ def from_int(cls, retries, redirect=True, default=None):
+ """ Backwards-compatibility for the old retries format."""
+ if retries is None:
+ retries = default if default is not None else cls.DEFAULT
+
+ if isinstance(retries, Retry):
+ return retries
+
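+ # redirect=True means "no per-redirect limit": store None so ``total``
+ # governs. A falsy value becomes False, which __init__ treats as
+ # "disable redirects and don't raise on them".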
+ redirect = bool(redirect) and None
+ new_retries = cls(retries, redirect=redirect)
+ log.debug("Converted retries value: %r -> %r" % (retries, new_retries))
+ return new_retries
+
+ def get_backoff_time(self):
+ """ Formula for computing the current backoff
+
+ :rtype: float
+ """
+ if self._observed_errors <= 1:
+ return 0
+
+ backoff_value = self.backoff_factor * (2 ** (self._observed_errors - 1))
+ return min(self.BACKOFF_MAX, backoff_value)
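+
+ # For example (illustrative): with backoff_factor=0.5, successive failed
+ # attempts sleep 0s (first error), then 1s, 2s, 4s, ... up to BACKOFF_MAX.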
+
+ def sleep(self):
+ """ Sleep between retry attempts using an exponential backoff.
+
+ By default, the backoff factor is 0 and this method will return
+ immediately.
+ """
+ backoff = self.get_backoff_time()
+ if backoff <= 0:
+ return
+ time.sleep(backoff)
+
+ def _is_connection_error(self, err):
+ """ Errors when we're fairly sure that the server did not receive the
+ request, so it should be safe to retry.
+ """
+ return isinstance(err, ConnectTimeoutError)
+
+ def _is_read_error(self, err):
+ """ Errors that occur after the request has been started, so we can't
+ assume that the server did not process any of it.
+ """
+ return isinstance(err, (ReadTimeoutError, ProtocolError))
+
+ def is_forced_retry(self, method, status_code):
+ """ Is this method/response retryable? (Based on method/codes whitelists)
+ """
+ if self.method_whitelist and method.upper() not in self.method_whitelist:
+ return False
+
+ return self.status_forcelist and status_code in self.status_forcelist
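+
+ # For example (illustrative): Retry(status_forcelist=set([500, 502]))
+ # force-retries a GET that returned 500, while POST is never force-retried
+ # because it is not in DEFAULT_METHOD_WHITELIST.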
+
+ def is_exhausted(self):
+ """ Are we out of retries?
+ """
+ retry_counts = (self.total, self.connect, self.read, self.redirect)
+ retry_counts = list(filter(None, retry_counts))
+ if not retry_counts:
+ return False
+
+ return min(retry_counts) < 0
+
+ def increment(self, method=None, url=None, response=None, error=None, _pool=None, _stacktrace=None):
+ """ Return a new Retry object with incremented retry counters.
+
+ :param response: A response object, or None, if the server did not
+ return a response.
+ :type response: :class:`~urllib3.response.HTTPResponse`
+ :param Exception error: An error encountered during the request, or
+ None if the response was received successfully.
+
+ :return: A new ``Retry`` object.
+ """
+ if self.total is False and error:
+ # Disabled, indicate to re-raise the error.
+ raise six.reraise(type(error), error, _stacktrace)
+
+ total = self.total
+ if total is not None:
+ total -= 1
+
+ _observed_errors = self._observed_errors
+ connect = self.connect
+ read = self.read
+ redirect = self.redirect
+
+ if error and self._is_connection_error(error):
+ # Connect retry?
+ if connect is False:
+ raise six.reraise(type(error), error, _stacktrace)
+ elif connect is not None:
+ connect -= 1
+ _observed_errors += 1
+
+ elif error and self._is_read_error(error):
+ # Read retry?
+ if read is False:
+ raise six.reraise(type(error), error, _stacktrace)
+ elif read is not None:
+ read -= 1
+ _observed_errors += 1
+
+ elif response and response.get_redirect_location():
+ # Redirect retry?
+ if redirect is not None:
+ redirect -= 1
+
+ else:
+ # FIXME: Nothing changed, scenario doesn't make sense.
+ _observed_errors += 1
+
+ new_retry = self.new(
+ total=total,
+ connect=connect, read=read, redirect=redirect,
+ _observed_errors=_observed_errors)
+
+ if new_retry.is_exhausted():
+ raise MaxRetryError(_pool, url, error)
+
+ log.debug("Incremented Retry for (url='%s'): %r" % (url, new_retry))
+
+ return new_retry
+
+ def __repr__(self):
+ return ('{cls.__name__}(total={self.total}, connect={self.connect}, '
+ 'read={self.read}, redirect={self.redirect})').format(
+ cls=type(self), self=self)
+
+
+# For backwards compatibility (equivalent to pre-v1.9):
+Retry.DEFAULT = Retry(3)
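
Taken together, ``get_backoff_time()`` and ``sleep()`` above give capped
exponential backoff: no delay on the first error, then doubling delays. A
standalone sketch of the progression, assuming ``backoff_factor=0.5`` and a
``BACKOFF_MAX`` of 120 seconds (the value defined earlier in this patch); the
helper below is illustrative only, not part of the patch::

    BACKOFF_MAX = 120  # assumed value of Retry.BACKOFF_MAX

    def backoff_time(backoff_factor, observed_errors):
        # Mirrors Retry.get_backoff_time(): no delay until a second error
        # has been observed, then backoff_factor * 2 ** (errors - 1).
        if observed_errors <= 1:
            return 0
        return min(BACKOFF_MAX, backoff_factor * (2 ** (observed_errors - 1)))

    assert backoff_time(0.5, 1) == 0     # first error: retry immediately
    assert backoff_time(0.5, 2) == 1.0   # 0.5 * 2**1
    assert backoff_time(0.5, 3) == 2.0   # 0.5 * 2**2
    assert backoff_time(0.5, 10) == 120  # 0.5 * 2**9 == 256, capped
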
}
fingerprint = fingerprint.replace(':', '').lower()
- digest_length, rest = divmod(len(fingerprint), 2)
-
- if rest or digest_length not in hashfunc_map:
+ digest_length, odd = divmod(len(fingerprint), 2)
+
+ if odd or digest_length not in hashfunc_map:
raise SSLError('Fingerprint is of invalid length.')
# We need encode() here for py32; works on py2 and py33.
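
The rename from ``rest`` to ``odd`` makes the remainder's intent explicit: a
hex fingerprint must have an even number of digits, and half its length must
match a known digest size. A minimal sketch of the check, with
``hashfunc_map`` assumed to be the digest-length-to-constructor mapping
defined just above this hunk::

    from hashlib import md5, sha1

    hashfunc_map = {16: md5, 20: sha1}  # digest length in bytes -> constructor

    def is_valid_fingerprint(fingerprint):
        fingerprint = fingerprint.replace(':', '').lower()
        digest_length, odd = divmod(len(fingerprint), 2)
        return not odd and digest_length in hashfunc_map

    assert is_valid_fingerprint('aa' * 20)            # 40 hex chars -> SHA-1
    assert is_valid_fingerprint('bb' * 16)            # 32 hex chars -> MD5
    assert not is_valid_fingerprint('aa' * 20 + 'c')  # odd length is invalid
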
+# The default socket timeout, used by httplib to indicate that no timeout was
+# specified by the user
from socket import _GLOBAL_DEFAULT_TIMEOUT
import time
from ..exceptions import TimeoutStateError
+# A sentinel value to indicate that no timeout was specified by the user in
+# urllib3
+_Default = object()
def current_time():
"""
- Retrieve the current time, this function is mocked out in unit testing.
+ Retrieve the current time. This function is mocked out in unit testing.
"""
return time.time()
-_Default = object()
-# The default timeout to use for socket connections. This is the attribute used
-# by httplib to define the default timeout
-class Timeout(object):
- """
- Utility object for storing timeout values.
-
- Example usage:
-
- .. code-block:: python
-
- timeout = urllib3.util.Timeout(connect=2.0, read=7.0)
- pool = HTTPConnectionPool('www.google.com', 80, timeout=timeout)
- pool.request(...) # Etc, etc
+class Timeout(object):
+ """ Timeout configuration.
+
+ Timeouts can be defined as a default for a pool::
+
+ timeout = Timeout(connect=2.0, read=7.0)
+ http = PoolManager(timeout=timeout)
+ response = http.request('GET', 'http://example.com/')
+
+ Or per-request (which overrides the default for the pool)::
+
+ response = http.request('GET', 'http://example.com/', timeout=Timeout(10))
+
+ Timeouts can be disabled by setting all the parameters to ``None``::
+
+ no_timeout = Timeout(connect=None, read=None)
+ response = http.request('GET', 'http://example.com/', timeout=no_timeout)
+ :param total:
+ This combines the connect and read timeouts into one; the read timeout
+ will be set to the time leftover from the connect attempt. In the
+ event that both a connect timeout and a total are specified, or a read
+ timeout and a total are specified, the shorter timeout will be applied.
+
+ Defaults to None.
+
+ :type total: integer, float, or None
:param connect:
The maximum amount of time to wait for a connection attempt to a server
:type read: integer, float, or None
- :param total:
- This combines the connect and read timeouts into one; the read timeout
- will be set to the time leftover from the connect attempt. In the
- event that both a connect timeout and a total are specified, or a read
- timeout and a total are specified, the shorter timeout will be applied.
-
- Defaults to None.
-
- :type total: integer, float, or None
-
.. note::
Many factors can affect the total amount of time for urllib3 to return
- an HTTP response. Specifically, Python's DNS resolver does not obey the
- timeout specified on the socket. Other factors that can affect total
- request time include high CPU load, high swap, the program running at a
- low priority level, or other behaviors. The observed running time for
- urllib3 to return a response may be greater than the value passed to
- `total`.
+ an HTTP response.
+
+ For example, Python's DNS resolver does not obey the timeout specified
+ on the socket. Other factors that can affect total request time include
+ high CPU load, high swap, the program running at a low priority level,
+ or other behaviors.
In addition, the read and total timeouts only measure the time between
read operations on the socket connecting the client and the server,
not the total amount of time for the request to return a complete
response. For most requests, the timeout is raised because the server
has not sent the first byte in the specified time. This is not always
the case; if a server streams one byte every fifteen seconds, a timeout
- of 20 seconds will not ever trigger, even though the request will
- take several minutes to complete.
+ of 20 seconds will not trigger, even though the request will take
+ several minutes to complete.
If your goal is to cut off any request after a set amount of wall clock
time, consider having a second "watcher" thread to cut off a slow
request.
return '%s(connect=%r, read=%r, total=%r)' % (
type(self).__name__, self._connect, self._read, self.total)
-
@classmethod
def _validate_timeout(cls, value, name):
- """ Check that a timeout attribute is valid
+ """ Check that a timeout attribute is valid.
:param value: The timeout value to validate
- :param name: The name of the timeout attribute to validate. This is used
- for clear error messages
- :return: the value
- :raises ValueError: if the type is not an integer or a float, or if it
- is a numeric value less than zero
+ :param name: The name of the timeout attribute to validate. This is
+ used in error messages.
+ :return: The validated and cast version of the given value.
+ :raises ValueError: If the type is not an integer or a float, or if it
+ is a numeric value less than zero.
"""
if value is _Default:
return cls.DEFAULT_TIMEOUT
raise ValueError("Attempted to set %s timeout to %s, but the "
"timeout cannot be set to a value less "
"than 0." % (name, value))
- except TypeError: # Python 3
+ except TypeError: # Python 3
raise ValueError("Timeout value %s was %s, but it must be an "
"int or float." % (name, value))
The timeout value used by httplib.py sets the same timeout on the
connect() and recv() socket requests. This creates a :class:`Timeout`
- object that sets the individual timeouts to the ``timeout`` value passed
- to this function.
+ object that sets the individual timeouts to the ``timeout`` value
+ passed to this function.
- :param timeout: The legacy timeout value
+ :param timeout: The legacy timeout value.
:type timeout: integer, float, sentinel default object, or None
- :return: a Timeout object
+ :return: Timeout object
:rtype: :class:`Timeout`
"""
return Timeout(read=timeout, connect=timeout)
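
A quick sketch of the conversion this classmethod performs (in urllib3 it is
exposed as ``Timeout.from_float``; its ``def`` line is elided from this
hunk)::

    from urllib3.util.timeout import Timeout

    t = Timeout.from_float(5.0)
    # The single legacy value is applied to both phases.
    assert t.connect_timeout == 5.0
    assert t.read_timeout == 5.0
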
def get_connect_duration(self):
""" Gets the time elapsed since the call to :meth:`start_connect`.
- :return: the elapsed time
+ :return: Elapsed time.
:rtype: float
:raises urllib3.exceptions.TimeoutStateError: if you attempt
to get duration for a timer that hasn't been started.
This will be a positive float or integer, the value None
(never timeout), or the default system timeout.
- :return: the connect timeout
+ :return: Connect timeout.
:rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None
"""
if self.total is None:
established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be
raised.
- :return: the value to use for the read timeout
+ :return: Value to use for the read timeout.
:rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None
:raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect`
has not yet been called on this object.
self.total is not self.DEFAULT_TIMEOUT and
self._read is not None and
self._read is not self.DEFAULT_TIMEOUT):
- # in case the connect timeout has not yet been established.
+ # In case the connect timeout has not yet been established.
if self._start_connect is None:
return self._read
return max(0, min(self.total - self.get_connect_duration(),
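
The expression above clamps the read timeout to whatever budget ``total`` has
left after the connect phase; a worked example with hypothetical numbers::

    # With total=10.0 and read=7.0, a connect phase that took 4.0 seconds
    # leaves min(10.0 - 4.0, 7.0) == 6.0 seconds for reads.
    total, read, connect_duration = 10.0, 7.0, 4.0
    assert max(0, min(total - connect_duration, read)) == 6.0
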
from ..exceptions import LocationParseError
-class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])):
+url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment']
+
+
+class Url(namedtuple('Url', url_attrs)):
"""
Datastructure for representing an HTTP URL. Used as a return value for
:func:`parse_url`.
"""
__slots__ = ()
- def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, query=None, fragment=None):
- return super(Url, cls).__new__(cls, scheme, auth, host, port, path, query, fragment)
+ def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None,
+ query=None, fragment=None):
+ return super(Url, cls).__new__(cls, scheme, auth, host, port, path,
+ query, fragment)
@property
def hostname(self):
If not found, then the first part is the full input string.
- Example: ::
+ Example::
>>> split_first('foo/bar?baz', '?/=')
('foo', 'bar?baz', '/')
Partly backwards-compatible with :mod:`urlparse`.
- Example: ::
+ Example::
>>> parse_url('http://google.com/mail/')
Url(scheme='http', host='google.com', port=None, path='/', ...)
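
A short sketch of the ``Url`` tuple in use (import path assumed to be
``urllib3.util.url``, matching this patch series' module split)::

    from urllib3.util.url import parse_url

    u = parse_url('http://google.com/mail/?foo=bar')
    assert u.scheme == 'http'
    assert u.host == 'google.com' and u.port is None
    assert u.path == '/mail/' and u.query == 'foo=bar'
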
# Additionally, this implementation does silly things to be optimal
# on CPython.
+ if not url:
+ # Empty
+ return Url()
+
scheme = None
auth = None
host = None