import socket
-from urllib import urlencode
from httplib import HTTPConnection, HTTPSConnection, HTTPException
from Queue import Queue, Empty, Full
from select import select
BaseSSLError = None
-from .filepost import encode_multipart_formdata
+from .request import RequestMethods
from .response import HTTPResponse
from .exceptions import (
SSLError,
MaxRetryError,
TimeoutError,
HostChangedError,
- EmptyPoolError)
+ EmptyPoolError,
+)
log = logging.getLogger(__name__)
_Default = object()
-
## Connection objects (extension of httplib)
class VerifiedHTTPSConnection(HTTPSConnection):
Based on httplib.HTTPSConnection but wraps the socket with
SSL certification.
"""
-
- def __init__(self, **kwargs):
- HTTPSConnection.__init__(self, **kwargs)
- self.cert_reqs = None
- self.ca_certs = None
+    # Class-level defaults that replace the per-instance assignments
+    # formerly made in ``__init__``.
+    cert_reqs = None
+    ca_certs = None
def set_cert(self, key_file=None, cert_file=None,
cert_reqs='CERT_NONE', ca_certs=None):
## Pool objects
class ConnectionPool(object):
+ """
+ Base class for all connection pools, such as
+ :class:`.HTTPConnectionPool` and :class:`.HTTPSConnectionPool`.
+ """
pass
-class HTTPConnectionPool(ConnectionPool):
+class HTTPConnectionPool(ConnectionPool, RequestMethods):
"""
Thread-safe connection pool for one host.
- host
+ :param host:
Host used for this HTTP Connection (e.g. "localhost"), passed into
- httplib.HTTPConnection()
+ :class:`httplib.HTTPConnection`.
- port
+ :param port:
Port used for this HTTP Connection (None is equivalent to 80), passed
- into httplib.HTTPConnection()
+ into :class:`httplib.HTTPConnection`.
- strict
+ :param strict:
Causes BadStatusLine to be raised if the status line can't be parsed
as a valid HTTP/1.0 or 1.1 status line, passed into
- httplib.HTTPConnection()
+ :class:`httplib.HTTPConnection`.
- timeout
+ :param timeout:
Socket timeout for each individual connection, can be a float. None
disables timeout.
- maxsize
+ :param maxsize:
Number of connections to save that can be reused. More than 1 is useful
in multithreaded situations. If ``block`` is set to false, more
connections will be created but they will not be saved once they've
been used.
- block
+ :param block:
If set to True, no more than ``maxsize`` connections will be used at
a time. When no free connections are available, the call will block
until a connection has been released. This is a useful side effect for
particular multithreaded situations where one does not want to use more
than maxsize connections per host to prevent flooding.
- headers
+ :param headers:
Headers to include with all requests, unless other headers are given
explicitly.
"""
def _new_conn(self):
"""
- Return a fresh HTTPConnection.
+ Return a fresh :class:`httplib.HTTPConnection`.
"""
self.num_connections += 1
log.info("Starting new HTTP connection (%d): %s" %
def _get_conn(self, timeout=None):
"""
Get a connection. Will return a pooled connection if one is available.
- Otherwise, a fresh connection is returned.
+
+ If no connections are available and :attr:`.block` is ``False``, then a
+ fresh connection is returned.
+
+ :param timeout:
+ Seconds to wait before giving up and raising
+ :class:`urllib3.exceptions.EmptyPoolError` if the pool is empty and
+ :attr:`.block` is ``True``.
"""
conn = None
try:
def _put_conn(self, conn):
"""
Put a connection back into the pool.
+
+ :param conn:
+ Connection object for the current host and port as returned by
+ :meth:`._new_conn` or :meth:`._get_conn`.
+
If the pool is already full, the connection is discarded because we
exceeded maxsize. If connections are discarded frequently, then maxsize
should be increased.
if timeout is _Default:
timeout = self.timeout
-
conn.request(method, url, **httplib_request_kw)
conn.sock.settimeout(timeout)
httplib_response = conn.getresponse()
-
log.debug("\"%s %s %s\" %s %s" %
(method, url,
conn._http_vsn_str, # pylint: disable-msg=W0212
def is_same_host(self, url):
+ """
+ Check if the given ``url`` is a member of the same host as this
+ connection pool.
+ """
+ # TODO: Add optional support for socket.gethostbyname checking.
return (url.startswith('/') or
get_host(url) == (self.scheme, self.host, self.port))
redirect=True, assert_same_host=True, timeout=_Default,
pool_timeout=None, release_conn=None, **response_kw):
"""
- Get a connection from the pool and perform an HTTP request.
+ Get a connection from the pool and perform an HTTP request. This is the
+ lowest level call for making a request, so you'll need to specify all
+ the raw details.
+
+ .. note::
- method
+ More commonly, it's appropriate to use a convenience method provided
+ by :class:`.RequestMethods`, such as :meth:`.request`.
+
+ :param method:
HTTP request method (such as GET, POST, PUT, etc.)
- body
+ :param body:
Data to send in the request body (useful for creating
POST requests, see HTTPConnectionPool.post_url for
more convenience).
- headers
+ :param headers:
Dictionary of custom headers to send, such as User-Agent,
If-None-Match, etc. If None, pool headers are used. If provided,
these headers completely replace any pool-specific headers.
- retries
+ :param retries:
Number of retries to allow before raising
a MaxRetryError exception.
- redirect
+ :param redirect:
Automatically handle redirects (status codes 301, 302, 303, 307),
each redirect counts as a retry.
- assert_same_host
- If True, will make sure that the host of the pool requests is
+ :param assert_same_host:
+ If ``True``, will make sure that the host of the pool requests is
consistent else will raise HostChangedError. When False, you can
use the pool on an HTTP proxy and request foreign hosts.
- timeout
+ :param timeout:
If specified, overrides the default timeout for this one request.
- pool_timeout
+ :param pool_timeout:
If set and the pool is set to block=True, then this method will
block for ``pool_timeout`` seconds and raise EmptyPoolError if no
connection is available within the time period.
- release_conn
+ :param release_conn:
If False, then the urlopen call will not release the connection
back into the pool once a response is received. This is useful if
you're not preloading the response's content immediately. You will
the connection back into the pool. If None, it takes the value of
``response_kw.get('preload_content', True)``.
- Additional parameters are passed to
- ``HTTPResponse.from_httplib(r, **response_kw)``
+ :param \**response_kw:
+ Additional parameters are passed to
+ :meth:`urllib3.response.HTTPResponse.from_httplib`
"""
if headers is None:
headers = self.headers
# Request a connection from the queue
# (Could raise SocketError: Bad file descriptor)
conn = self._get_conn(timeout=pool_timeout)
+
# Make the request on the httplib connection object
httplib_response = self._make_request(conn, method, url,
timeout=timeout,
body=body, headers=headers)
- # print '!'
# Import httplib's response into our own wrapper object
response = HTTPResponse.from_httplib(httplib_response,
return response
- def get_url(self, url, fields=None, headers=None, retries=3,
- redirect=True, **response_kw):
- """
- Wrapper for performing GET with urlopen (see urlopen for more details).
-
- Supports an optional ``fields`` dictionary parameter key/value strings.
- If provided, they will be added to the url.
- """
- if fields:
- url += '?' + urlencode(fields)
- return self.urlopen('GET', url, headers=headers, retries=retries,
- redirect=redirect, **response_kw)
-
- def post_url(self, url, fields=None, headers=None, retries=3,
- redirect=True, encode_multipart=True, multipart_boundary=None,
- **response_kw):
- """
- Wrapper for performing POST with urlopen (see urlopen
- for more details).
-
- Supports an optional ``fields`` parameter of key/value strings AND
- key/filetuple. A filetuple is a (filename, data) tuple. For example:
-
- fields = {
- 'foo': 'bar',
- 'foofile': ('foofile.txt', 'contents of foofile'),
- }
-
- If encode_multipart=True (default), then
- ``urllib3.filepost.encode_multipart_formdata`` is used to encode the
- payload with the appropriate content type. Otherwise
- ``urllib.urlencode`` is used with 'application/x-www-form-urlencoded'
- content type.
-
- Multipart encoding must be used when posting files, and it's reasonably
- safe to use it other times too. It may break request signing, such as
- OAuth.
-
- NOTE: If ``headers`` are supplied, the 'Content-Type' value will be
- overwritten because it depends on the dynamic random boundary string
- which is used to compose the body of the request.
- """
- if encode_multipart:
- body, content_type = encode_multipart_formdata(fields or {},
- boundary=multipart_boundary)
- else:
- body, content_type = (
- urlencode(fields or {}),
- 'application/x-www-form-urlencoded')
-
- headers = headers or {}
- headers.update({'Content-Type': content_type})
-
- return self.urlopen('POST', url, body, headers=headers,
- retries=retries, redirect=redirect, **response_kw)
-
class HTTPSConnectionPool(HTTPConnectionPool):
"""
- Same as HTTPConnectionPool, but HTTPS.
+ Same as :class:`.HTTPConnectionPool`, but HTTPS.
+
+ When Python is compiled with the :mod:`ssl` module, then
+ :class:`.VerifiedHTTPSConnection` is used, which *can* verify certificates,
+ instead of :class:`httplib.HTTPSConnection`.
+
+ The ``key_file``, ``cert_file``, ``cert_reqs``, and ``ca_certs`` parameters
+ are only used if :mod:`ssl` is available and are fed into
+ :meth:`ssl.wrap_socket` to upgrade the connection socket into an SSL socket.
"""
scheme = 'https'
strict=False, timeout=None, maxsize=1,
block=False, headers=None,
key_file=None, cert_file=None,
- cert_reqs=ssl.CERT_REQUIRED, ca_certs=None):
+ cert_reqs='CERT_NONE', ca_certs=None):
super(HTTPSConnectionPool, self).__init__(host, port,
strict, timeout, maxsize,
def _new_conn(self):
"""
- Return a fresh HTTPSConnection.
+ Return a fresh :class:`httplib.HTTPSConnection`.
"""
-
self.num_connections += 1
log.info("Starting new HTTPS connection (%d): %s"
% (self.num_connections, self.host))
"""
Shortcuts for generating request headers.
- keep_alive
- If true, adds 'connection: keep-alive' header.
+ :param keep_alive:
+ If ``True``, adds 'connection: keep-alive' header.
- accept_encoding
+ :param accept_encoding:
Can be a boolean, list, or string.
- True translates to 'gzip,deflate'.
+ ``True`` translates to 'gzip,deflate'.
List will get joined by comma.
String will be used as provided.
- user_agent
+ :param user_agent:
String representing the user-agent you want, such as
"python-urllib3/0.6"
- basic_auth
+ :param basic_auth:
Colon-separated username:password string for 'authorization: basic ...'
auth header.
+
+ Example: ::
+
+ >>> make_headers(keep_alive=True, user_agent="Batman/1.0")
+ {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'}
+ >>> make_headers(accept_encoding=True)
+ {'accept-encoding': 'gzip,deflate'}
"""
headers = {}
if accept_encoding:
"""
Given a url, return its scheme, host and port (None if it's not there).
- For example:
- >>> get_host('http://google.com/mail/')
- http, google.com, None
- >>> get_host('google.com:80')
- http, google.com, 80
+ For example: ::
+
+ >>> get_host('http://google.com/mail/')
+ ('http', 'google.com', None)
+ >>> get_host('google.com:80')
+ ('http', 'google.com', 80)
"""
# This code is actually similar to urlparse.urlsplit, but much
# simplified for our needs.
def connection_from_url(url, **kw):
"""
- Given a url, return an HTTP(S)ConnectionPool instance of its host.
+ Given a url, return a :class:`.ConnectionPool` instance of its host.
+
+ This is a shortcut for not having to parse out the scheme, host, and port
+ of the url before creating a :class:`.ConnectionPool` instance.
+
+ :param url:
+ Absolute URL string that must include the scheme. Port is optional.
+
+ :param \**kw:
+ Passes additional parameters to the constructor of the appropriate
+ :class:`.ConnectionPool`. Useful for specifying things like
+ timeout, maxsize, headers, etc.
- This is a shortcut for not having to determine the host of the url
- before creating an HTTP(S)ConnectionPool instance.
+ Example: ::
- Passes on whatever kw arguments to the constructor of
- HTTP(S)ConnectionPool. (e.g. timeout, maxsize, block)
+ >>> conn = connection_from_url('http://google.com/')
+ >>> r = conn.request('GET', '/')
"""
scheme, host, port = get_host(url)
if scheme == 'https':
# the MIT License: http://www.opensource.org/licenses/mit-license.php
from ._collections import RecentlyUsedContainer
-from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool, get_host
+from .connectionpool import (
+ HTTPConnectionPool, HTTPSConnectionPool,
+ get_host, connection_from_url,
+)
+
+
+__all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url']
+
+
+from .request import RequestMethods
+from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool
pool_classes_by_scheme = {
}
-class PoolManager(object):
+class PoolManager(RequestMethods):
"""
Allows for arbitrary requests while transparently keeping track of
necessary connection pools for you.
- num_pools
+ :param num_pools:
Number of connection pools to cache before discarding the least recently
used pool.
- Additional parameters are used to create fresh ConnectionPool instances.
+ :param \**connection_pool_kw:
+ Additional parameters are used to create fresh
+ :class:`urllib3.connectionpool.ConnectionPool` instances.
+
+ Example: ::
+
+ >>> manager = PoolManager()
+ >>> r = manager.request('GET', "http://google.com/")
+ >>> r = manager.request('GET', "http://google.com/mail")
+ >>> r = manager.request('GET', "http://yahoo.com/")
+ >>> len(manager.pools)
+ 2
"""
def __init__(self, num_pools=10, **connection_pool_kw):
self.connection_pool_kw = connection_pool_kw
-
self.pools = RecentlyUsedContainer(num_pools)
- self.recently_used_pools = []
def connection_from_host(self, host, port=80, scheme='http'):
"""
- Get a ConnectionPool based on the host, port, and scheme.
+ Get a :class:`ConnectionPool` based on the host, port, and scheme.
+
+ Note that an appropriate ``port`` value is required here to normalize
+ connection pools in our container most effectively.
"""
pool_key = (scheme, host, port)
-
# If the scheme, host, or port doesn't match existing open connections,
# open a new ConnectionPool.
pool = self.pools.get(pool_key)
def connection_from_url(self, url):
"""
- Similar to connectionpool.connection_from_url but doesn't pass any
- additional keywords to the ConnectionPool constructor. Additional
- keywords are taken from the PoolManager constructor.
+ Similar to :func:`urllib3.connectionpool.connection_from_url` but
+ doesn't pass any additional parameters to the
+ :class:`urllib3.connectionpool.ConnectionPool` constructor.
+
+ Additional parameters are taken from the :class:`.PoolManager`
+ constructor.
"""
scheme, host, port = get_host(url)
port = port or port_by_scheme.get(scheme, 80)
- r = self.connection_from_host(host, port=port, scheme=scheme)
+ return self.connection_from_host(host, port=port, scheme=scheme)
- return r
+    def urlopen(self, method, url, **kw):
+        """
+        Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen`.
+
+        ``url`` must be absolute, such that an appropriate
+        :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it.
+
+        :param method:
+            HTTP request method (such as GET, POST, PUT, etc.)
+
+        :param url:
+            Absolute URL; it is used both to pick the pool and as the
+            request target.
+
+        :param kw:
+            Passed through to the chosen pool's ``urlopen``. Any
+            ``assert_same_host`` value given here is overridden with
+            ``False``, because the pool is selected from ``url`` itself.
+        """
+        # Set through ``kw`` instead of a literal keyword so that a caller
+        # who also passes ``assert_same_host`` does not trigger a
+        # "multiple values for keyword argument" TypeError.
+        kw['assert_same_host'] = False
+        conn = self.connection_from_url(url)
+        return conn.urlopen(method, url, **kw)
+
+
+class ProxyManager(object):
+ """
+ Given a ConnectionPool to a proxy, the ProxyManager's ``urlopen`` method
+ will make requests to any url through the defined proxy.
+
+ :param proxy_pool:
+ Connection pool for the proxy; it is stored as ``self.proxy_pool`` and
+ every ``urlopen`` call is delegated to it.
+ """
+
+ def __init__(self, proxy_pool):
+ self.proxy_pool = proxy_pool
def urlopen(self, method, url, **kw):
"Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute."
- conn = self.connection_from_url(url)
- return conn.urlopen(method, url, **kw)
+ # The requested url names a foreign host rather than the proxy, so the
+ # pool's same-host check is switched off before delegating.
+ kw['assert_same_host'] = False
+ return self.proxy_pool.urlopen(method, url, **kw)
+
+
+def proxy_from_url(url, **pool_kw):
+    """
+    Build a :class:`.ProxyManager` for the proxy located at ``url``.
+
+    :param url:
+        URL of the proxy, handed to :func:`connection_from_url` to create
+        the underlying connection pool.
+
+    :param pool_kw:
+        Extra keyword arguments forwarded unchanged to
+        :func:`connection_from_url`.
+    """
+    return ProxyManager(connection_from_url(url, **pool_kw))
--- /dev/null
+# urllib3/request.py
+# Copyright 2008-2011 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
+#
+# This module is part of urllib3 and is released under
+# the MIT License: http://www.opensource.org/licenses/mit-license.php
+
+
+from urllib import urlencode
+
+from .filepost import encode_multipart_formdata
+
+
+__all__ = ['RequestMethods']
+
+
+class RequestMethods(object):
+    """
+    Convenience mixin for classes that implement a :meth:`urlopen` method,
+    such as :class:`~urllib3.connectionpool.HTTPConnectionPool` and
+    :class:`~urllib3.poolmanager.PoolManager`.
+
+    Provides behavior for making common types of HTTP request methods and
+    decides which type of request field encoding to use.
+
+    Specifically,
+
+    :meth:`.request_encode_url` is for sending requests whose fields are
+    encoded in the URL (such as GET, HEAD, DELETE).
+
+    :meth:`.request_encode_body` is for sending requests whose fields are
+    encoded in the *body* of the request using multipart or
+    www-form-urlencoded (such as for POST, PUT, PATCH).
+
+    :meth:`.request` is for making any kind of request, it will look up the
+    appropriate encoding format and use one of the above two methods to make
+    the request.
+    """
+
+    # Methods whose ``fields`` are appended to the URL as a query string.
+    _encode_url_methods = set(['DELETE', 'GET', 'HEAD', 'OPTIONS'])
+
+    # Methods whose ``fields`` are sent in the request body. Note that
+    # :meth:`request` treats every method that is not listed in
+    # ``_encode_url_methods`` as body-encoded, whether or not it is here.
+    _encode_body_methods = set(['PATCH', 'POST', 'PUT', 'TRACE'])
+
+    def urlopen(self, method, url, body=None, headers=None,
+                encode_multipart=True, multipart_boundary=None,
+                **kw):
+        """
+        Placeholder for the low-level request call that every class using
+        this mixin has to provide.
+
+        :raises NotImplementedError:
+            Always, unless overridden by the class using the mixin.
+        """
+        # ``NotImplemented`` is a comparison sentinel, not an exception
+        # class; calling it fails with an unrelated TypeError.
+        raise NotImplementedError("Classes extending RequestMethods must "
+                                  "implement their own ``urlopen`` method.")
+
+    def request(self, method, url, fields=None, headers=None, **urlopen_kw):
+        """
+        Make a request using :meth:`urlopen` with the appropriate encoding of
+        ``fields`` based on the ``method`` used.
+
+        This is a convenience method that requires the least amount of manual
+        effort. It can be used in most situations, while still having the
+        option to drop down to more specific methods when necessary, such as
+        :meth:`request_encode_url`, :meth:`request_encode_body`,
+        or even the lowest level :meth:`urlopen`.
+
+        :param method:
+            HTTP request method; it is upper-cased before being looked up.
+
+        :param url:
+            URL to request.
+
+        :param fields:
+            Optional fields to encode into the URL or the body, depending on
+            ``method``.
+
+        :param headers:
+            Optional dictionary of headers to send with the request.
+
+        :param urlopen_kw:
+            Additional keyword arguments passed on to the encoding method
+            and from there to :meth:`urlopen`.
+        """
+        method = method.upper()
+
+        if method in self._encode_url_methods:
+            return self.request_encode_url(method, url, fields=fields,
+                                           headers=headers,
+                                           **urlopen_kw)
+        else:
+            return self.request_encode_body(method, url, fields=fields,
+                                            headers=headers,
+                                            **urlopen_kw)
+
+    def request_encode_url(self, method, url, fields=None, **urlopen_kw):
+        """
+        Make a request using :meth:`urlopen` with the ``fields`` encoded in
+        the url. This is useful for request methods like GET, HEAD, DELETE,
+        etc.
+
+        :param fields:
+            Optional fields; when non-empty they are url-encoded and
+            appended to ``url`` after a ``?``.
+
+        :param urlopen_kw:
+            Additional keyword arguments (including ``headers``) passed on
+            to :meth:`urlopen`.
+        """
+        if fields:
+            url += '?' + urlencode(fields)
+        return self.urlopen(method, url, **urlopen_kw)
+
+    def request_encode_body(self, method, url, fields=None, headers=None,
+                            encode_multipart=True, multipart_boundary=None,
+                            **urlopen_kw):
+        """
+        Make a request using :meth:`urlopen` with the ``fields`` encoded in
+        the body. This is useful for request methods like POST, PUT, PATCH,
+        etc.
+
+        When ``encode_multipart=True`` (default), then
+        :func:`urllib3.filepost.encode_multipart_formdata` is used to encode
+        the payload with the appropriate content type. Otherwise
+        :func:`urllib.urlencode` is used with the
+        'application/x-www-form-urlencoded' content type.
+
+        Multipart encoding must be used when posting files, and it's
+        reasonably safe to use it in other times too. However, it may break
+        request signing, such as with OAuth.
+
+        Supports an optional ``fields`` parameter of key/value strings AND
+        key/filetuple. A filetuple is a (filename, data) tuple. For
+        example: ::
+
+            fields = {
+                'foo': 'bar',
+                'fakefile': ('foofile.txt', 'contents of foofile'),
+                'realfile': ('barfile.txt', open('realfile').read()),
+                'nonamefile': 'contents of nonamefile field',
+            }
+
+        When uploading a file, providing a filename (the first parameter of
+        the tuple) is optional but recommended to best mimic behavior of
+        browsers.
+
+        Note that if ``headers`` are supplied, the 'Content-Type' header will
+        be overwritten because it depends on the dynamic random boundary
+        string which is used to compose the body of the request. The random
+        boundary string can be explicitly set with the ``multipart_boundary``
+        parameter. The ``headers`` dictionary passed in is left untouched;
+        a copy carrying the new 'Content-Type' is sent instead.
+        """
+        if encode_multipart:
+            body, content_type = encode_multipart_formdata(
+                fields or {}, boundary=multipart_boundary)
+        else:
+            body, content_type = (urlencode(fields or {}),
+                                  'application/x-www-form-urlencoded')
+
+        # Work on a copy so the caller's ``headers`` dict (which may be
+        # shared between requests) is not modified as a side effect.
+        headers = dict(headers or {})
+        headers['Content-Type'] = content_type
+
+        return self.urlopen(method, url, body=body, headers=headers,
+                            **urlopen_kw)
+
+    # Deprecated:
+
+    def get_url(self, url, fields=None, **urlopen_kw):
+        """
+        .. deprecated:: 1.0
+           Use :meth:`request` instead.
+
+        Shortcut for ``request_encode_url('GET', url, ...)``.
+        """
+        return self.request_encode_url('GET', url, fields=fields,
+                                       **urlopen_kw)
+
+    def post_url(self, url, fields=None, headers=None, **urlopen_kw):
+        """
+        .. deprecated:: 1.0
+           Use :meth:`request` instead.
+
+        Shortcut for ``request_encode_body('POST', url, ...)``.
+        """
+        return self.request_encode_body('POST', url, fields=fields,
+                                        headers=headers,
+                                        **urlopen_kw)