import urllib3
authorKenneth Reitz <me@kennethreitz.com>
Sun, 18 Sep 2011 00:30:08 +0000 (20:30 -0400)
committerKenneth Reitz <me@kennethreitz.com>
Sun, 18 Sep 2011 00:30:08 +0000 (20:30 -0400)
requests/packages/urllib3/__init__.py [new file with mode: 0644]
requests/packages/urllib3/connectionpool.py [new file with mode: 0644]
requests/packages/urllib3/contrib/__init__.py [new file with mode: 0644]
requests/packages/urllib3/contrib/ntlmpool.py [new file with mode: 0644]
requests/packages/urllib3/filepost.py [new file with mode: 0644]

diff --git a/requests/packages/urllib3/__init__.py b/requests/packages/urllib3/__init__.py
new file mode 100644 (file)
index 0000000..c7ade88
--- /dev/null
@@ -0,0 +1,22 @@
+"""
+urllib3 - Thread-safe connection pooling and re-using.
+"""
+
+from connectionpool import (
+    connection_from_url,
+    get_host,
+    HTTPConnectionPool,
+    HTTPSConnectionPool,
+    make_headers)
+# Possible exceptions
+from connectionpool import (
+    HTTPError,
+    MaxRetryError,
+    SSLError,
+    TimeoutError)
+from filepost import encode_multipart_formdata
+
+
+__author__ = "Andrey Petrov (andrey.petrov@shazow.net)"
+__license__ = "MIT"
+__version__ = "$Rev$"
diff --git a/requests/packages/urllib3/connectionpool.py b/requests/packages/urllib3/connectionpool.py
new file mode 100644 (file)
index 0000000..552ccd9
--- /dev/null
@@ -0,0 +1,544 @@
+import gzip
+import zlib
+import logging
+import socket
+
+
+from urllib import urlencode
+from httplib import HTTPConnection, HTTPSConnection, HTTPException
+from Queue import Queue, Empty, Full
+from select import select
+from socket import error as SocketError, timeout as SocketTimeout
+
+
+try:
+    import ssl
+    BaseSSLError = ssl.SSLError
+except ImportError, e:
+    ssl = None
+    BaseSSLError = None
+
+try:
+    from cStringIO import StringIO
+except ImportError, e:
+    from StringIO import StringIO
+
+
+from filepost import encode_multipart_formdata
+
+
+log = logging.getLogger(__name__)
+
+
+## Exceptions
+
+class HTTPError(Exception):
+    "Base exception used by this module."
+    pass
+
+
+class SSLError(Exception):
+    "Raised when SSL certificate fails in an HTTPS connection."
+    pass
+
+
+class MaxRetryError(HTTPError):
+    "Raised when the maximum number of retries is exceeded."
+    pass
+
+
+class TimeoutError(HTTPError):
+    "Raised when a socket timeout occurs."
+    pass
+
+
+class HostChangedError(HTTPError):
+    "Raised when an existing pool gets a request for a foreign host."
+    pass
+
+
+## Response objects
+
+class HTTPResponse(object):
+    """
+    HTTP Response container.
+
+    Similar to httplib's HTTPResponse but the data is pre-loaded.
+    """
+
+    def __init__(self, data='', headers=None, status=0, version=0, reason=None,
+                 strict=0):
+        self.data = data
+        self.headers = headers or {}
+        self.status = status
+        self.version = version
+        self.reason = reason
+        self.strict = strict
+
+    @staticmethod
+    def from_httplib(r):
+        """
+        Given an httplib.HTTPResponse instance, return a corresponding
+        urllib3.HTTPResponse object.
+
+        NOTE: This method will perform r.read() which will have side effects
+        on the original http.HTTPResponse object.
+        """
+        tmp_data = r.read()
+        try:
+            if r.getheader('content-encoding') == 'gzip':
+                log.debug("Received response with content-encoding: gzip, "
+                          "decompressing with gzip.")
+
+                gzipper = gzip.GzipFile(fileobj=StringIO(tmp_data))
+                data = gzipper.read()
+            elif r.getheader('content-encoding') == 'deflate':
+                log.debug("Received response with content-encoding: deflate, "
+                          "decompressing with zlib.")
+                try:
+                    data = zlib.decompress(tmp_data)
+                except zlib.error, e:
+                    data = zlib.decompress(tmp_data, -zlib.MAX_WBITS)
+            else:
+                data = tmp_data
+
+        except IOError:
+            raise HTTPError("Received response with content-encoding: %s, "
+                            "but failed to decompress it." %
+                            (r.getheader('content-encoding')))
+
+        return HTTPResponse(data=data,
+                    headers=dict(r.getheaders()),
+                    status=r.status,
+                    version=r.version,
+                    reason=r.reason,
+                    strict=r.strict)
+
+    # Backwards-compatibility methods for httplib.HTTPResponse
+    def getheaders(self):
+        return self.headers
+
+    def getheader(self, name, default=None):
+        return self.headers.get(name, default)
+
+
+## Connection objects
+
+class VerifiedHTTPSConnection(HTTPSConnection):
+    """
+    Based on httplib.HTTPSConnection but wraps the socket with
+    SSL certification.
+    """
+
+    def set_cert(self, key_file=None, cert_file=None, cert_reqs='CERT_NONE',
+                 ca_certs=None):
+        ssl_req_scheme = {
+            'CERT_NONE': ssl.CERT_NONE,
+            'CERT_OPTIONAL': ssl.CERT_OPTIONAL,
+            'CERT_REQUIRED': ssl.CERT_REQUIRED
+        }
+
+        self.key_file = key_file
+        self.cert_file = cert_file
+        self.cert_reqs = ssl_req_scheme.get(cert_reqs) or ssl.CERT_NONE
+        self.ca_certs = ca_certs
+
+    def connect(self):
+        # Add certificate verification
+        sock = socket.create_connection((self.host, self.port), self.timeout)
+
+        # Wrap socket using verification with the root certs in
+        # trusted_root_certs
+        self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file,
+                                    cert_reqs=self.cert_reqs,
+                                    ca_certs=self.ca_certs)
+
+
+## Pool objects
+
+class HTTPConnectionPool(object):
+    """
+    Thread-safe connection pool for one host.
+
+    host
+        Host used for this HTTP Connection (e.g. "localhost"), passed into
+        httplib.HTTPConnection()
+
+    port
+        Port used for this HTTP Connection (None is equivalent to 80), passed
+        into httplib.HTTPConnection()
+
+    strict
+        Causes BadStatusLine to be raised if the status line can't be parsed
+        as a valid HTTP/1.0 or 1.1 status line, passed into
+        httplib.HTTPConnection()
+
+    timeout
+        Socket timeout for each individual connection, can be a float. None
+        disables timeout.
+
+    maxsize
+        Number of connections to save that can be reused. More than 1 is useful
+        in multithreaded situations. If ``block`` is set to false, more
+        connections will be created but they will not be saved once they've
+        been used.
+
+    block
+        If set to True, no more than ``maxsize`` connections will be used at
+        a time. When no free connections are available, the call will block
+        until a connection has been released. This is a useful side effect for
+        particular multithreaded situations where one does not want to use more
+        than maxsize connections per host to prevent flooding.
+
+    headers
+        Headers to include with all requests, unless other headers are given
+        explicitly.
+    """
+
+    scheme = 'http'
+
+    def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1,
+                 block=False, headers=None):
+        self.host = host
+        self.port = port
+        self.strict = strict
+        self.timeout = timeout
+        self.pool = Queue(maxsize)
+        self.block = block
+        self.headers = headers or {}
+
+        # Fill the queue up so that doing get() on it will block properly
+        [self.pool.put(None) for i in xrange(maxsize)]
+
+        self.num_connections = 0
+        self.num_requests = 0
+
+    def _new_conn(self):
+        """
+        Return a fresh HTTPConnection.
+        """
+        self.num_connections += 1
+        log.info("Starting new HTTP connection (%d): %s" %
+                 (self.num_connections, self.host))
+        return HTTPConnection(host=self.host, port=self.port)
+
+    def _get_conn(self, timeout=None):
+        """
+        Get a connection. Will return a pooled connection if one is available.
+        Otherwise, a fresh connection is returned.
+        """
+        conn = None
+        try:
+            conn = self.pool.get(block=self.block, timeout=timeout)
+
+            # If this is a persistent connection, check if it got disconnected
+            if conn and conn.sock and select([conn.sock], [], [], 0.0)[0]:
+                # Either data is buffered (bad), or the connection is dropped.
+                log.warning("Connection pool detected dropped "
+                            "connection, resetting: %s" % self.host)
+                conn.close()
+
+        except Empty, e:
+            pass  # Oh well, we'll create a new connection then
+
+        return conn or self._new_conn()
+
+    def _put_conn(self, conn):
+        """
+        Put a connection back into the pool.
+        If the pool is already full, the connection is discarded because we
+        exceeded maxsize. If connections are discarded frequently, then maxsize
+        should be increased.
+        """
+        try:
+            self.pool.put(conn, block=False)
+        except Full, e:
+            # This should never happen if self.block == True
+            log.warning("HttpConnectionPool is full, discarding connection: %s"
+                        % self.host)
+
+    def is_same_host(self, url):
+        return (url.startswith('/') or
+                get_host(url) == (self.scheme, self.host, self.port))
+
+    def urlopen(self, method, url, body=None, headers=None, retries=3,
+                redirect=True, assert_same_host=True):
+        """
+        Get a connection from the pool and perform an HTTP request.
+
+        method
+            HTTP request method (such as GET, POST, PUT, etc.)
+
+        body
+            Data to send in the request body (useful for creating
+            POST requests, see HTTPConnectionPool.post_url for
+            more convenience).
+
+        headers
+            Dictionary of custom headers to send, such as User-Agent,
+            If-None-Match, etc. If None, pool headers are used. If provided,
+            these headers completely replace any pool-specific headers.
+
+        retries
+            Number of retries to allow before raising
+            a MaxRetryError exception.
+
+        redirect
+            Automatically handle redirects (status codes 301, 302, 303, 307),
+            each redirect counts as a retry.
+
+        assert_same_host
+            If True, will make sure that the host of the pool requests is
+            consistent else will raise HostChangedError. When False, you can
+            use the pool on an HTTP proxy and request foreign hosts.
+        """
+        if headers == None:
+            headers = self.headers
+
+        if retries < 0:
+            raise MaxRetryError("Max retries exceeded for url: %s" % url)
+
+        # Check host
+        if assert_same_host and not self.is_same_host(url):
+            host = "%s://%s" % (self.scheme, self.host)
+            if self.port:
+                host = "%s:%d" % (host, self.port)
+
+            raise HostChangedError("Connection pool with host '%s' tried to "
+                                   "open a foreign host: %s" % (host, url))
+
+        try:
+            # Request a connection from the queue
+            conn = self._get_conn()
+
+            # Make the request
+            self.num_requests += 1
+            conn.request(method, url, body=body, headers=headers)
+            conn.sock.settimeout(self.timeout)
+            httplib_response = conn.getresponse()
+            log.debug("\"%s %s %s\" %s %s" %
+                      (method, url, conn._http_vsn_str,
+                       httplib_response.status, httplib_response.length))
+
+            # from_httplib will perform httplib_response.read() which will have
+            # the side effect of letting us use this connection for another
+            # request.
+            response = HTTPResponse.from_httplib(httplib_response)
+
+            # Put the connection back to be reused
+            self._put_conn(conn)
+
+        except (SocketTimeout, Empty), e:
+            # Timed out either by socket or queue
+            raise TimeoutError("Request timed out after %f seconds" %
+                               self.timeout)
+
+        except (BaseSSLError), e:
+            # SSL certificate error
+            raise SSLError(e)
+
+        except (HTTPException, SocketError), e:
+            log.warn("Retrying (%d attempts remain) after connection "
+                     "broken by '%r': %s" % (retries, e, url))
+            return self.urlopen(method, url, body, headers, retries - 1,
+                                redirect, assert_same_host)  # Try again
+
+        # Handle redirection
+        if (redirect and
+            response.status in [301, 302, 303, 307] and
+            'location' in response.headers):  # Redirect, retry
+            log.info("Redirecting %s -> %s" %
+                     (url, response.headers.get('location')))
+            return self.urlopen(method, response.headers.get('location'), body,
+                                headers, retries - 1, redirect,
+                                assert_same_host)
+
+        return response
+
+    def get_url(self, url, fields=None, headers=None, retries=3,
+                redirect=True):
+        """
+        Wrapper for performing GET with urlopen (see urlopen for more details).
+
+        Supports an optional ``fields`` dictionary parameter key/value strings.
+        If provided, they will be added to the url.
+        """
+        if fields:
+            url += '?' + urlencode(fields)
+        return self.urlopen('GET', url, headers=headers, retries=retries,
+                            redirect=redirect)
+
+    def post_url(self, url, fields=None, headers=None, retries=3,
+                 redirect=True, encode_multipart=True):
+        """
+        Wrapper for performing POST with urlopen (see urlopen
+        for more details).
+
+        Supports an optional ``fields`` parameter of key/value strings AND
+        key/filetuple. A filetuple is a (filename, data) tuple. For example:
+
+        fields = {
+            'foo': 'bar',
+            'foofile': ('foofile.txt', 'contents of foofile'),
+        }
+
+        If encode_multipart=True (default), then
+        ``urllib3.filepost.encode_multipart_formdata`` is used to encode the
+        payload with the appropriate content type. Otherwise
+        ``urllib.urlencode`` is used with 'application/x-www-form-urlencoded'
+        content type.
+
+        Multipart encoding must be used when posting files, and it's reasonably
+        safe to use it other times too. It may break request signing, such as
+        OAuth.
+
+        NOTE: If ``headers`` are supplied, the 'Content-Type' value will be
+        overwritten because it depends on the dynamic random boundary string
+        which is used to compose the body of the request.
+        """
+        if encode_multipart:
+            body, content_type = encode_multipart_formdata(fields or {})
+        else:
+            body, content_type = (
+                urlencode(fields or {}),
+                'application/x-www-form-urlencoded')
+
+        headers = headers or {}
+        headers.update({'Content-Type': content_type})
+
+        return self.urlopen('POST', url, body, headers=headers,
+                            retries=retries, redirect=redirect)
+
+
+class HTTPSConnectionPool(HTTPConnectionPool):
+    """
+    Same as HTTPConnectionPool, but HTTPS.
+    """
+
+    scheme = 'https'
+
+    def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1,
+                 block=False, headers=None, key_file=None,
+                 cert_file=None, cert_reqs='CERT_NONE', ca_certs=None):
+        self.host = host
+        self.port = port
+        self.strict = strict
+        self.timeout = timeout
+        self.pool = Queue(maxsize)
+        self.block = block
+        self.headers = headers or {}
+
+        self.key_file = key_file
+        self.cert_file = cert_file
+        self.cert_reqs = cert_reqs
+        self.ca_certs = ca_certs
+
+        # Fill the queue up so that doing get() on it will block properly
+        [self.pool.put(None) for i in xrange(maxsize)]
+
+        self.num_connections = 0
+        self.num_requests = 0
+
+    def _new_conn(self):
+        """
+        Return a fresh HTTPSConnection.
+        """
+        self.num_connections += 1
+        log.info("Starting new HTTPS connection (%d): %s"
+                 % (self.num_connections, self.host))
+
+        if not ssl:
+            return HTTPSConnection(host=self.host, port=self.port)
+
+        connection = VerifiedHTTPSConnection(host=self.host, port=self.port)
+        connection.set_cert(key_file=self.key_file, cert_file=self.cert_file,
+                            cert_reqs=self.cert_reqs, ca_certs=self.ca_certs)
+        return connection
+
+
+## Helpers
+
+def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
+                 basic_auth=None):
+    """
+    Shortcuts for generating request headers.
+
+    keep_alive
+        If true, adds 'connection: keep-alive' header.
+
+    accept_encoding
+        Can be a boolean, list, or string.
+        True translates to 'gzip,deflate'.
+        List will get joined by comma.
+        String will be used as provided.
+
+    user_agent
+        String representing the user-agent you want, such as
+        "python-urllib3/0.6"
+
+    basic_auth
+        Colon-separated username:password string for 'authorization: basic ...'
+        auth header.
+    """
+    headers = {}
+    if accept_encoding:
+        if isinstance(accept_encoding, str):
+            pass
+        elif isinstance(accept_encoding, list):
+            accept_encoding = ','.join(accept_encoding)
+        else:
+            accept_encoding = 'gzip,deflate'
+        headers['accept-encoding'] = accept_encoding
+
+    if user_agent:
+        headers['user-agent'] = user_agent
+
+    if keep_alive:
+        headers['connection'] = 'keep-alive'
+
+    if basic_auth:
+        headers['authorization'] = 'Basic ' + \
+            basic_auth.encode('base64').strip()
+
+    return headers
+
+
+def get_host(url):
+    """
+    Given a url, return its scheme, host and port (None if it's not there).
+
+    For example:
+    >>> get_host('http://google.com/mail/')
+    http, google.com, None
+    >>> get_host('google.com:80')
+    http, google.com, 80
+    """
+    # This code is actually similar to urlparse.urlsplit, but much
+    # simplified for our needs.
+    port = None
+    scheme = 'http'
+    if '//' in url:
+        scheme, url = url.split('://', 1)
+    if '/' in url:
+        url, path = url.split('/', 1)
+    if ':' in url:
+        url, port = url.split(':', 1)
+        port = int(port)
+    return scheme, url, port
+
+
+def connection_from_url(url, **kw):
+    """
+    Given a url, return an HTTP(S)ConnectionPool instance of its host.
+
+    This is a shortcut for not having to determine the host of the url
+    before creating an HTTP(S)ConnectionPool instance.
+
+    Passes on whatever kw arguments to the constructor of
+    HTTP(S)ConnectionPool. (e.g. timeout, maxsize, block)
+    """
+    scheme, host, port = get_host(url)
+    if scheme == 'https':
+        return HTTPSConnectionPool(host, port=port, **kw)
+    else:
+        return HTTPConnectionPool(host, port=port, **kw)
diff --git a/requests/packages/urllib3/contrib/__init__.py b/requests/packages/urllib3/contrib/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/requests/packages/urllib3/contrib/ntlmpool.py b/requests/packages/urllib3/contrib/ntlmpool.py
new file mode 100644 (file)
index 0000000..a4642fa
--- /dev/null
@@ -0,0 +1,111 @@
+"""
+NTLM authenticating pool, contributed by erikcederstran
+
+Issue #10, see: http://code.google.com/p/urllib3/issues/detail?id=10
+"""
+
+import httplib
+from logging import getLogger
+from ntlm import ntlm
+
+from urllib3 import HTTPSConnectionPool
+
+
+log = getLogger(__name__)
+
+
+class NTLMConnectionPool(HTTPSConnectionPool):
+    """
+    Implements an NTLM authentication version of an urllib3 connection pool
+    """
+
+    scheme = 'https'
+
+    def __init__(self, user, pw, authurl, *args, **kwargs):
+        """
+        authurl is a random URL on the server that is protected by NTLM.
+        user is the Windows user, probably in the DOMAIN\username format.
+        pw is the password for the user.
+        """
+        super(NTLMConnectionPool, self).__init__(*args, **kwargs)
+        self.authurl = authurl
+        self.rawuser = user
+        user_parts = user.split('\\', 1)
+        self.domain = user_parts[0].upper()
+        self.user = user_parts[1]
+        self.pw = pw
+
+    def _new_conn(self):
+        # Performs the NTLM handshake that secures the connection. The socket
+        # must be kept open while requests are performed.
+        self.num_connections += 1
+        log.debug('Starting NTLM HTTPS connection no. %d: https://%s%s' %
+                  (self.num_connections, self.host, self.authurl))
+
+        headers = {}
+        headers['Connection'] = 'Keep-Alive'
+        req_header = 'Authorization'
+        resp_header = 'www-authenticate'
+
+        conn = httplib.HTTPSConnection(host=self.host, port=self.port)
+
+        # Send negotiation message
+        headers[req_header] = (
+            'NTLM %s' % ntlm.create_NTLM_NEGOTIATE_MESSAGE(self.rawuser))
+        log.debug('Request headers: %s' % headers)
+        conn.request('GET', self.authurl, None, headers)
+        res = conn.getresponse()
+        reshdr = dict(res.getheaders())
+        log.debug('Response status: %s %s' % (res.status, res.reason))
+        log.debug('Response headers: %s' % reshdr)
+        log.debug('Response data: %s [...]' % res.read(100))
+
+        # Remove the reference to the socket, so that it can not be closed by
+        # the response object (we want to keep the socket open)
+        res.fp = None
+
+        # Server should respond with a challenge message
+        auth_header_values = reshdr[resp_header].split(', ')
+        auth_header_value = None
+        for s in auth_header_values:
+            if s[:5] == 'NTLM ':
+                auth_header_value = s[5:]
+        if auth_header_value is None:
+            raise Exception('Unexpected %s response header: %s' %
+                            (resp_header, reshdr[resp_header]))
+
+        # Send authentication message
+        ServerChallenge, NegotiateFlags = \
+            ntlm.parse_NTLM_CHALLENGE_MESSAGE(auth_header_value)
+        auth_msg = ntlm.create_NTLM_AUTHENTICATE_MESSAGE(ServerChallenge,
+                                                         self.user,
+                                                         self.domain,
+                                                         self.pw,
+                                                         NegotiateFlags)
+        headers[req_header] = 'NTLM %s' % auth_msg
+        log.debug('Request headers: %s' % headers)
+        conn.request('GET', self.authurl, None, headers)
+        res = conn.getresponse()
+        log.debug('Response status: %s %s' % (res.status, res.reason))
+        log.debug('Response headers: %s' % dict(res.getheaders()))
+        log.debug('Response data: %s [...]' % res.read()[:100])
+        if res.status != 200:
+            if res.status == 401:
+                raise Exception('Server rejected request: wrong '
+                                'username or password')
+            raise Exception('Wrong server response: %s %s' %
+                            (res.status, res.reason))
+
+        res.fp = None
+        log.debug('Connection established')
+        return conn
+
+    def urlopen(self, method, url, body=None, headers=None, retries=3,
+                redirect=True, assert_same_host=True):
+        if headers is None:
+            headers = {}
+        headers['Connection'] = 'Keep-Alive'
+        return super(NTLMConnectionPool, self).urlopen(method, url, body,
+                                                       headers, retries,
+                                                       redirect,
+                                                       assert_same_host)
diff --git a/requests/packages/urllib3/filepost.py b/requests/packages/urllib3/filepost.py
new file mode 100644 (file)
index 0000000..181822b
--- /dev/null
@@ -0,0 +1,50 @@
+import codecs
+import mimetools
+import mimetypes
+try:
+    from cStringIO import StringIO
+except:
+    from StringIO import StringIO
+
+
+writer = codecs.lookup('utf-8')[3]
+
+
+def get_content_type(filename):
+    return mimetypes.guess_type(filename)[0] or 'application/octet-stream'
+
+
+def encode_multipart_formdata(fields):
+    body = StringIO()
+    BOUNDARY = mimetools.choose_boundary()
+
+    for fieldname, value in fields.iteritems():
+        body.write('--%s\r\n' % (BOUNDARY))
+
+        if isinstance(value, tuple):
+            filename, data = value
+            writer(body).write('Content-Disposition: form-data; name="%s"; '
+                               'filename="%s"\r\n' % (fieldname, filename))
+            body.write('Content-Type: %s\r\n\r\n' %
+                       (get_content_type(filename)))
+        else:
+            data = value
+            writer(body).write('Content-Disposition: form-data; name="%s"\r\n'
+                               % (fieldname))
+            body.write('Content-Type: text/plain\r\n\r\n')
+
+        if isinstance(data, int):
+            data = str(data)  # Backwards compatibility
+
+        if isinstance(data, unicode):
+            writer(body).write(data)
+        else:
+            body.write(data)
+
+        body.write('\r\n')
+
+    body.write('--%s--\r\n' % (BOUNDARY))
+
+    content_type = 'multipart/form-data; boundary=%s' % BOUNDARY
+
+    return body.getvalue(), content_type