From df272b7740d970a97ad243c061863892e072661a Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 24 Oct 2013 10:31:05 -0400 Subject: [PATCH] update internal dependencies --- requests/packages/charade/__init__.py | 34 +++ requests/packages/charade/__main__.py | 7 + requests/packages/charade/jpcntx.py | 2 +- requests/packages/charade/latin1prober.py | 2 +- .../packages/charade/universaldetector.py | 12 +- requests/packages/urllib3/connection.py | 107 +++++++++ requests/packages/urllib3/connectionpool.py | 205 ++++++------------ .../packages/urllib3/contrib/pyopenssl.py | 2 + requests/packages/urllib3/util.py | 39 ++-- 9 files changed, 247 insertions(+), 163 deletions(-) create mode 100644 requests/packages/charade/__main__.py create mode 100644 requests/packages/urllib3/connection.py diff --git a/requests/packages/charade/__init__.py b/requests/packages/charade/__init__.py index 1aadf3e..26362e9 100644 --- a/requests/packages/charade/__init__.py +++ b/requests/packages/charade/__init__.py @@ -30,3 +30,37 @@ def detect(aBuf): u.feed(aBuf) u.close() return u.result + +def _description_of(path): + """Return a string describing the probable encoding of a file.""" + from charade.universaldetector import UniversalDetector + + u = UniversalDetector() + for line in open(path, 'rb'): + u.feed(line) + u.close() + result = u.result + if result['encoding']: + return '%s: %s with confidence %s' % (path, + result['encoding'], + result['confidence']) + else: + return '%s: no result' % path + + +def charade_cli(): + """ + Script which takes one or more file paths and reports on their detected + encodings + + Example:: + + % chardetect.py somefile someotherfile + somefile: windows-1252 with confidence 0.5 + someotherfile: ascii with confidence 1.0 + + """ + from sys import argv + for path in argv[1:]: + print(_description_of(path)) + \ No newline at end of file diff --git a/requests/packages/charade/__main__.py b/requests/packages/charade/__main__.py new file mode 100644 index 0000000..c0d587f --- /dev/null +++ b/requests/packages/charade/__main__.py @@ -0,0 +1,7 @@ +''' +support ';python -m charade [file2] ...' package execution syntax (2.7+) +''' + +from charade import charade_cli + +charade_cli() diff --git a/requests/packages/charade/jpcntx.py b/requests/packages/charade/jpcntx.py index b4e6af4..e4e9e4d 100644 --- a/requests/packages/charade/jpcntx.py +++ b/requests/packages/charade/jpcntx.py @@ -169,7 +169,7 @@ class JapaneseContextAnalysis: def get_confidence(self): # This is just one way to calculate confidence. It works well for me. if self._mTotalRel > MINIMUM_DATA_THRESHOLD: - return (self._mTotalRel - self._mRelSample[0]) / self._mTotalRel + return float(self._mTotalRel - self._mRelSample[0]) / self._mTotalRel else: return DONT_KNOW diff --git a/requests/packages/charade/latin1prober.py b/requests/packages/charade/latin1prober.py index bebe1bc..18eefd4 100644 --- a/requests/packages/charade/latin1prober.py +++ b/requests/packages/charade/latin1prober.py @@ -129,7 +129,7 @@ class Latin1Prober(CharSetProber): if total < 0.01: confidence = 0.0 else: - confidence = ((self._mFreqCounter[3] / total) + confidence = ((float(self._mFreqCounter[3]) / total) - (self._mFreqCounter[1] * 20.0 / total)) if confidence < 0.0: confidence = 0.0 diff --git a/requests/packages/charade/universaldetector.py b/requests/packages/charade/universaldetector.py index 6175bfb..6a8e68a 100644 --- a/requests/packages/charade/universaldetector.py +++ b/requests/packages/charade/universaldetector.py @@ -74,12 +74,10 @@ class UniversalDetector: if aBuf[:3] == codecs.BOM: # EF BB BF UTF-8 with BOM self.result = {'encoding': "UTF-8", 'confidence': 1.0} - elif aBuf[:4] == codecs.BOM_UTF32_LE: + elif aBuf[:4] in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE): # FF FE 00 00 UTF-32, little-endian BOM - self.result = {'encoding': "UTF-32LE", 'confidence': 1.0} - elif aBuf[:4] == codecs.BOM_UTF32_BE: # 00 00 FE FF UTF-32, big-endian BOM - self.result = {'encoding': "UTF-32BE", 'confidence': 1.0} + self.result = {'encoding': "UTF-32", 'confidence': 1.0} elif aBuf[:4] == b'\xFE\xFF\x00\x00': # FE FF 00 00 UCS-4, unusual octet order BOM (3412) self.result = { @@ -92,12 +90,10 @@ class UniversalDetector: 'encoding': "X-ISO-10646-UCS-4-2143", 'confidence': 1.0 } - elif aBuf[:2] == codecs.BOM_LE: + elif aBuf[:2] == codecs.BOM_LE or aBuf[:2] == codecs.BOM_BE: # FF FE UTF-16, little endian BOM - self.result = {'encoding': "UTF-16LE", 'confidence': 1.0} - elif aBuf[:2] == codecs.BOM_BE: # FE FF UTF-16, big endian BOM - self.result = {'encoding': "UTF-16BE", 'confidence': 1.0} + self.result = {'encoding': "UTF-16", 'confidence': 1.0} self._mGotData = True if self.result['encoding'] and (self.result['confidence'] > 0.0): diff --git a/requests/packages/urllib3/connection.py b/requests/packages/urllib3/connection.py new file mode 100644 index 0000000..e240786 --- /dev/null +++ b/requests/packages/urllib3/connection.py @@ -0,0 +1,107 @@ +# urllib3/connection.py +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +import socket +from socket import timeout as SocketTimeout + +try: # Python 3 + from http.client import HTTPConnection, HTTPException +except ImportError: + from httplib import HTTPConnection, HTTPException + +class DummyConnection(object): + "Used to detect a failed ConnectionCls import." + pass + +try: # Compiled with SSL? + ssl = None + HTTPSConnection = DummyConnection + + class BaseSSLError(BaseException): + pass + + try: # Python 3 + from http.client import HTTPSConnection + except ImportError: + from httplib import HTTPSConnection + + import ssl + BaseSSLError = ssl.SSLError + +except (ImportError, AttributeError): # Platform-specific: No SSL. + pass + +from .exceptions import ( + ConnectTimeoutError, +) +from .packages.ssl_match_hostname import match_hostname +from .util import ( + assert_fingerprint, + resolve_cert_reqs, + resolve_ssl_version, + ssl_wrap_socket, +) + +class VerifiedHTTPSConnection(HTTPSConnection): + """ + Based on httplib.HTTPSConnection but wraps the socket with + SSL certification. + """ + cert_reqs = None + ca_certs = None + ssl_version = None + + def set_cert(self, key_file=None, cert_file=None, + cert_reqs=None, ca_certs=None, + assert_hostname=None, assert_fingerprint=None): + + self.key_file = key_file + self.cert_file = cert_file + self.cert_reqs = cert_reqs + self.ca_certs = ca_certs + self.assert_hostname = assert_hostname + self.assert_fingerprint = assert_fingerprint + + def connect(self): + # Add certificate verification + try: + sock = socket.create_connection( + address=(self.host, self.port), + timeout=self.timeout, + ) + except SocketTimeout: + raise ConnectTimeoutError( + self, "Connection to %s timed out. (connect timeout=%s)" % + (self.host, self.timeout)) + + resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs) + resolved_ssl_version = resolve_ssl_version(self.ssl_version) + + if self._tunnel_host: + self.sock = sock + # Calls self._set_hostport(), so self.host is + # self._tunnel_host below. + self._tunnel() + + # Wrap socket using verification with the root certs in + # trusted_root_certs + self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file, + cert_reqs=resolved_cert_reqs, + ca_certs=self.ca_certs, + server_hostname=self.host, + ssl_version=resolved_ssl_version) + + if resolved_cert_reqs != ssl.CERT_NONE: + if self.assert_fingerprint: + assert_fingerprint(self.sock.getpeercert(binary_form=True), + self.assert_fingerprint) + elif self.assert_hostname is not False: + match_hostname(self.sock.getpeercert(), + self.assert_hostname or self.host) + + +if ssl: + HTTPSConnection = VerifiedHTTPSConnection diff --git a/requests/packages/urllib3/connectionpool.py b/requests/packages/urllib3/connectionpool.py index 691d4e2..1e58143 100644 --- a/requests/packages/urllib3/connectionpool.py +++ b/requests/packages/urllib3/connectionpool.py @@ -10,13 +10,6 @@ import logging from socket import error as SocketError, timeout as SocketTimeout import socket -try: # Python 3 - from http.client import HTTPConnection, HTTPException - from http.client import HTTP_PORT, HTTPS_PORT -except ImportError: - from httplib import HTTPConnection, HTTPException - from httplib import HTTP_PORT, HTTPS_PORT - try: # Python 3 from queue import LifoQueue, Empty, Full except ImportError: @@ -24,26 +17,6 @@ except ImportError: import Queue as _ # Platform-specific: Windows -try: # Compiled with SSL? - HTTPSConnection = object - - class BaseSSLError(BaseException): - pass - - ssl = None - - try: # Python 3 - from http.client import HTTPSConnection - except ImportError: - from httplib import HTTPSConnection - - import ssl - BaseSSLError = ssl.SSLError - -except (ImportError, AttributeError): # Platform-specific: No SSL. - pass - - from .exceptions import ( ClosedPoolError, ConnectTimeoutError, @@ -51,23 +24,27 @@ from .exceptions import ( HostChangedError, MaxRetryError, SSLError, + TimeoutError, ReadTimeoutError, ProxyError, ) -from .packages.ssl_match_hostname import CertificateError, match_hostname +from .packages.ssl_match_hostname import CertificateError from .packages import six +from .connection import ( + DummyConnection, + HTTPConnection, HTTPSConnection, VerifiedHTTPSConnection, + HTTPException, BaseSSLError, +) from .request import RequestMethods from .response import HTTPResponse from .util import ( assert_fingerprint, get_host, is_connection_dropped, - resolve_cert_reqs, - resolve_ssl_version, - ssl_wrap_socket, Timeout, ) + xrange = six.moves.xrange log = logging.getLogger(__name__) @@ -75,70 +52,11 @@ log = logging.getLogger(__name__) _Default = object() port_by_scheme = { - 'http': HTTP_PORT, - 'https': HTTPS_PORT, + 'http': 80, + 'https': 443, } -## Connection objects (extension of httplib) - -class VerifiedHTTPSConnection(HTTPSConnection): - """ - Based on httplib.HTTPSConnection but wraps the socket with - SSL certification. - """ - cert_reqs = None - ca_certs = None - ssl_version = None - - def set_cert(self, key_file=None, cert_file=None, - cert_reqs=None, ca_certs=None, - assert_hostname=None, assert_fingerprint=None): - - self.key_file = key_file - self.cert_file = cert_file - self.cert_reqs = cert_reqs - self.ca_certs = ca_certs - self.assert_hostname = assert_hostname - self.assert_fingerprint = assert_fingerprint - - def connect(self): - # Add certificate verification - try: - sock = socket.create_connection( - address=(self.host, self.port), - timeout=self.timeout) - except SocketTimeout: - raise ConnectTimeoutError( - self, "Connection to %s timed out. (connect timeout=%s)" % - (self.host, self.timeout)) - - resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs) - resolved_ssl_version = resolve_ssl_version(self.ssl_version) - - if self._tunnel_host: - self.sock = sock - # Calls self._set_hostport(), so self.host is - # self._tunnel_host below. - self._tunnel() - - # Wrap socket using verification with the root certs in - # trusted_root_certs - self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file, - cert_reqs=resolved_cert_reqs, - ca_certs=self.ca_certs, - server_hostname=self.host, - ssl_version=resolved_ssl_version) - - if resolved_cert_reqs != ssl.CERT_NONE: - if self.assert_fingerprint: - assert_fingerprint(self.sock.getpeercert(binary_form=True), - self.assert_fingerprint) - elif self.assert_hostname is not False: - match_hostname(self.sock.getpeercert(), - self.assert_hostname or self.host) - - ## Pool objects class ConnectionPool(object): @@ -218,6 +136,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): """ scheme = 'http' + ConnectionCls = HTTPConnection def __init__(self, host, port=None, strict=False, timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False, @@ -255,14 +174,14 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): self.num_connections += 1 log.info("Starting new HTTP connection (%d): %s" % (self.num_connections, self.host)) + extra_params = {} if not six.PY3: # Python 2 extra_params['strict'] = self.strict - return HTTPConnection(host=self.host, port=self.port, - timeout=self.timeout.connect_timeout, - **extra_params) - + return self.ConnectionCls(host=self.host, port=self.port, + timeout=self.timeout.connect_timeout, + **extra_params) def _get_conn(self, timeout=None): """ @@ -362,7 +281,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): timeout_obj.start_connect() conn.timeout = timeout_obj.connect_timeout # conn.request() calls httplib.*.request, not the method in - # request.py. It also calls makefile (recv) on the socket + # urllib3.request. It also calls makefile (recv) on the socket. conn.request(method, url, **httplib_request_kw) except SocketTimeout: raise ConnectTimeoutError( @@ -371,11 +290,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # Reset the timeout for the recv() on the socket read_timeout = timeout_obj.read_timeout - log.debug("Setting read timeout to %s" % read_timeout) + # App Engine doesn't have a sock attr - if hasattr(conn, 'sock') and \ - read_timeout is not None and \ - read_timeout is not Timeout.DEFAULT_TIMEOUT: + if hasattr(conn, 'sock'): # In Python 3 socket.py will catch EAGAIN and return None when you # try and read into the file pointer created by http.client, which # instead raises a BadStatusLine exception. Instead of catching @@ -385,7 +302,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): raise ReadTimeoutError( self, url, "Read timed out. (read timeout=%s)" % read_timeout) - conn.sock.settimeout(read_timeout) + if read_timeout is Timeout.DEFAULT_TIMEOUT: + conn.sock.settimeout(socket.getdefaulttimeout()) + else: # None or a value + conn.sock.settimeout(read_timeout) # Receive the response from the server try: @@ -397,6 +317,16 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): raise ReadTimeoutError( self, url, "Read timed out. (read timeout=%s)" % read_timeout) + except BaseSSLError as e: + # Catch possible read timeouts thrown as SSL errors. If not the + # case, rethrow the original. We need to do this because of: + # http://bugs.python.org/issue10272 + if 'timed out' in str(e) or \ + 'did not complete (read)' in str(e): # Python 2.6 + raise ReadTimeoutError(self, url, "Read timed out.") + + raise + except SocketError as e: # Platform-specific: Python 2 # See the above comment about EAGAIN in Python 3. In Python 2 we # have to specifically catch it and throw the timeout error @@ -404,8 +334,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): raise ReadTimeoutError( self, url, "Read timed out. (read timeout=%s)" % read_timeout) - raise + raise # AppEngine doesn't have a version attr. http_version = getattr(conn, '_http_vsn_str', 'HTTP/?') @@ -559,24 +489,24 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): except Empty: # Timed out by queue - raise ReadTimeoutError( - self, url, "Read timed out, no pool connections are available.") - - except SocketTimeout: - # Timed out by socket - raise ReadTimeoutError(self, url, "Read timed out.") + raise EmptyPoolError(self, "No pool connections are available.") except BaseSSLError as e: - # SSL certificate error - if 'timed out' in str(e) or \ - 'did not complete (read)' in str(e): # Platform-specific: Python 2.6 - raise ReadTimeoutError(self, url, "Read timed out.") raise SSLError(e) except CertificateError as e: # Name mismatch raise SSLError(e) + except TimeoutError as e: + # Connection broken, discard. + conn = None + # Save the error off for retry logic. + err = e + + if retries == 0: + raise + except (HTTPException, SocketError) as e: if isinstance(e, SocketError) and self.proxy is not None: raise ProxyError('Cannot connect to proxy. ' @@ -639,6 +569,7 @@ class HTTPSConnectionPool(HTTPConnectionPool): """ scheme = 'https' + ConnectionCls = HTTPSConnection def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1, @@ -658,33 +589,33 @@ class HTTPSConnectionPool(HTTPConnectionPool): self.assert_hostname = assert_hostname self.assert_fingerprint = assert_fingerprint - def _prepare_conn(self, connection): + def _prepare_conn(self, conn): """ Prepare the ``connection`` for :meth:`urllib3.util.ssl_wrap_socket` and establish the tunnel if proxy is used. """ - if isinstance(connection, VerifiedHTTPSConnection): - connection.set_cert(key_file=self.key_file, - cert_file=self.cert_file, - cert_reqs=self.cert_reqs, - ca_certs=self.ca_certs, - assert_hostname=self.assert_hostname, - assert_fingerprint=self.assert_fingerprint) - connection.ssl_version = self.ssl_version + if isinstance(conn, VerifiedHTTPSConnection): + conn.set_cert(key_file=self.key_file, + cert_file=self.cert_file, + cert_reqs=self.cert_reqs, + ca_certs=self.ca_certs, + assert_hostname=self.assert_hostname, + assert_fingerprint=self.assert_fingerprint) + conn.ssl_version = self.ssl_version if self.proxy is not None: # Python 2.7+ try: - set_tunnel = connection.set_tunnel + set_tunnel = conn.set_tunnel except AttributeError: # Platform-specific: Python 2.6 - set_tunnel = connection._set_tunnel + set_tunnel = conn._set_tunnel set_tunnel(self.host, self.port, self.proxy_headers) # Establish tunnel connection early, because otherwise httplib # would improperly set Host: header to proxy's IP:port. - connection.connect() + conn.connect() - return connection + return conn def _new_conn(self): """ @@ -694,28 +625,26 @@ class HTTPSConnectionPool(HTTPConnectionPool): log.info("Starting new HTTPS connection (%d): %s" % (self.num_connections, self.host)) + if not self.ConnectionCls or self.ConnectionCls is DummyConnection: + # Platform-specific: Python without ssl + raise SSLError("Can't connect to HTTPS URL because the SSL " + "module is not available.") + actual_host = self.host actual_port = self.port if self.proxy is not None: actual_host = self.proxy.host actual_port = self.proxy.port - if not ssl: # Platform-specific: Python compiled without +ssl - if not HTTPSConnection or HTTPSConnection is object: - raise SSLError("Can't connect to HTTPS URL because the SSL " - "module is not available.") - connection_class = HTTPSConnection - else: - connection_class = VerifiedHTTPSConnection - extra_params = {} if not six.PY3: # Python 2 extra_params['strict'] = self.strict - connection = connection_class(host=actual_host, port=actual_port, - timeout=self.timeout.connect_timeout, - **extra_params) - return self._prepare_conn(connection) + conn = self.ConnectionCls(host=actual_host, port=actual_port, + timeout=self.timeout.connect_timeout, + **extra_params) + + return self._prepare_conn(conn) def connection_from_url(url, **kw): diff --git a/requests/packages/urllib3/contrib/pyopenssl.py b/requests/packages/urllib3/contrib/pyopenssl.py index d43bcd6..91bc2fa 100644 --- a/requests/packages/urllib3/contrib/pyopenssl.py +++ b/requests/packages/urllib3/contrib/pyopenssl.py @@ -26,6 +26,7 @@ import OpenSSL.SSL from pyasn1.codec.der import decoder as der_decoder from socket import _fileobject import ssl +import select from cStringIO import StringIO from .. import connectionpool @@ -336,6 +337,7 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, try: cnx.do_handshake() except OpenSSL.SSL.WantReadError: + select.select([sock], [], []) continue except OpenSSL.SSL.Error as e: raise ssl.SSLError('bad handshake', e) diff --git a/requests/packages/urllib3/util.py b/requests/packages/urllib3/util.py index 266c9ed..cf934d4 100644 --- a/requests/packages/urllib3/util.py +++ b/requests/packages/urllib3/util.py @@ -80,14 +80,13 @@ class Timeout(object): :type read: integer, float, or None :param total: - The maximum amount of time to wait for an HTTP request to connect and - return. This combines the connect and read timeouts into one. In the + This combines the connect and read timeouts into one; the read timeout + will be set to the time leftover from the connect attempt. In the event that both a connect timeout and a total are specified, or a read timeout and a total are specified, the shorter timeout will be applied. Defaults to None. - :type total: integer, float, or None .. note:: @@ -101,18 +100,23 @@ class Timeout(object): `total`. In addition, the read and total timeouts only measure the time between - read operations on the socket connecting the client and the server, not - the total amount of time for the request to return a complete response. - As an example, you may want a request to return within 7 seconds or - fail, so you set the ``total`` timeout to 7 seconds. If the server - sends one byte to you every 5 seconds, the request will **not** trigger - time out. This case is admittedly rare. + read operations on the socket connecting the client and the server, + not the total amount of time for the request to return a complete + response. For most requests, the timeout is raised because the server + has not sent the first byte in the specified time. This is not always + the case; if a server streams one byte every fifteen seconds, a timeout + of 20 seconds will not ever trigger, even though the request will + take several minutes to complete. + + If your goal is to cut off any request after a set amount of wall clock + time, consider having a second "watcher" thread to cut off a slow + request. """ #: A sentinel object representing the default timeout value DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT - def __init__(self, connect=_Default, read=_Default, total=None): + def __init__(self, total=None, connect=_Default, read=_Default): self._connect = self._validate_timeout(connect, 'connect') self._read = self._validate_timeout(read, 'read') self.total = self._validate_timeout(total, 'total') @@ -372,7 +376,8 @@ def parse_url(url): # Auth if '@' in url: - auth, url = url.split('@', 1) + # Last '@' denotes end of auth part + auth, url = url.rsplit('@', 1) # IPv6 if url and url[0] == '[': @@ -386,10 +391,14 @@ def parse_url(url): if not host: host = _host - if not port.isdigit(): - raise LocationParseError("Failed to parse: %s" % url) - - port = int(port) + if port: + # If given, ports must be integers. + if not port.isdigit(): + raise LocationParseError("Failed to parse: %s" % url) + port = int(port) + else: + # Blank ports are cool, too. (rfc3986#section-3.2.3) + port = None elif not host and url: host = url -- 2.34.1