From 6e76ab7188b2e5f6338b8b15d7d77b69f1627a7b Mon Sep 17 00:00:00 2001 From: Dave Shawley Date: Thu, 16 May 2013 11:22:35 -0400 Subject: [PATCH] Fix for #1362. `PreparedRequest.prepare_url` incorrectly applied IDNA encoding to the URL's entire `netloc`. It should only be encoding the hostname portion of the URL. IDNA encoding was limiting the user info, host, and port segments to be a maximum of 63 characters, which causes problems for all but the most trivial user + password combinations. - Replaced usage of `urlparse` in `PreparedRequest.prepare_url` with `urllib3` equivalent. - Modified IDNA encoding section so that it only encodes the host portion of the URL. --- AUTHORS.rst | 1 + requests/models.py | 22 +++++++++++++++------- test_requests.py | 9 +++++++++ 3 files changed, 25 insertions(+), 7 deletions(-) diff --git a/AUTHORS.rst b/AUTHORS.rst index fce2cf9..307dbb3 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -127,3 +127,4 @@ Patches and Suggestions - Colin Dunklau @cdunklau - Hugo Osvaldo Barrera @hobarrera - Łukasz Langa @llanga +- Dave Shawley diff --git a/requests/models.py b/requests/models.py index 9467a56..5571630 100644 --- a/requests/models.py +++ b/requests/models.py @@ -18,6 +18,7 @@ from .structures import CaseInsensitiveDict from .auth import HTTPBasicAuth from .cookies import cookiejar_from_dict, get_cookie_header from .packages.urllib3.filepost import encode_multipart_formdata +from .packages.urllib3.util import parse_url from .exceptions import HTTPError, RequestException, MissingSchema, InvalidURL from .utils import ( guess_filename, get_auth_from_url, requote_uri, @@ -284,19 +285,28 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): pass # Support for unicode domain names and paths. 
- scheme, netloc, path, _params, query, fragment = urlparse(url) + scheme, auth, host, port, path, query, fragment = parse_url(url) if not scheme: raise MissingSchema("Invalid URL %r: No schema supplied" % url) - if not netloc: - raise InvalidURL("Invalid URL %t: No netloc supplied" % url) + if not host: + raise InvalidURL("Invalid URL %t: No host supplied" % url) + # Only want to apply IDNA to the hostname try: - netloc = netloc.encode('idna').decode('utf-8') + host = host.encode('idna').decode('utf-8') except UnicodeError: raise InvalidURL('URL has an invalid label.') + # Carefully reconstruct the network location + netloc = auth or '' + if netloc: + netloc += '@' + netloc += host + if port: + netloc += ':' + str(port) + # Bare domains aren't valid URLs. if not path: path = '/' @@ -308,8 +318,6 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): netloc = netloc.encode('utf-8') if isinstance(path, str): path = path.encode('utf-8') - if isinstance(_params, str): - _params = _params.encode('utf-8') if isinstance(query, str): query = query.encode('utf-8') if isinstance(fragment, str): @@ -322,7 +330,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): else: query = enc_params - url = requote_uri(urlunparse([scheme, netloc, path, _params, query, fragment])) + url = requote_uri(urlunparse([scheme, netloc, path, None, query, fragment])) self.url = url def prepare_headers(self, headers): diff --git a/test_requests.py b/test_requests.py index 07a2955..60e4498 100644 --- a/test_requests.py +++ b/test_requests.py @@ -521,6 +521,15 @@ class RequestsTestCase(unittest.TestCase): self.assertTrue('http://' in s2.adapters) self.assertTrue('https://' in s2.adapters) + def test_long_authinfo_in_url(self): + url = 'http://{0}:{1}@{2}:9000/path?query#frag'.format( + 'E8A3BE87-9E3F-4620-8858-95478E385B5B', + 'EA770032-DA4D-4D84-8CE9-29C6D910BF1E', + 'exactly-------------sixty-----------three------------characters', + ) + r = requests.Request('GET', 
url).prepare() + self.assertEqual(r.url, url) + class TestCaseInsensitiveDict(unittest.TestCase): -- 2.34.1