From c99de6c0215e8e6a98bba37192a038571b7de3bb Mon Sep 17 00:00:00 2001 From: JinWang An Date: Tue, 13 Apr 2021 11:23:10 +0900 Subject: [PATCH] [CVE-2019-18348] Disallow control characters in hostnames in http.client An issue was discovered in urllib2 in Python 2.x through 2.7.17 and urllib in Python 3.x through 3.8.0. CRLF injection is possible if the attacker controls a url parameter, as demonstrated by the first argument to urllib.request. urlopen with \r\n (specifically in the host component of a URL) followed by an HTTP header. Change-Id: I733ec1d4986c5b638865ed70530f70a3ea0bd524 Signed-off-by: JinWang An --- Lib/httplib.py | 13 +++++++++++++ Lib/test/test_httplib.py | 13 ++++++++++++- Lib/test/test_urllib2.py | 33 +++++++++++++++++++++++++-------- 3 files changed, 50 insertions(+), 9 deletions(-) diff --git a/Lib/httplib.py b/Lib/httplib.py index 79532b9..fcc4152 100644 --- a/Lib/httplib.py +++ b/Lib/httplib.py @@ -745,6 +745,8 @@ class HTTPConnection: (self.host, self.port) = self._get_hostport(host, port) + self._validate_host(self.host) + # This is stored as an instance variable to allow unittests # to replace with a suitable mock self._create_connection = socket.create_connection @@ -1029,6 +1031,17 @@ class HTTPConnection: ).format(matched=match.group(), url=url) raise InvalidURL(msg) + def _validate_host(self, host): + """Validate a host so it doesn't contain control characters.""" + # Prevent CVE-2019-18348. + match = _contains_disallowed_url_pchar_re.search(host) + if match: + msg = ( + "URL can't contain control characters. {host!r} " + "(found at least {matched!r})" + ).format(matched=match.group(), host=host) + raise InvalidURL(msg) + def putheader(self, header, *values): """Send a request header line to the server. diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py index 5462fdd..d8a57f7 100644 --- a/Lib/test/test_httplib.py +++ b/Lib/test/test_httplib.py @@ -702,7 +702,7 @@ class BasicTest(TestCase): with self.assertRaisesRegexp(socket.error, "Invalid response"): conn._tunnel() - def test_putrequest_override_validation(self): + def test_putrequest_override_domain_validation(self): """ It should be possible to override the default validation behavior in putrequest (bpo-38216). @@ -715,6 +715,17 @@ class BasicTest(TestCase): conn.sock = FakeSocket('') conn.putrequest('GET', '/\x00') + def test_putrequest_override_host_validation(self): + class UnsafeHTTPConnection(httplib.HTTPConnection): + def _validate_host(self, url): + pass + + conn = UnsafeHTTPConnection('example.com\r\n') + conn.sock = FakeSocket('') + # set skip_host so a ValueError is not raised upon adding the + # invalid URL as the value of the "Host:" header + conn.putrequest('GET', '/', skip_host=1) + class OfflineTest(TestCase): def test_responses(self): diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py index 4f04c70..0b2a60a 100644 --- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -1340,7 +1340,7 @@ class MiscTests(unittest.TestCase, FakeHTTPMixin): ) @unittest.skipUnless(ssl, "ssl module required") - def test_url_with_control_char_rejected(self): + def test_url_path_with_control_char_rejected(self): for char_no in range(0, 0x21) + range(0x7f, 0x100): char = chr(char_no) schemeless_url = "//localhost:7777/test%s/" % char @@ -1364,7 +1364,7 @@ class MiscTests(unittest.TestCase, FakeHTTPMixin): self.unfakehttp() @unittest.skipUnless(ssl, "ssl module required") - def test_url_with_newline_header_injection_rejected(self): + def test_url_path_with_newline_header_injection_rejected(self): self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.") host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: header\r\nTEST: 123" schemeless_url = "//" + host + ":8080/test/?test=a" @@ -1376,15 +1376,32 @@ class MiscTests(unittest.TestCase, FakeHTTPMixin): # calls urllib.parse.quote() on the URL which makes all of the # above attempts at injection within the url _path_ safe. InvalidURL = httplib.InvalidURL - with self.assertRaisesRegexp( - InvalidURL, r"contain control.*\\r.*(found at least . .)"): - urllib2.urlopen("http:" + schemeless_url) - with self.assertRaisesRegexp(InvalidURL, r"contain control.*\\n"): - urllib2.urlopen("https:" + schemeless_url) + with self.assertRaisesRegexp(InvalidURL, + r"contain control.*\\r.*(found at least . .)"): + urllib2.urlopen("http:{}".format(schemeless_url)) + with self.assertRaisesRegexp(InvalidURL, + r"contain control.*\\n"): + urllib2.urlopen("https:{}".format(schemeless_url)) finally: self.unfakehttp() - + @unittest.skipUnless(ssl, "ssl module required") + def test_url_host_with_control_char_rejected(self): + for char_no in list(range(0, 0x21)) + [0x7f]: + char = chr(char_no) + schemeless_url = "//localhost{}/test/".format(char) + self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.") + try: + escaped_char_repr = repr(char).replace('\\', r'\\') + InvalidURL = httplib.InvalidURL + with self.assertRaisesRegexp(InvalidURL, + "contain control.*{}".format(escaped_char_repr)): + urllib2.urlopen("http:{}".format(schemeless_url)) + with self.assertRaisesRegexp(InvalidURL, + "contain control.*{}".format(escaped_char_repr)): + urllib2.urlopen("https:{}".format(schemeless_url)) + finally: + self.unfakehttp() class RequestTests(unittest.TestCase): -- 2.7.4