[CVE-2019-18348] Disallow control characters in hostnames in http.client 79/256779/1
authorJinWang An <jinwang.an@samsung.com>
Tue, 13 Apr 2021 02:23:10 +0000 (11:23 +0900)
committerJinWang An <jinwang.an@samsung.com>
Tue, 13 Apr 2021 02:23:10 +0000 (11:23 +0900)
An issue was discovered in urllib2 in Python 2.x through 2.7.17
and urllib in Python 3.x through 3.8.0. CRLF injection
is possible if the attacker controls a url parameter,
as demonstrated by the first argument to urllib.request.
urlopen with \r\n (specifically in the host component
of a URL) followed by an HTTP header.

Change-Id: I733ec1d4986c5b638865ed70530f70a3ea0bd524
Signed-off-by: JinWang An <jinwang.an@samsung.com>
Lib/httplib.py
Lib/test/test_httplib.py
Lib/test/test_urllib2.py

index 79532b9..fcc4152 100644 (file)
@@ -745,6 +745,8 @@ class HTTPConnection:
 
         (self.host, self.port) = self._get_hostport(host, port)
 
+        self._validate_host(self.host)
+
         # This is stored as an instance variable to allow unittests
         # to replace with a suitable mock
         self._create_connection = socket.create_connection
@@ -1029,6 +1031,17 @@ class HTTPConnection:
             ).format(matched=match.group(), url=url)
             raise InvalidURL(msg)
 
+    def _validate_host(self, host):
+        """Validate a host so it doesn't contain control characters."""
+        # Prevent CVE-2019-18348.
+        match = _contains_disallowed_url_pchar_re.search(host)
+        if match:
+            msg = (
+                "URL can't contain control characters. {host!r} "
+                "(found at least {matched!r})"
+            ).format(matched=match.group(), host=host)
+            raise InvalidURL(msg)
+
     def putheader(self, header, *values):
         """Send a request header line to the server.
 
index 5462fdd..d8a57f7 100644 (file)
@@ -702,7 +702,7 @@ class BasicTest(TestCase):
         with self.assertRaisesRegexp(socket.error, "Invalid response"):
             conn._tunnel()
 
-    def test_putrequest_override_validation(self):
+    def test_putrequest_override_domain_validation(self):
         """
         It should be possible to override the default validation
         behavior in putrequest (bpo-38216).
@@ -715,6 +715,17 @@ class BasicTest(TestCase):
         conn.sock = FakeSocket('')
         conn.putrequest('GET', '/\x00')
 
+    def test_putrequest_override_host_validation(self):
+        class UnsafeHTTPConnection(httplib.HTTPConnection):
+            def _validate_host(self, url):
+                pass
+
+        conn = UnsafeHTTPConnection('example.com\r\n')
+        conn.sock = FakeSocket('')
+        # set skip_host so a ValueError is not raised upon adding the
+        # invalid URL as the value of the "Host:" header
+        conn.putrequest('GET', '/', skip_host=1)
+
 
 class OfflineTest(TestCase):
     def test_responses(self):
index 4f04c70..0b2a60a 100644 (file)
@@ -1340,7 +1340,7 @@ class MiscTests(unittest.TestCase, FakeHTTPMixin):
         )
 
     @unittest.skipUnless(ssl, "ssl module required")
-    def test_url_with_control_char_rejected(self):
+    def test_url_path_with_control_char_rejected(self):
         for char_no in range(0, 0x21) + range(0x7f, 0x100):
             char = chr(char_no)
             schemeless_url = "//localhost:7777/test%s/" % char
@@ -1364,7 +1364,7 @@ class MiscTests(unittest.TestCase, FakeHTTPMixin):
                 self.unfakehttp()
 
     @unittest.skipUnless(ssl, "ssl module required")
-    def test_url_with_newline_header_injection_rejected(self):
+    def test_url_path_with_newline_header_injection_rejected(self):
         self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
         host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: header\r\nTEST: 123"
         schemeless_url = "//" + host + ":8080/test/?test=a"
@@ -1376,15 +1376,32 @@ class MiscTests(unittest.TestCase, FakeHTTPMixin):
             # calls urllib.parse.quote() on the URL which makes all of the
             # above attempts at injection within the url _path_ safe.
             InvalidURL = httplib.InvalidURL
-            with self.assertRaisesRegexp(
-                InvalidURL, r"contain control.*\\r.*(found at least . .)"):
-                urllib2.urlopen("http:" + schemeless_url)
-            with self.assertRaisesRegexp(InvalidURL, r"contain control.*\\n"):
-                urllib2.urlopen("https:" + schemeless_url)
+            with self.assertRaisesRegexp(InvalidURL,
+                    r"contain control.*\\r.*(found at least . .)"):
+                urllib2.urlopen("http:{}".format(schemeless_url))
+            with self.assertRaisesRegexp(InvalidURL,
+                    r"contain control.*\\n"):
+                urllib2.urlopen("https:{}".format(schemeless_url))
         finally:
             self.unfakehttp()
 
-
+    @unittest.skipUnless(ssl, "ssl module required")
+    def test_url_host_with_control_char_rejected(self):
+        for char_no in list(range(0, 0x21)) + [0x7f]:
+            char = chr(char_no)
+            schemeless_url = "//localhost{}/test/".format(char)
+            self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
+            try:
+                escaped_char_repr = repr(char).replace('\\', r'\\')
+                InvalidURL = httplib.InvalidURL
+                with self.assertRaisesRegexp(InvalidURL,
+                    "contain control.*{}".format(escaped_char_repr)):
+                        urllib2.urlopen("http:{}".format(schemeless_url))
+                with self.assertRaisesRegexp(InvalidURL,
+                    "contain control.*{}".format(escaped_char_repr)):
+                        urllib2.urlopen("https:{}".format(schemeless_url))
+            finally:
+                self.unfakehttp()
 
 class RequestTests(unittest.TestCase):