[CVE-2021-23336] only use '&' as a query string separator

author Senthil Kumaran <senthil@uthcode.com>

Mon, 15 Feb 2021 19:16:43 +0000 (11:16 -0800)

committer JinWang An <jinwang.an@samsung.com>

Tue, 28 Mar 2023 07:25:07 +0000 (16:25 +0900)
author Senthil Kumaran <senthil@uthcode.com>
Mon, 15 Feb 2021 19:16:43 +0000 (11:16 -0800)
committer JinWang An <jinwang.an@samsung.com>
Tue, 28 Mar 2023 07:25:07 +0000 (16:25 +0900)
diff --git a/Doc/library/cgi.rst b/Doc/library/cgi.rst

index ecd62c8c019463435229463549eb059fc0fc48c3..686774705b5feac47ff50cd441d185f1b42ca06a 100644 (file)
--- a/Doc/library/cgi.rst
+++ b/Doc/library/cgi.rst
@@ -287,10 +287,11 @@ algorithms implemented in this module in other circumstances.
  
  .. function:: parse(fp[, environ[, keep_blank_values[, strict_parsing]]])
  
-   Parse a query in the environment or from a file (the file defaults to
-   ``sys.stdin`` and environment defaults to ``os.environ``).  The *keep_blank_values* and *strict_parsing* parameters are
-   passed to :func:`urlparse.parse_qs` unchanged.
-
+   Parse a query in the environment or from a file (the file
+   defaults to ``sys.stdin`` and environment defaults to
+   ``os.environ``).  The *keep_blank_values*, *strict_parsing*,
+   and *separator* parameters are passed to
+   :func:`urlparse.parse_qs` unchanged.
  
  .. function:: parse_qs(qs[, keep_blank_values[, strict_parsing[, max_num_fields]]])
  
@@ -316,6 +317,9 @@ algorithms implemented in this module in other circumstances.
     Note that this does not parse nested multipart parts --- use
     :class:`FieldStorage` for that.
  
+   .. versionchanged:: 3.6.13
+      Added the *separator* parameter.
+
  
  .. function:: parse_header(string)
  
diff --git a/Lib/cgi.py b/Lib/cgi.py

index 5b903e0347739cdb18194f83905882574d0b6719..e51fd6ba337e41fbd5f42364db43bc23371f9588 100755 (executable)
--- a/Lib/cgi.py
+++ b/Lib/cgi.py
@@ -121,7 +121,8 @@ log = initlog           # The current logging function
  # 0 ==> unlimited input
  maxlen = 0
  
-def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
+def parse(fp=None, environ=os.environ, keep_blank_values=0,
+          strict_parsing=0, separator='&'):
      """Parse a query in the environment or from a file (default stdin)
  
          Arguments, all optional:
@@ -140,6 +141,9 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
          strict_parsing: flag indicating what to do with parsing errors.
              If false (the default), errors are silently ignored.
              If true, errors raise a ValueError exception.
+
+        separator: str. The symbol to use for separating the query arguments.
+            Defaults to &.
      """
      if fp is None:
          fp = sys.stdin
@@ -171,7 +175,8 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
          else:
              qs = ""
          environ['QUERY_STRING'] = qs    # XXX Shouldn't, really
-    return urlparse.parse_qs(qs, keep_blank_values, strict_parsing)
+    return urlparse.parse_qs(qs, keep_blank_values, strict_parsing,
+                             separator=separator)
  
  
  # parse query string function called from urlparse,
@@ -395,7 +400,7 @@ class FieldStorage:
  
      def __init__(self, fp=None, headers=None, outerboundary="",
                   environ=os.environ, keep_blank_values=0, strict_parsing=0,
-                 max_num_fields=None):
+                 max_num_fields=None, separator='&'):
          """Constructor.  Read multipart/* until last part.
  
          Arguments, all optional:
@@ -430,6 +435,7 @@ class FieldStorage:
          self.keep_blank_values = keep_blank_values
          self.strict_parsing = strict_parsing
          self.max_num_fields = max_num_fields
+        self.separator = separator
          if 'REQUEST_METHOD' in environ:
              method = environ['REQUEST_METHOD'].upper()
          self.qs_on_post = None
@@ -613,7 +619,9 @@ class FieldStorage:
          if self.qs_on_post:
              qs += '&' + self.qs_on_post
          query = urlparse.parse_qsl(qs, self.keep_blank_values,
-                                   self.strict_parsing, self.max_num_fields)
+                                   self.strict_parsing,
+                                   self.max_num_fields,
+                                   separator=self.separator)
          self.list = [MiniFieldStorage(key, value) for key, value in query]
          self.skip_lines()
  
@@ -629,7 +637,8 @@ class FieldStorage:
              query = urlparse.parse_qsl(self.qs_on_post,
                                         self.keep_blank_values,
                                         self.strict_parsing,
-                                       self.max_num_fields)
+                                       self.max_num_fields,
+                                       self.separator)
              self.list.extend(MiniFieldStorage(key, value)
                               for key, value in query)
              FieldStorageClass = None
@@ -642,7 +651,8 @@ class FieldStorage:
          klass = self.FieldStorageClass or self.__class__
          part = klass(self.fp, {}, ib,
                       environ, keep_blank_values, strict_parsing,
-                     max_num_fields)
+                     max_num_fields,
+                     self.separator)
  
          # Throw first part away
          while not part.done:
diff --git a/Lib/test/test_cgi.py b/Lib/test/test_cgi.py

index 743c2afbd4cd24ad540f28c25457bc6fec373ee6..f414faa23bdd5290d6a46181b5fbd5cf43f5a284 100644 (file)
--- a/Lib/test/test_cgi.py
+++ b/Lib/test/test_cgi.py
@@ -61,12 +61,9 @@ parse_strict_test_cases = [
      ("", ValueError("bad query field: ''")),
      ("&", ValueError("bad query field: ''")),
      ("&&", ValueError("bad query field: ''")),
-    (";", ValueError("bad query field: ''")),
-    (";&;", ValueError("bad query field: ''")),
      # Should the next few really be valid?
      ("=", {}),
      ("=&=", {}),
-    ("=;=", {}),
      # This rest seem to make sense
      ("=a", {'': ['a']}),
      ("&=a", ValueError("bad query field: ''")),
@@ -81,8 +78,6 @@ parse_strict_test_cases = [
      ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
      ("a=a+b&a=b+a", {'a': ['a b', 'b a']}),
      ("x=1&y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
-    ("x=1;y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
-    ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
      ("Hbc5161168c542333633315dee1182227:key_store_seqid=400006&cuyer=r&view=bustomer&order_id=0bb2e248638833d48cb7fed300000f1b&expire=964546263&lobale=en-US&kid=130003.300038&ss=env",
       {'Hbc5161168c542333633315dee1182227:key_store_seqid': ['400006'],
        'cuyer': ['r'],
@@ -188,6 +183,30 @@ class CgiTests(unittest.TestCase):
              self.assertEqual(expect[k], v)
          self.assertItemsEqual(expect.values(), d.values())
  
+    def test_separator(self):
+        parse_semicolon = [
+            ("x=1;y=2.0", {'x': ['1'], 'y': ['2.0']}),
+            ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
+            (";", ValueError("bad query field: ''")),
+            (";;", ValueError("bad query field: ''")),
+            ("=;a", ValueError("bad query field: 'a'")),
+            (";b=a", ValueError("bad query field: ''")),
+            ("b;=a", ValueError("bad query field: 'b'")),
+            ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
+            ("a=a+b;a=b+a", {'a': ['a b', 'b a']}),
+        ]
+        for orig, expect in parse_semicolon:
+            env = {'QUERY_STRING': orig}
+            fs = cgi.FieldStorage(separator=';', environ=env)
+            if isinstance(expect, dict):
+                for key in expect.keys():
+                    expect_val = expect[key]
+                    self.assertIn(key, fs)
+                    if len(expect_val) > 1:
+                        self.assertEqual(fs.getvalue(key), expect_val)
+                    else:
+                        self.assertEqual(fs.getvalue(key), expect_val[0])
+
      def test_log(self):
          cgi.log("Testing")
  
diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py

index 20a0f581436d64030d088b90fb3974162770ca93..13313e03521ff79cae409c7f22c562e15791c483 100644 (file)
--- a/Lib/test/test_urllib2.py
+++ b/Lib/test/test_urllib2.py
@@ -1331,7 +1331,7 @@ class MiscTests(unittest.TestCase, FakeHTTPMixin):
                  # level 'def urlopen()' function defined in this... (quite ugly)
                  # test suite.  They use different url opening codepaths.  Plain
                  # urlopen uses FancyURLOpener which goes via a codepath that
-                # calls urllib.parse.quote() on the URL which makes all of the
+                # calls urllib.quote() on the URL which makes all of the
                  # above attempts at injection within the url _path_ safe.
                  escaped_char_repr = repr(char).replace('\\', r'\\')
                  InvalidURL = httplib.InvalidURL
@@ -1354,7 +1354,7 @@ class MiscTests(unittest.TestCase, FakeHTTPMixin):
              # level 'def urlopen()' function defined in this... (quite ugly)
              # test suite.  They use different url opening codepaths.  Plain
              # urlopen uses FancyURLOpener which goes via a codepath that
-            # calls urllib.parse.quote() on the URL which makes all of the
+            # calls urllib.quote() on the URL which makes all of the
              # above attempts at injection within the url _path_ safe.
              InvalidURL = httplib.InvalidURL
              with self.assertRaisesRegexp(InvalidURL,
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py

index fe599f066ba9da8e208fed9bc0509f647d5725b6..3802d7dc513b9bb4ec3101c60feb1fa306cb03f5 100644 (file)
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -24,16 +24,10 @@ parse_qsl_test_cases = [
      ("&a=b", [('a', 'b')]),
      ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
      ("a=1&a=2", [('a', '1'), ('a', '2')]),
-    (";", []),
-    (";;", []),
-    (";a=b", [('a', 'b')]),
-    ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),
-    ("a=1;a=2", [('a', '1'), ('a', '2')]),
-    (b";", []),
-    (b";;", []),
-    (b";a=b", [(b'a', b'b')]),
-    (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
-    (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
+    (";a=b", [(';a', 'b')]),
+    ("a=a+b;b=b+c", [('a', 'a b;b=b c')]),
+    (b";a=b", [(b';a', b'b')]),
+    (b"a=a+b;b=b+c", [(b'a', b'a b;b=b c')]),
  ]
  
  parse_qs_test_cases = [
@@ -57,16 +51,10 @@ parse_qs_test_cases = [
      (b"&a=b", {b'a': [b'b']}),
      (b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
      (b"a=1&a=2", {b'a': [b'1', b'2']}),
-    (";", {}),
-    (";;", {}),
-    (";a=b", {'a': ['b']}),
-    ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
-    ("a=1;a=2", {'a': ['1', '2']}),
-    (b";", {}),
-    (b";;", {}),
-    (b";a=b", {b'a': [b'b']}),
-    (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
-    (b"a=1;a=2", {b'a': [b'1', b'2']}),
+    (";a=b", {';a': ['b']}),
+    ("a=a+b;b=b+c", {'a': ['a b;b=b c']}),
+    (b";a=b", {b';a': [b'b']}),
+    (b"a=a+b;b=b+c", {b'a':[ b'a b;b=b c']}),
  ]
  
  class UrlParseTestCase(unittest.TestCase):
@@ -715,6 +703,43 @@ class UrlParseTestCase(unittest.TestCase):
                           "under NFKC normalization")
          self.assertIsInstance(cm.exception.args[0], str)
  
+    def test_parse_qs_separator(self):
+        parse_qs_semicolon_cases = [
+            (";", {}),
+            (";;", {}),
+            (";a=b", {'a': ['b']}),
+            ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
+            ("a=1;a=2", {'a': ['1', '2']}),
+            (b";", {}),
+            (b";;", {}),
+            (b";a=b", {b'a': [b'b']}),
+            (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
+            (b"a=1;a=2", {b'a': [b'1', b'2']}),
+        ]
+        for orig, expect in parse_qs_semicolon_cases:
+            result = urlparse.parse_qs(orig, separator=';')
+            self.assertEqual(result, expect, "Error parsing %r" % orig)
+
+
+    def test_parse_qsl_separator(self):
+        parse_qsl_semicolon_cases = [
+            (";", []),
+            (";;", []),
+            (";a=b", [('a', 'b')]),
+            ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),
+            ("a=1;a=2", [('a', '1'), ('a', '2')]),
+            (b";", []),
+            (b";;", []),
+            (b";a=b", [(b'a', b'b')]),
+            (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
+            (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
+        ]
+        for orig, expect in parse_qsl_semicolon_cases:
+            result = urlparse.parse_qsl(orig, separator=';')
+            self.assertEqual(result, expect, "Error parsing %r" % orig)
+
+
+
  def test_main():
      test_support.run_unittest(UrlParseTestCase)
  
diff --git a/Lib/urlparse.py b/Lib/urlparse.py

index d015f2c1e898613a152737faa85bc60ae4a20be0..f565e5000573615e1cbd3968046efe902bc3cb44 100644 (file)
--- a/Lib/urlparse.py
+++ b/Lib/urlparse.py
@@ -392,7 +392,8 @@ def unquote(s):
              append(item)
      return ''.join(res)
  
-def parse_qs(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None):
+def parse_qs(qs, keep_blank_values=0, strict_parsing=0,
+             max_num_fields=None, separator='&'):
      """Parse a query given as a string argument.
  
          Arguments:
@@ -412,17 +413,21 @@ def parse_qs(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None):
  
          max_num_fields: int. If set, then throws a ValueError if there
              are more than n fields read by parse_qsl().
+
+        separator: str. The symbol to use for separating the query arguments.
+            Defaults to &.
      """
      dict = {}
      for name, value in parse_qsl(qs, keep_blank_values, strict_parsing,
-                                 max_num_fields):
+                                 max_num_fields, separator=separator):
          if name in dict:
              dict[name].append(value)
          else:
              dict[name] = [value]
      return dict
  
-def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None):
+def parse_qsl(qs, keep_blank_values=0, strict_parsing=0,
+              max_num_fields=None, separator='&'):
      """Parse a query given as a string argument.
  
      Arguments:
@@ -442,17 +447,23 @@ def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None):
      max_num_fields: int. If set, then throws a ValueError if there
          are more than n fields read by parse_qsl().
  
+    separator: str. The symbol to use for separating the query arguments.
+        Defaults to &.
+
      Returns a list, as G-d intended.
      """
      # If max_num_fields is defined then check that the number of fields
      # is less than max_num_fields. This prevents a memory exhaustion DOS
      # attack via post bodies with many fields.
+    if not separator or (not isinstance(separator, (str, bytes))):
+        raise ValueError("Separator must be of type string or bytes.")
+
      if max_num_fields is not None:
-        num_fields = 1 + qs.count('&') + qs.count(';')
+        num_fields = 1 + qs.count(separator)
          if max_num_fields < num_fields:
              raise ValueError('Max number of fields exceeded')
  
-    pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
+    pairs = [s1 for s1 in qs.split(separator)]
      r = []
      for name_value in pairs:
          if not name_value and not strict_parsing:
diff --git a/Misc/NEWS b/Misc/NEWS

index 63b3fa88c01e5a172d1a100b48e7400cfa3200e8..ef2bbe38ecec70edd51b282097335dd7a433015c 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -4246,7 +4246,7 @@ Library
  - bpo-18167: cgi.FieldStorage no longer fails to handle multipart/form-data
    when \r\n appears at end of 65535 bytes without other newlines.
  
-- bpo-17403: urllib.parse.robotparser normalizes the urls before adding to
+- bpo-17403: robotparser normalizes the urls before adding to
    ruleline. This helps in handling certain types invalid urls in a
    conservative manner. Patch contributed by Mher Movsisyan.
  
@@ -8271,7 +8271,7 @@ Core and Builtins
  Library
  -------
  
-- bpo-7904: Changes to urllib.parse.urlsplit to handle schemes as defined by
+- bpo-7904: Changes to urlparse.urlsplit to handle schemes as defined by
    RFC3986. Anything before :// is considered a scheme and is followed by an
    authority (or netloc) and by '/' led path, which is optional.
  
diff --git a/Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst b/Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst

new file mode 100644 (file)

index 0000000..f08489b
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst
@@ -0,0 +1 @@
+Fix web cache poisoning vulnerability by defaulting the query args separator to ``&``, and allowing the user to choose a custom separator.
author	Senthil Kumaran <senthil@uthcode.com>
	Mon, 15 Feb 2021 19:16:43 +0000 (11:16 -0800)
committer	JinWang An <jinwang.an@samsung.com>
	Tue, 28 Mar 2023 07:25:07 +0000 (16:25 +0900)
Doc/library/cgi.rst		patch | blob | history
Lib/cgi.py		patch | blob | history
Lib/test/test_cgi.py		patch | blob | history
Lib/test/test_urllib2.py		patch | blob | history
Lib/test/test_urlparse.py		patch | blob | history
Lib/urlparse.py		patch | blob | history
Misc/NEWS		patch | blob | history
Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst	[new file with mode: 0644]	patch | blob