.. function:: parse(fp[, environ[, keep_blank_values[, strict_parsing]]])
- Parse a query in the environment or from a file (the file defaults to
- ``sys.stdin`` and environment defaults to ``os.environ``). The *keep_blank_values* and *strict_parsing* parameters are
- passed to :func:`urlparse.parse_qs` unchanged.
-
+ Parse a query in the environment or from a file (the file
+ defaults to ``sys.stdin`` and environment defaults to
+ ``os.environ``). The *keep_blank_values*, *strict_parsing*,
+ and *separator* parameters are passed to
+ :func:`urlparse.parse_qs` unchanged.
.. function:: parse_qs(qs[, keep_blank_values[, strict_parsing[, max_num_fields]]])
Note that this does not parse nested multipart parts --- use
:class:`FieldStorage` for that.
+ .. versionchanged:: 3.6.13
+ Added the *separator* parameter.
+
.. function:: parse_header(string)
# 0 ==> unlimited input
maxlen = 0
-def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
+def parse(fp=None, environ=os.environ, keep_blank_values=0,
+ strict_parsing=0, separator='&'):
"""Parse a query in the environment or from a file (default stdin)
Arguments, all optional:
strict_parsing: flag indicating what to do with parsing errors.
If false (the default), errors are silently ignored.
If true, errors raise a ValueError exception.
+
+ separator: str. The symbol to use for separating the query arguments.
+ Defaults to &.
"""
if fp is None:
fp = sys.stdin
else:
qs = ""
environ['QUERY_STRING'] = qs # XXX Shouldn't, really
- return urlparse.parse_qs(qs, keep_blank_values, strict_parsing)
+ return urlparse.parse_qs(qs, keep_blank_values, strict_parsing,
+ separator=separator)
# parse query string function called from urlparse,
def __init__(self, fp=None, headers=None, outerboundary="",
environ=os.environ, keep_blank_values=0, strict_parsing=0,
- max_num_fields=None):
+ max_num_fields=None, separator='&'):
"""Constructor. Read multipart/* until last part.
Arguments, all optional:
self.keep_blank_values = keep_blank_values
self.strict_parsing = strict_parsing
self.max_num_fields = max_num_fields
+ self.separator = separator
if 'REQUEST_METHOD' in environ:
method = environ['REQUEST_METHOD'].upper()
self.qs_on_post = None
if self.qs_on_post:
qs += '&' + self.qs_on_post
query = urlparse.parse_qsl(qs, self.keep_blank_values,
- self.strict_parsing, self.max_num_fields)
+ self.strict_parsing,
+ self.max_num_fields,
+ separator=self.separator)
self.list = [MiniFieldStorage(key, value) for key, value in query]
self.skip_lines()
query = urlparse.parse_qsl(self.qs_on_post,
self.keep_blank_values,
self.strict_parsing,
- self.max_num_fields)
+ self.max_num_fields,
+ self.separator)
self.list.extend(MiniFieldStorage(key, value)
for key, value in query)
FieldStorageClass = None
klass = self.FieldStorageClass or self.__class__
part = klass(self.fp, {}, ib,
environ, keep_blank_values, strict_parsing,
- max_num_fields)
+ max_num_fields,
+ self.separator)
# Throw first part away
while not part.done:
("", ValueError("bad query field: ''")),
("&", ValueError("bad query field: ''")),
("&&", ValueError("bad query field: ''")),
- (";", ValueError("bad query field: ''")),
- (";&;", ValueError("bad query field: ''")),
# Should the next few really be valid?
("=", {}),
("=&=", {}),
- ("=;=", {}),
# This rest seem to make sense
("=a", {'': ['a']}),
("&=a", ValueError("bad query field: ''")),
("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
("a=a+b&a=b+a", {'a': ['a b', 'b a']}),
("x=1&y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
- ("x=1;y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
- ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
("Hbc5161168c542333633315dee1182227:key_store_seqid=400006&cuyer=r&view=bustomer&order_id=0bb2e248638833d48cb7fed300000f1b&expire=964546263&lobale=en-US&kid=130003.300038&ss=env",
{'Hbc5161168c542333633315dee1182227:key_store_seqid': ['400006'],
'cuyer': ['r'],
self.assertEqual(expect[k], v)
self.assertItemsEqual(expect.values(), d.values())
+ def test_separator(self):
+ parse_semicolon = [
+ ("x=1;y=2.0", {'x': ['1'], 'y': ['2.0']}),
+ ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
+ (";", ValueError("bad query field: ''")),
+ (";;", ValueError("bad query field: ''")),
+ ("=;a", ValueError("bad query field: 'a'")),
+ (";b=a", ValueError("bad query field: ''")),
+ ("b;=a", ValueError("bad query field: 'b'")),
+ ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
+ ("a=a+b;a=b+a", {'a': ['a b', 'b a']}),
+ ]
+ for orig, expect in parse_semicolon:
+ env = {'QUERY_STRING': orig}
+ fs = cgi.FieldStorage(separator=';', environ=env)
+ if isinstance(expect, dict):
+ for key in expect.keys():
+ expect_val = expect[key]
+ self.assertIn(key, fs)
+ if len(expect_val) > 1:
+ self.assertEqual(fs.getvalue(key), expect_val)
+ else:
+ self.assertEqual(fs.getvalue(key), expect_val[0])
+
def test_log(self):
cgi.log("Testing")
# level 'def urlopen()' function defined in this... (quite ugly)
# test suite. They use different url opening codepaths. Plain
# urlopen uses FancyURLOpener which goes via a codepath that
- # calls urllib.parse.quote() on the URL which makes all of the
+ # calls urlparse.quote() on the URL which makes all of the
# above attempts at injection within the url _path_ safe.
escaped_char_repr = repr(char).replace('\\', r'\\')
InvalidURL = httplib.InvalidURL
# level 'def urlopen()' function defined in this... (quite ugly)
# test suite. They use different url opening codepaths. Plain
# urlopen uses FancyURLOpener which goes via a codepath that
- # calls urllib.parse.quote() on the URL which makes all of the
+ # calls urlparse.quote() on the URL which makes all of the
# above attempts at injection within the url _path_ safe.
InvalidURL = httplib.InvalidURL
with self.assertRaisesRegexp(InvalidURL,
("&a=b", [('a', 'b')]),
("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
("a=1&a=2", [('a', '1'), ('a', '2')]),
- (";", []),
- (";;", []),
- (";a=b", [('a', 'b')]),
- ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),
- ("a=1;a=2", [('a', '1'), ('a', '2')]),
- (b";", []),
- (b";;", []),
- (b";a=b", [(b'a', b'b')]),
- (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
- (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
+ (";a=b", [(';a', 'b')]),
+ ("a=a+b;b=b+c", [('a', 'a b;b=b c')]),
+ (b";a=b", [(b';a', b'b')]),
+ (b"a=a+b;b=b+c", [(b'a', b'a b;b=b c')]),
]
parse_qs_test_cases = [
(b"&a=b", {b'a': [b'b']}),
(b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
(b"a=1&a=2", {b'a': [b'1', b'2']}),
- (";", {}),
- (";;", {}),
- (";a=b", {'a': ['b']}),
- ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
- ("a=1;a=2", {'a': ['1', '2']}),
- (b";", {}),
- (b";;", {}),
- (b";a=b", {b'a': [b'b']}),
- (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
- (b"a=1;a=2", {b'a': [b'1', b'2']}),
+ (";a=b", {';a': ['b']}),
+ ("a=a+b;b=b+c", {'a': ['a b;b=b c']}),
+ (b";a=b", {b';a': [b'b']}),
+ (b"a=a+b;b=b+c", {b'a':[ b'a b;b=b c']}),
]
class UrlParseTestCase(unittest.TestCase):
"under NFKC normalization")
self.assertIsInstance(cm.exception.args[0], str)
+ def test_parse_qs_separator(self):
+ parse_qs_semicolon_cases = [
+ (";", {}),
+ (";;", {}),
+ (";a=b", {'a': ['b']}),
+ ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
+ ("a=1;a=2", {'a': ['1', '2']}),
+ (b";", {}),
+ (b";;", {}),
+ (b";a=b", {b'a': [b'b']}),
+ (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
+ (b"a=1;a=2", {b'a': [b'1', b'2']}),
+ ]
+ for orig, expect in parse_qs_semicolon_cases:
+ result = urlparse.parse_qs(orig, separator=';')
+ self.assertEqual(result, expect, "Error parsing %r" % orig)
+
+
+ def test_parse_qsl_separator(self):
+ parse_qsl_semicolon_cases = [
+ (";", []),
+ (";;", []),
+ (";a=b", [('a', 'b')]),
+ ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),
+ ("a=1;a=2", [('a', '1'), ('a', '2')]),
+ (b";", []),
+ (b";;", []),
+ (b";a=b", [(b'a', b'b')]),
+ (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
+ (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
+ ]
+ for orig, expect in parse_qsl_semicolon_cases:
+ result = urlparse.parse_qsl(orig, separator=';')
+ self.assertEqual(result, expect, "Error parsing %r" % orig)
+
+
+
def test_main():
test_support.run_unittest(UrlParseTestCase)
append(item)
return ''.join(res)
-def parse_qs(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None):
+def parse_qs(qs, keep_blank_values=0, strict_parsing=0,
+ max_num_fields=None, separator='&'):
"""Parse a query given as a string argument.
Arguments:
max_num_fields: int. If set, then throws a ValueError if there
are more than n fields read by parse_qsl().
+
+ separator: str. The symbol to use for separating the query arguments.
+ Defaults to &.
"""
dict = {}
for name, value in parse_qsl(qs, keep_blank_values, strict_parsing,
- max_num_fields):
+ max_num_fields, separator=separator):
if name in dict:
dict[name].append(value)
else:
dict[name] = [value]
return dict
-def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None):
+def parse_qsl(qs, keep_blank_values=0, strict_parsing=0,
+ max_num_fields=None, separator='&'):
"""Parse a query given as a string argument.
Arguments:
max_num_fields: int. If set, then throws a ValueError if there
are more than n fields read by parse_qsl().
+ separator: str. The symbol to use for separating the query arguments.
+ Defaults to &.
+
Returns a list, as G-d intended.
"""
# If max_num_fields is defined then check that the number of fields
# is less than max_num_fields. This prevents a memory exhaustion DOS
# attack via post bodies with many fields.
+ if not separator or (not isinstance(separator, (str, bytes))):
+ raise ValueError("Separator must be of type string or bytes.")
+
if max_num_fields is not None:
- num_fields = 1 + qs.count('&') + qs.count(';')
+ num_fields = 1 + qs.count(separator)
if max_num_fields < num_fields:
raise ValueError('Max number of fields exceeded')
- pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
+ pairs = [s1 for s1 in qs.split(separator)]
r = []
for name_value in pairs:
if not name_value and not strict_parsing:
- bpo-18167: cgi.FieldStorage no longer fails to handle multipart/form-data
when \r\n appears at end of 65535 bytes without other newlines.
-- bpo-17403: urllib.parse.robotparser normalizes the urls before adding to
+- bpo-17403: urlparse.robotparser normalizes the urls before adding to
ruleline. This helps in handling certain types invalid urls in a
conservative manner. Patch contributed by Mher Movsisyan.
Library
-------
-- bpo-7904: Changes to urllib.parse.urlsplit to handle schemes as defined by
+- bpo-7904: Changes to urlparse.urlsplit to handle schemes as defined by
RFC3986. Anything before :// is considered a scheme and is followed by an
authority (or netloc) and by '/' led path, which is optional.
--- /dev/null
+Fix web cache poisoning vulnerability by defaulting the query args separator to ``&``, and allowing the user to choose a custom separator.