if rv:
yield rv
+# The unreserved URI characters (RFC 3986)
+UNRESERVED_SET = frozenset(
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+ + "0123456789-._~")
+
+def unquote_unreserved(uri):
+ """Un-escape any percent-escape sequences in a URI that are unreserved
+ characters.
+ This leaves all reserved, illegal and non-ASCII bytes encoded.
+ """
+ parts = uri.split('%')
+ for i in range(1, len(parts)):
+ h = parts[i][0:2]
+ if len(h) == 2:
+ c = chr(int(h, 16))
+ if c in UNRESERVED_SET:
+ parts[i] = c + parts[i][2:]
+ else:
+ parts[i] = '%' + parts[i]
+ else:
+ parts[i] = '%' + parts[i]
+ return ''.join(parts)
def requote_path(path):
"""Re-quote the given URL path component.
ensure that it is fully and consistently quoted.
"""
parts = path.split("/")
- parts = (quote(unquote(part), safe="") for part in parts)
+ # Unquote only the unreserved characters
+ # Then quote only illegal characters (do not quote reserved, unreserved,
+ # or '%')
+ parts = (quote(unquote_unreserved(part), safe="!#$%&'()*+,/:;=?@[]~")
+ for part in parts)
return "/".join(parts)