Rewrite unquote_unreserved based on urllib.unquote
author Jonty Wareing <jonty@jonty.co.uk>
Wed, 4 Apr 2012 09:34:39 +0000 (09:34 +0000)
committer Jonty Wareing <jonty@jonty.co.uk>
Wed, 11 Apr 2012 14:56:19 +0000 (14:56 +0000)
This is almost entirely taken from the unquote implementation in urllib,
slightly modified for the case at hand. It now leaves invalid percent-encodings
untouched rather than raising an exception, and the code is somewhat simpler.

AUTHORS.rst
requests/utils.py

index 4ded1a62cbf2f5737477585ecee32918e7186823..7cc76d6b2dc381fbc1bba0f625dbb37b6ba2d807 100644 (file)
@@ -93,4 +93,5 @@ Patches and Suggestions
 - Jiri Machalek
 - Steve Pulec
 - Michael Kelly
-- Michael Newman <newmaniese@gmail.com>
\ No newline at end of file
+- Michael Newman <newmaniese@gmail.com>
+- Jonty Wareing <jonty@jonty.co.uk>
index 8028442400840280aea06caaa587b362db86d5dd..ab6672f9e05b7a1b7cd40f67db5d9f12b41f4f16 100644 (file)
@@ -441,28 +441,25 @@ def stream_untransfer(gen, resp):
 
 
 # The unreserved URI characters (RFC 3986)
-UNRESERVED_SET = frozenset(
+_unreserved_set = frozenset(
     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
     + "0123456789-._~")
 
+_unreserved_hextochr = dict((c.encode('hex'), c) for c in _unreserved_set)
 
 def unquote_unreserved(uri):
     """Un-escape any percent-escape sequences in a URI that are unreserved
     characters.
     This leaves all reserved, illegal and non-ASCII bytes encoded.
     """
-    parts = uri.split('%')
-    for i in range(1, len(parts)):
-        h = parts[i][0:2]
-        if len(h) == 2:
-            c = chr(int(h, 16))
-            if c in UNRESERVED_SET:
-                parts[i] = c + parts[i][2:]
-            else:
-                parts[i] = '%' + parts[i]
-        else:
-            parts[i] = '%' + parts[i]
-    return ''.join(parts)
+    res = uri.split('%')
+    s = res[0]
+    for item in res[1:]:
+        try:
+            s += _unreserved_hextochr[item[:2].lower()] + item[2:]
+        except (KeyError, UnicodeDecodeError):
+            s += '%' + item
+    return s
 
 
 def requote_uri(uri):