From: Jonty Wareing
Date: Wed, 4 Apr 2012 09:34:39 +0000 (+0000)
Subject: Rewrite unquote_unreserved based on urllib.unquote
X-Git-Tag: v0.12.0~53^2
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=06e4971fe72c377692ba9e37638431ed0582a028;p=services%2Fpython-requests.git

Rewrite unquote_unreserved based on urllib.unquote

This is almost entirely taken from the unquote implementation in urllib,
slightly modified for the case in hand. It now deals with invalid
%encodings rather than exploding, and the code is somewhat simpler.
---

diff --git a/AUTHORS.rst b/AUTHORS.rst
index 4ded1a6..7cc76d6 100644
--- a/AUTHORS.rst
+++ b/AUTHORS.rst
@@ -93,4 +93,5 @@ Patches and Suggestions
 - Jiri Machalek
 - Steve Pulec
 - Michael Kelly
-- Michael Newman
\ No newline at end of file
+- Michael Newman
+- Jonty Wareing
diff --git a/requests/utils.py b/requests/utils.py
index 8028442..ab6672f 100644
--- a/requests/utils.py
+++ b/requests/utils.py
@@ -441,28 +441,25 @@ def stream_untransfer(gen, resp):
 
 
 # The unreserved URI characters (RFC 3986)
-UNRESERVED_SET = frozenset(
+_unreserved_set = frozenset(
     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
     + "0123456789-._~")
 
+_unreserved_hextochr = dict((c.encode('hex'), c) for c in _unreserved_set)
 
 def unquote_unreserved(uri):
     """Un-escape any percent-escape sequences in a URI that are unreserved
     characters.
     This leaves all reserved, illegal and non-ASCII bytes encoded.
     """
-    parts = uri.split('%')
-    for i in range(1, len(parts)):
-        h = parts[i][0:2]
-        if len(h) == 2:
-            c = chr(int(h, 16))
-            if c in UNRESERVED_SET:
-                parts[i] = c + parts[i][2:]
-            else:
-                parts[i] = '%' + parts[i]
-        else:
-            parts[i] = '%' + parts[i]
-    return ''.join(parts)
+    res = uri.split('%')
+    s = res[0]
+    for item in res[1:]:
+        try:
+            s += _unreserved_hextochr[item[:2].lower()] + item[2:]
+        except (KeyError, UnicodeDecodeError):
+            s += '%' + item
+    return s
 
 
 def requote_uri(uri):