From: Kenneth Reitz Date: Sat, 20 Aug 2011 22:42:18 +0000 (-0400) Subject: Merge https://github.com/rickmak/requests into feature/unicode X-Git-Tag: v0.6.4^2~5^2~5^2~40^2~3 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=7eb272b299fb5b496c786c0c34f6bfd96585f00f;p=services%2Fpython-requests.git Merge https://github.com/rickmak/requests into feature/unicode Conflicts: requests/models.py --- 7eb272b299fb5b496c786c0c34f6bfd96585f00f diff --cc requests/models.py index 2d7fc8f,a8aa6cb..3cbf102 --- a/requests/models.py +++ b/requests/models.py @@@ -432,9 -402,55 +434,59 @@@ class Response(object) self._content = zlib.decompress(self._content, 16+zlib.MAX_WBITS) except zlib.error: pass - return self._content + return self.unicode_content(self._content) ++ ++ + else: + raise AttributeError + + + def get_content_type(self): + content_type = self.headers.get("content-type") + content_type, params = cgi.parse_header(content_type) + return content_type, params + + def get_encoding_from_content_type(self): + content_type, params = self.get_content_type() + if "charset" in params: + return params["charset"].strip("'\"") + + def get_encodings_from_content(self, content): + if self._charset_re is None: + self._charset_re = re.compile( + r']', flags=re.I + ) + return self._charset_re.findall(content) + + def unicode_content(self, content): + """ + Returns the requested content back in unicode. + Tried: + 1. charset from content-type + 2. every encodings from + 3. fall back and replace all unicode characters + """ + tried_encodings = [] + # Try charset from content-type + encoding = self.get_encoding_from_content_type() + if encoding: + try: + return unicode(content, encoding) + except UnicodeError: + tried_encodings.append(encoding) + + # Try every encodings from + encodings = self.get_encodings_from_content(content) + for encoding in encodings: + if encoding in tried_encodings: + continue + try: + return unicode(content, encoding) + except UnicodeError: + tried_encodings.append(encoding) + + # Fall back: + return unicode(content, encoding, errors="replace") def raise_for_status(self): """Raises stored :class:`HTTPError` or :class:`URLError`, if one occured."""