# Provenance: commit "utils.unicode_from_html" by Kenneth Reitz,
# Wed, 24 Aug 2011 03:09:09 +0000 (-0400); added to requests/utils.py
# between get_encoding_from_headers and get_unicode_from_response.
def unicode_from_html(content):
    """Attempts to decode an HTML string into unicode.

    The candidate encodings returned by ``get_encodings_from_content`` are
    tried in order, and the first successful decode is returned.
    If unsuccessful, the original content is returned.

    :param content: the HTML content to decode.
    :returns: the decoded unicode string, or ``content`` unchanged when no
        candidate encoding could decode it.
    """

    encodings = get_encodings_from_content(content)

    for encoding in encodings:

        try:
            return unicode(content, encoding)
        except (UnicodeError, LookupError, TypeError):
            # UnicodeError: the bytes are not valid in this encoding.
            # LookupError: the encoding name is not a codec Python knows;
            #   the name comes from the page itself, so it cannot be trusted.
            # TypeError: e.g. content is already unicode and cannot be
            #   decoded again.
            # In every case, fall through and try the next candidate.
            continue

    return content