import urllib2
import socket
import zlib
+import cgi
from urllib2 import HTTPError
from urlparse import urlparse, urlunparse, urljoin
self._content = zlib.decompress(self._content, 16+zlib.MAX_WBITS)
except zlib.error:
pass
- return self._content
+ return self.unicode_content(self._content)
+
+
def get_content_type(self):
    """Parse the response's ``Content-Type`` header.

    Returns:
        A ``(content_type, params)`` tuple as produced by
        ``cgi.parse_header``: the media type string and a dict of the
        header parameters (e.g. ``{"charset": "utf-8"}``). When the header
        is missing or empty the result is ``("", {})``.
    """
    # A response is not required to carry a Content-Type header, and
    # cgi.parse_header only accepts strings: substitute an empty header
    # rather than handing it None (which raises).
    raw_header = self.headers.get("content-type") or ""
    content_type, params = cgi.parse_header(raw_header)
    return content_type, params
+
def get_encoding_from_content_type(self):
    """Return the charset named in the ``Content-Type`` header, if any.

    The value of the header's ``charset`` parameter is returned with any
    surrounding single or double quotes stripped. ``None`` is returned when
    the header carries no ``charset`` parameter.
    """
    _, header_params = self.get_content_type()
    try:
        raw_charset = header_params["charset"]
    except KeyError:
        # No charset parameter was sent with the header.
        return None
    return raw_charset.strip("'\"")
+
def get_encodings_from_content(self, content):
    """Return every charset name declared in ``<meta>`` tags of ``content``.

    Args:
        content: the document body to scan.

    Returns:
        A list of charset names in document order; empty when no
        ``<meta ... charset=...>`` declaration is found.
    """
    # Compile lazily and cache the pattern on the instance.
    if self._charset_re is None:
        # Capture only characters that can belong to a charset token, so an
        # unquoted declaration such as ``<meta charset=utf-8 />`` yields
        # "utf-8" instead of "utf-8 /" (which is not a valid codec name).
        self._charset_re = re.compile(
            r'<meta.*?charset=["\']*([^\s"\'/>;]+)', flags=re.I
        )
    return self._charset_re.findall(content)
+
def unicode_content(self, content):
    """Return ``content`` decoded to unicode.

    Encodings are tried in this order:
      1. the charset from the Content-Type header;
      2. every charset declared in ``<meta ... charset=XXX>`` tags;
      3. as a fallback, a lossy decode that replaces undecodable bytes.

    Every candidate that fails is appended to ``self.tried_encodings``.

    Args:
        content: the raw response body.

    Returns:
        The decoded unicode text.
    """
    # Last candidate whose codec exists but which could not decode the body
    # strictly; it is the best choice for the lossy fallback.
    usable_encoding = None

    # Try charset from content-type
    encoding = self.get_encoding_from_content_type()
    if encoding:
        try:
            return unicode(content, encoding)
        except UnicodeError:
            usable_encoding = encoding
            self.tried_encodings.append(encoding)
        except LookupError:
            # Unknown codec name: LookupError is not a UnicodeError, so it
            # must be caught separately or a bogus charset crashes decoding.
            self.tried_encodings.append(encoding)

    # Try every encoding from <meta ... charset=XXX>
    for encoding in self.get_encodings_from_content(content):
        if encoding in self.tried_encodings:
            continue
        try:
            return unicode(content, encoding)
        except UnicodeError:
            usable_encoding = encoding
            self.tried_encodings.append(encoding)
        except LookupError:
            self.tried_encodings.append(encoding)

    # Fall back: decode lossily. Use UTF-8 when no candidate named a real
    # codec (including when no charset was declared at all), because
    # passing None or an unknown name as the encoding would raise.
    return unicode(content, usable_encoding or "utf-8", errors="replace")
def raise_for_status(self):
"""Raises stored :class:`HTTPError` or :class:`URLError`, if one occured."""
--- /dev/null
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from __future__ import with_statement
+
+import unittest
+import cookielib
+
+try:
+ import omnijson as json
+except ImportError:
+ import json
+
+import requests
+
+
+
# Base URLs that httpbin() and httpsbin() prepend to the requested path.
HTTPBIN_URL = 'http://httpbin.org/'
HTTPSBIN_URL = 'https://httpbin.ep.io/'

# Alternative staging endpoints, kept commented out.
# HTTPBIN_URL = 'http://staging.httpbin.org/'
# HTTPSBIN_URL = 'https://httpbin-staging.ep.io/'
+
+
def httpbin(*suffix):
    """Build an HTTPBIN url: the base url followed by the '/'-joined segments."""
    path = '/'.join(suffix)
    return HTTPBIN_URL + path
+
+
def httpsbin(*suffix):
    """Build an HTTPSBIN url: the base url followed by the '/'-joined segments."""
    path = '/'.join(suffix)
    return HTTPSBIN_URL + path
+
+
# Both URL builders (plain HTTP and HTTPS) grouped in one tuple.
SERVICES = (httpbin, httpsbin)
+
+
+
class RequestsTestSuite(unittest.TestCase):
    """Requests test cases.

    Each test performs a real HTTP GET against a public site and checks
    that the response succeeds and that ``content`` is returned as unicode.
    """

    def setUp(self):
        """No fixtures are required."""
        pass

    def tearDown(self):
        """Teardown."""
        pass

    def test_HTTP_200_OK_GET_ON_ISO88591(self):
        # NOTE(review): per the test name this page is presumably served as
        # ISO-8859-1 -- confirm against the live site.
        r = requests.get("http://www.qypedeals.de/Verzehrgutschein+für+Jellyfish")
        self.assertEqual(r.status_code, 200)
        # assertIsInstance only exists in unittest from Python 2.7 on; the
        # plain isinstance check also runs on older interpreters.
        self.assertTrue(isinstance(r.content, unicode))

    def test_HTTP_200_OK_GET_ON_BIG5(self):
        # NOTE(review): per the test name this page is presumably served as
        # Big5 -- confirm against the live site.
        r = requests.get("http://google.com.hk/")
        self.assertEqual(r.status_code, 200)
        self.assertTrue(isinstance(r.content, unicode))
+
+
# Run the test suite when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()