From: Kenneth Reitz Date: Wed, 14 Sep 2011 12:39:52 +0000 (-0400) Subject: Merge pull request #150 X-Git-Tag: v0.8.0~94^2~98 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=a8e77688f0a610bc8f8d304f2d15094aac149ac7;p=services%2Fpython-requests.git Merge pull request #150 --- diff --git a/requests/models.py b/requests/models.py index 6180e96..a16d32e 100644 --- a/requests/models.py +++ b/requests/models.py @@ -9,6 +9,7 @@ requests.models import urllib import urllib2 import socket +import codecs import zlib @@ -21,7 +22,7 @@ from .monkeys import Request as _Request, HTTPBasicAuthHandler, HTTPForcedBasicA from .structures import CaseInsensitiveDict from .packages.poster.encode import multipart_encode from .packages.poster.streaminghttp import register_openers, get_handlers -from .utils import dict_from_cookiejar, get_unicode_from_response, decode_gzip +from .utils import dict_from_cookiejar, get_unicode_from_response, stream_decode_response_unicode, decode_gzip, stream_decode_gzip from .status_codes import codes from .exceptions import RequestException, AuthenticationError, Timeout, URLRequired, InvalidMethod, TooManyRedirects @@ -396,6 +397,7 @@ class Response(object): def __init__(self): self._content = None + self._content_consumed = False #: Integer Code of responded HTTP Status. self.status_code = None @@ -438,6 +440,31 @@ class Response(object): return not self.error + def iter_content(self, chunk_size=10 * 1024, decode_unicode=None): + """Iterates over the response data. This avoids reading the content + at once into memory for large responses. The chunk size is the number + of bytes it should read into memory. This is not necessarily the + length of each item returned as decoding can take place. + """ + if self._content_consumed: + raise RuntimeError('The content for this response was ' + 'already consumed') + + def generate(): + while 1: + chunk = self.fo.read(chunk_size) + if not chunk: + break + yield chunk + self._content_consumed = True + gen = generate() + if 'gzip' in self.headers.get('content-encoding', ''): + gen = stream_decode_gzip(gen) + if decode_unicode is None: + decode_unicode = settings.decode_unicode + if decode_unicode: + gen = stream_decode_response_unicode(gen, self) + return gen @property def content(self): @@ -448,6 +475,10 @@ class Response(object): if self._content is not None: return self._content + if self._content_consumed: + raise RuntimeError('The content for this response was ' + 'already consumed') + # Read the contents. self._content = self.fo.read() @@ -462,6 +493,7 @@ class Response(object): if settings.decode_unicode: self._content = get_unicode_from_response(self) + self._content_consumed = True return self._content diff --git a/requests/utils.py b/requests/utils.py index c1aed43..17f79a8 100644 --- a/requests/utils.py +++ b/requests/utils.py @@ -10,6 +10,7 @@ that are also useful for external consumption. """ import cgi +import codecs import cookielib import re import zlib @@ -177,6 +178,24 @@ def unicode_from_html(content): return content +def stream_decode_response_unicode(iterator, r): + """Stream decodes a iterator.""" + encoding = get_encoding_from_headers(r.headers) + if encoding is None: + for item in iterator: + yield item + return + + decoder = codecs.getincrementaldecoder(encoding)(errors='replace') + for chunk in iterator: + rv = decoder.decode(chunk) + if rv: + yield rv + rv = decoder.decode('', final=True) + if rv: + yield rv + + def get_unicode_from_response(r): """Returns the requested content back in unicode. @@ -217,7 +236,24 @@ def decode_gzip(content): """ return zlib.decompress(content, 16+zlib.MAX_WBITS) + return zlib.decompress(content, 16+zlib.MAX_WBITS) + return zlib.decompress(content, 16 + zlib.MAX_WBITS) + +def stream_decode_gzip(iterator): + """Stream decodes a gzip-encoded iterator""" + try: + dec = zlib.decompressobj(16 + zlib.MAX_WBITS) + for chunk in iterator: + rv = dec.decompress(chunk) + if rv: + yield rv + buf = dec.decompress('') + rv = buf + dec.flush() + if rv: + yield rv + except zlib.error: + pass def curl_from_request(request): """Returns a curl command from the request. diff --git a/test_requests.py b/test_requests.py index 5baff56..406d23c 100755 --- a/test_requests.py +++ b/test_requests.py @@ -478,6 +478,7 @@ class RequestsTestSuite(unittest.TestCase): self.assertEqual(r2.status_code, 200) + def test_curl_HTTP_OK_GET(self): curl_str = 'curl -L -X GET -H "Accept-Encoding:gzip" -H "User-Agent:python-requests.org" "http://httpbin.org//"' r = requests.get(httpbin('/'))