From: Chase Sterling
Date: Sat, 31 Dec 2011 03:18:08 +0000 (-0500)
Subject: Refactor stream_decode_gzip and _deflate into one function, stream_decompress.
X-Git-Tag: v0.9.1~6
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=9c6ffc530319d3b6d75bb160267d76f0224b0743;p=services%2Fpython-requests.git

Refactor stream_decode_gzip and _deflate into one function, stream_decompress.

stream_decompress will now iterate over the raw data if there is a problem with decompression
Remove gzip decoding from Response.content, as urllib3 was doing it anyway.
---

diff --git a/requests/models.py b/requests/models.py
index 4966496..17403a6 100644
--- a/requests/models.py
+++ b/requests/models.py
@@ -29,8 +29,8 @@ from .exceptions import (
     ConnectionError, HTTPError, RequestException, Timeout, TooManyRedirects,
     URLRequired, SSLError)
 from .utils import (
-    get_encoding_from_headers, stream_decode_response_unicode, decode_gzip,
-    stream_decode_gzip, stream_decode_deflate, guess_filename, requote_path)
+    get_encoding_from_headers, stream_decode_response_unicode,
+    stream_decompress, guess_filename, requote_path)
 
 
 REDIRECT_STATI = (codes.moved, codes.found, codes.other, codes.temporary_moved)
@@ -488,7 +488,7 @@ class Request(object):
                     redirect=False,
                     assert_same_host=False,
                     preload_content=False,
-                    decode_content=False,
+                    decode_content=True,
                     retries=self.config.get('max_retries', 0),
                     timeout=self.timeout,
                 )
@@ -614,9 +614,9 @@ class Response(object):
         gen = generate()
 
         if 'gzip' in self.headers.get('content-encoding', ''):
-            gen = stream_decode_gzip(gen)
+            gen = stream_decompress(gen, mode='gzip')
         elif 'deflate' in self.headers.get('content-encoding', ''):
-            gen = stream_decode_deflate(gen)
+            gen = stream_decompress(gen, mode='deflate')
 
         if decode_unicode is None:
             decode_unicode = self.config.get('decode_unicode')
@@ -675,13 +675,6 @@ class Response(object):
 
         content = self._content
 
-        # Decode GZip'd content.
-        if 'gzip' in self.headers.get('content-encoding', ''):
-            try:
-                content = decode_gzip(self._content)
-            except zlib.error:
-                pass
-
         # Decode unicode content.
         if self.config.get('decode_unicode'):
 
diff --git a/requests/utils.py b/requests/utils.py
index 28b198a..c7ab0a4 100644
--- a/requests/utils.py
+++ b/requests/utils.py
@@ -354,36 +354,38 @@ def decode_gzip(content):
     return zlib.decompress(content, 16 + zlib.MAX_WBITS)
 
 
-def stream_decode_gzip(iterator):
-    """Stream decodes a gzip-encoded iterator"""
-    try:
-        dec = zlib.decompressobj(16 + zlib.MAX_WBITS)
-        for chunk in iterator:
-            rv = dec.decompress(chunk)
-            if rv:
-                yield rv
-        buf = dec.decompress('')
-        rv = buf + dec.flush()
-        if rv:
-            yield rv
-    except zlib.error:
-        pass
+def stream_decompress(iterator, mode='gzip'):
+    """
+    Stream decodes an iterator over compressed data
 
+    :param iterator: An iterator over compressed data
+    :param mode: 'gzip' or 'deflate'
+    :return: An iterator over decompressed data
+    """
 
-def stream_decode_deflate(iterator):
-    """Stream decodes a deflate-encoded iterator"""
+    if mode not in ['gzip', 'deflate']:
+        raise ValueError('stream_decompress mode must be gzip or deflate')
+
+    zlib_mode = 16 + zlib.MAX_WBITS if mode == 'gzip' else -zlib.MAX_WBITS
+    dec = zlib.decompressobj(zlib_mode)
     try:
-        dec = zlib.decompressobj(-zlib.MAX_WBITS)
         for chunk in iterator:
             rv = dec.decompress(chunk)
             if rv:
                 yield rv
+    except zlib.error:
+        # If there was an error decompressing, just return the raw chunk
+        yield chunk
+        # Continue to return the rest of the raw data
+        for chunk in iterator:
+            yield chunk
+    else:
+        # Make sure everything has been returned from the decompression object
         buf = dec.decompress('')
         rv = buf + dec.flush()
         if rv:
             yield rv
-    except zlib.error:
-        pass
+
 
 def requote_path(path):
     """Re-quote the given URL path component.