Refactor stream_decode_gzip and _deflate into one function, stream_decompress.
author: Chase Sterling <chase.sterling@gmail.com>
Sat, 31 Dec 2011 03:18:08 +0000 (22:18 -0500)
committer: Chase Sterling <chase.sterling@gmail.com>
Sat, 31 Dec 2011 03:18:08 +0000 (22:18 -0500)
stream_decompress will now fall back to iterating over the raw (undecompressed) data if there is a problem with decompression.
Remove gzip decoding from Response.content, as urllib3 was doing it anyway.

requests/models.py
requests/utils.py

index 49664963a1b873eb134d12ea4dea078d36503c5d..17403a690c91ee5c4ccecef8fb9a19f2136e5623 100644 (file)
@@ -29,8 +29,8 @@ from .exceptions import (
     ConnectionError, HTTPError, RequestException, Timeout, TooManyRedirects,
     URLRequired, SSLError)
 from .utils import (
-    get_encoding_from_headers, stream_decode_response_unicode, decode_gzip,
-    stream_decode_gzip, stream_decode_deflate, guess_filename, requote_path)
+    get_encoding_from_headers, stream_decode_response_unicode,
+    stream_decompress, guess_filename, requote_path)
 
 
 REDIRECT_STATI = (codes.moved, codes.found, codes.other, codes.temporary_moved)
@@ -488,7 +488,7 @@ class Request(object):
                         redirect=False,
                         assert_same_host=False,
                         preload_content=False,
-                        decode_content=False,
+                        decode_content=True,
                         retries=self.config.get('max_retries', 0),
                         timeout=self.timeout,
                     )
@@ -614,9 +614,9 @@ class Response(object):
         gen = generate()
 
         if 'gzip' in self.headers.get('content-encoding', ''):
-            gen = stream_decode_gzip(gen)
+            gen = stream_decompress(gen, mode='gzip')
         elif 'deflate' in self.headers.get('content-encoding', ''):
-            gen = stream_decode_deflate(gen)
+            gen = stream_decompress(gen, mode='deflate')
 
         if decode_unicode is None:
             decode_unicode = self.config.get('decode_unicode')
@@ -675,13 +675,6 @@ class Response(object):
 
         content = self._content
 
-        # Decode GZip'd content.
-        if 'gzip' in self.headers.get('content-encoding', ''):
-            try:
-                content = decode_gzip(self._content)
-            except zlib.error:
-                pass
-
         # Decode unicode content.
         if self.config.get('decode_unicode'):
 
index 28b198ad3f39fd300c305b3447dc91efd865ae68..c7ab0a4e24ef4db2fcc2db7f733266f3797c5e7e 100644 (file)
@@ -354,36 +354,38 @@ def decode_gzip(content):
     return zlib.decompress(content, 16 + zlib.MAX_WBITS)
 
 
-def stream_decode_gzip(iterator):
-    """Stream decodes a gzip-encoded iterator"""
-    try:
-        dec = zlib.decompressobj(16 + zlib.MAX_WBITS)
-        for chunk in iterator:
-            rv = dec.decompress(chunk)
-            if rv:
-                yield rv
-        buf = dec.decompress('')
-        rv = buf + dec.flush()
-        if rv:
-            yield rv
-    except zlib.error:
-        pass
+def stream_decompress(iterator, mode='gzip'):
+    """
+    Stream decodes an iterator over compressed data
 
+    :param iterator: An iterator over compressed data
+    :param mode: 'gzip' or 'deflate'
+    :return: An iterator over decompressed data
+    """
 
-def stream_decode_deflate(iterator):
-    """Stream decodes a deflate-encoded iterator"""
+    if mode not in ['gzip', 'deflate']:
+        raise ValueError('stream_decompress mode must be gzip or deflate')
+
+    zlib_mode = 16 + zlib.MAX_WBITS if mode == 'gzip' else -zlib.MAX_WBITS
+    dec = zlib.decompressobj(zlib_mode)
     try:
-        dec = zlib.decompressobj(-zlib.MAX_WBITS)
         for chunk in iterator:
             rv = dec.decompress(chunk)
             if rv:
                 yield rv
+    except zlib.error:
+        # If there was an error decompressing, just return the raw chunk
+        yield chunk
+        # Continue to return the rest of the raw data
+        for chunk in iterator:
+            yield chunk
+    else:
+        # Make sure everything has been returned from the decompression object
         buf = dec.decompress('')
         rv = buf + dec.flush()
         if rv:
             yield rv
-    except zlib.error:
-        pass
+
 
 def requote_path(path):
     """Re-quote the given URL path component.