From 228c5e68aedc7d5d1e4b5bcc49aa4ae4811fbecd Mon Sep 17 00:00:00 2001 From: Jakub Wieczorek Date: Mon, 5 Mar 2012 18:39:19 +0100 Subject: [PATCH] Considerably simplify and fix the iter_lines() function 1. Do not use rstrip() as this would also throw away meaningful whitespace 2. Avoid assuming what splitlines() considers a line break terminator, the list of those is quite long in Unicode. 3. Add tests for the above edge cases. --- requests/models.py | 28 ++++++++++------------------ tests/test_requests.py | 18 +++++++++++------- 2 files changed, 21 insertions(+), 25 deletions(-) diff --git a/requests/models.py b/requests/models.py index c6c5f89..77e1043 100644 --- a/requests/models.py +++ b/requests/models.py @@ -694,39 +694,31 @@ class Response(object): return gen - def iter_lines(self, chunk_size=10 * 1024, decode_unicode=None): + def iter_lines(self, decode_unicode=None): """Iterates over the response data, one line at a time. This avoids reading the content at once into memory for large responses. """ - #TODO: why rstrip by default pending = None - for chunk in self.iter_content(chunk_size, decode_unicode=decode_unicode): + for chunk in self.iter_content(chunk_size=10 * 1024, decode_unicode=decode_unicode): if pending is not None: chunk = pending + chunk - lines = chunk.splitlines(True) + lines = chunk.splitlines() - for line in lines[:-1]: - yield line.rstrip() - - # Save the last part of the chunk for next iteration, to keep full line together - # lines may be empty for the last chunk of a chunked response - - if lines: - pending = lines[-1] - #if pending is a complete line, give it baack - if pending[-1] == '\n': - yield pending.rstrip() - pending = None + # An incomplete line. 
+ if lines[-1].endswith(chunk[-1]): + pending = lines.pop() else: pending = None - # Yield the last line + for line in lines: + yield line + if pending is not None: - yield pending.rstrip() + yield pending @property def content(self): diff --git a/tests/test_requests.py b/tests/test_requests.py index 3f6a994..7a55ada 100755 --- a/tests/test_requests.py +++ b/tests/test_requests.py @@ -726,20 +726,24 @@ class RequestsTestSuite(TestSetup, unittest.TestCase): self.assertEqual(i, len_lines) - # Test 'dangling' fragment in responses that do not terminate in - # a newline. + # Tests that trailing whitespaces within lines do not get stripped. + # Tests that a trailing non-terminated line does not get stripped. quote = ( - '''Why will he not upon our fair request\n''' - '''Untent his person and share the air with us?''' + '''Agamemnon \n''' + '''\tWhy will he not upon our fair request\r\n''' + '''\tUntent his person and share the air with us?''' ) # Make a request and monkey-patch its contents r = get(httpbin('get')) r.raw = StringIO(quote) - # Make sure iter_lines doesn't chop the trailing bit - lines = '\n'.join(r.iter_lines()) - self.assertEqual(lines, quote) + lines = list(r.iter_lines()) + len_lines = len(lines) + self.assertEqual(len_lines, 3) + + joined = lines[0] + '\n' + lines[1] + '\r\n' + lines[2] + self.assertEqual(joined, quote) def test_safe_mode(self): -- 2.34.1