import-dsc: Delete NULs from dpkg-parsechangelog output before parsing
author: Andrej Shadura <andrew.shadura@collabora.co.uk>
Thu, 4 Feb 2021 17:33:20 +0000 (18:33 +0100)
committer: Guido Günther <agx@sigxcpu.org>
Mon, 8 Feb 2021 10:06:15 +0000 (11:06 +0100)
Commit messages may not contain NUL characters; in practice,
Debian changelogs sometimes do, usually as the result of
incorrectly used encoding for non-ASCII personal names.

As a safety net, delete all NULs from the output of dpkg-parsechangelog
before parsing it, so that they don’t get fed into Git or anything else
further in the processing.

Closes: #981340
gbp/deb/changelog.py
tests/30_test_deb_changelog.py

index dda9b753b7115ac79eaa94ea02de2693aa11a8d0..fdee1a9db0b0d813529a79848296dd4c17b2ee30 100644 (file)
@@ -99,7 +99,7 @@ class ChangeLog(object):
         if cmd.returncode:
             raise ParseChangeLogError("Failed to parse changelog. "
                                       "dpkg-parsechangelog said:\n%s" % stderr.decode().strip())
-        return stdout.decode()
+        return stdout.decode().replace('\0', '')
 
     def _parse(self):
         """Parse a changelog based on the already read contents."""
index 9028621f770ef524d47c0a4ae58a6faeabae37e9..143f5056083281bc8588ed460829fd6e3b5bbb3f 100644 (file)
@@ -30,6 +30,21 @@ class TestQuoting(unittest.TestCase):
         self.assertEquals(cl.email, 'agx@sigxcpu.org')
 
 
+class TestEncoding(unittest.TestCase):
+    def test_nul(self):
+        """Test we remove NUL characters from strings when parsing (#981340)"""
+        changes = """git-buildpackage (0.9.2) unstable; urgency=low
+
+  * List of ch\0nges
+
+ -- User N\0me <agx@sigxcpu.org>  Sun, 12 Nov 2017 19:00:00 +0200
+"""
+        cl = ChangeLog(changes)
+        self.assertEquals(cl.author, 'User Nme')
+        self.assertEquals(cl.email, 'agx@sigxcpu.org')
+        self.assertEquals('\0' in cl.get_changes(), False)
+
+
 @skip_without_cmd('debchange')
 class Test(unittest.TestCase):
     def setUp(self):