rpm: add gbp.rpm.changelog module
author Markus Lehtonen <markus.lehtonen@linux.intel.com>
Tue, 4 Feb 2014 15:49:28 +0000 (17:49 +0200)
committer Markus Lehtonen <markus.lehtonen@linux.intel.com>
Thu, 5 Jun 2014 11:20:07 +0000 (14:20 +0300)
This new module contains basic containers and functionality for parsing
and updating rpm changelogs. It is coupled with the rpm packaging policy
class which now has definitions for rpm changelog formatting.

Signed-off-by: Markus Lehtonen <markus.lehtonen@linux.intel.com>
gbp/rpm/changelog.py [new file with mode: 0644]
gbp/rpm/policy.py
tests/test_rpm_changelog.py [new file with mode: 0644]

diff --git a/gbp/rpm/changelog.py b/gbp/rpm/changelog.py
new file mode 100644 (file)
index 0000000..8eb08db
--- /dev/null
@@ -0,0 +1,246 @@
+# vim: set fileencoding=utf-8 :
+#
+# (C) 2014 Intel Corporation <markus.lehtonen@linux.intel.com>
+#    This program is free software; you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation; either version 2 of the License, or
+#    (at your option) any later version.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with this program; if not, write to the Free Software
+#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+"""An RPM Changelog"""
+
+import datetime
+import re
+
+import gbp.log
+
+
+class ChangelogError(Exception):
+    """Error in reading, parsing or formatting an RPM changelog"""
+
+
+class _ChangelogHeader(object):
+    """The header part of one changelog section
+
+    Stores the timestamp and any other header properties given as keyword
+    arguments and renders them with the header format strings of the
+    packaging policy.
+    """
+
+    def __init__(self, pkgpolicy, time=None, **kwargs):
+        """
+        @param pkgpolicy: packaging policy providing
+            C{Changelog.header_format} and C{Changelog.header_time_format}
+        @param time: timestamp of the changelog section
+        @type time: C{datetime.datetime} or C{None}
+        @param kwargs: other header properties, stored by name
+        """
+        self._pkgpolicy = pkgpolicy
+        self._data = {'time': time}
+        self._data.update(kwargs)
+
+    def __contains__(self, key):
+        return key in self._data
+
+    def __getitem__(self, key):
+        """Return the value of a property, C{None} if it has not been set"""
+        return self._data.get(key)
+
+    def __str__(self):
+        """Format the header as one line of text, trailing newline included
+
+        @raise ChangelogError: if the timestamp, or a property referenced by
+            the header format, is missing
+        """
+        policy = self._pkgpolicy.Changelog
+        time = self._data['time']
+        if time is None:
+            # Report a missing timestamp like any other missing property
+            # instead of failing with AttributeError on None.strftime()
+            raise ChangelogError("Unable to format changelog header, missing "
+                                 "property 'time'")
+        keys = dict(self._data)
+        keys['time'] = time.strftime(policy.header_time_format)
+        try:
+            return policy.header_format % keys + '\n'
+        except KeyError as err:
+            raise ChangelogError("Unable to format changelog header, missing "
+                                 "property %s" % err)
+
+
+class _ChangelogEntry(object):
+    """An entry (one 'change') in an RPM changelog"""
+
+    def __init__(self, pkgpolicy, author, text):
+        """
+        @param pkgpolicy: RPM packaging policy
+        @type pkgpolicy: L{RpmPkgPolicy}
+        @param author: author of the change
+        @type author: C{str}
+        @param text: message of the changelog entry
+        @type text: C{str} or C{list} of C{str}
+        """
+        self._pkgpolicy = pkgpolicy
+        self.author = author
+        if isinstance(text, str):
+            lines = text.splitlines()
+        else:
+            # Work on a copy so that the caller's list is never modified
+            lines = list(text) if text else []
+        # Strip trailing empty lines from the stored lines (not from the
+        # 'text' argument, which may be a string and has no pop())
+        while lines and not lines[-1].strip():
+            lines.pop()
+        self._text = lines
+
+    def __str__(self):
+        """Return the entry text, every line terminated with a newline"""
+        # Currently no (re-)formatting, just raw text
+        return "".join(line + '\n' for line in self._text)
+
+
+class _ChangelogSection(object):
+    """One section (a header and its set of changes) in an RPM changelog"""
+
+    def __init__(self, pkgpolicy, *args, **kwargs):
+        """Create a section without entries
+
+        All arguments after C{pkgpolicy} are passed on to the section header.
+        """
+        self._pkgpolicy = pkgpolicy
+        self._trailer = '\n'
+        self.entries = []
+        self.header = _ChangelogHeader(pkgpolicy, *args, **kwargs)
+
+    def __str__(self):
+        parts = [str(self.header)]
+        parts.extend(str(entry) for entry in self.entries)
+        # The trailer acts as the "section separator"
+        parts.append(self._trailer)
+        return "".join(parts)
+
+    def set_header(self, *args, **kwargs):
+        """Replace the section header with a newly created one"""
+        self.header = _ChangelogHeader(self._pkgpolicy, *args, **kwargs)
+
+    def append_entry(self, entry):
+        """Put an entry last in the list of entries and return it"""
+        self.entries.append(entry)
+        return entry
+
+
+class Changelog(object):
+    """An RPM changelog: a list of sections, the first one printed first"""
+
+    def __init__(self, pkgpolicy):
+        self.sections = []
+        self._pkgpolicy = pkgpolicy
+
+    def __str__(self):
+        return "".join(str(section) for section in self.sections)
+
+    def create_entry(self, *args, **kwargs):
+        """Build a new entry object; it is not added to any section"""
+        return _ChangelogEntry(self._pkgpolicy, *args, **kwargs)
+
+    def add_section(self, *args, **kwargs):
+        """Create an empty section, put it first in the changelog, return it"""
+        new_section = _ChangelogSection(self._pkgpolicy, *args, **kwargs)
+        self.sections.insert(0, new_section)
+        return new_section
+
+
+class ChangelogParser(object):
+    """Parser for RPM changelogs
+
+    All regular expressions used for parsing are read from the C{Changelog}
+    settings of the packaging policy given to the constructor.
+    """
+
+    def __init__(self, pkgpolicy):
+        """
+        @param pkgpolicy: packaging policy whose C{Changelog} attribute
+            provides the parsing regexps
+        """
+        self._pkgpolicy = pkgpolicy
+        self.section_match_re = pkgpolicy.Changelog.section_match_re
+        self.section_split_re = pkgpolicy.Changelog.section_split_re
+        self.header_split_re = pkgpolicy.Changelog.header_split_re
+        self.header_name_split_re = pkgpolicy.Changelog.header_name_split_re
+        self.body_name_re = pkgpolicy.Changelog.body_name_re
+
+    def raw_parse_string(self, string):
+        """Parse changelog - only splits out raw changelog sections.
+
+        @param string: changelog text
+        @type string: C{str}
+        @return: changelog whose sections are raw, unparsed strings
+        @rtype: L{Changelog}
+        @raise ChangelogError: if the text does not begin with a section
+            header line
+        """
+        changelog = Changelog(self._pkgpolicy)
+        ch_section = ""
+        for line in string.splitlines():
+            if re.match(self.section_match_re, line, re.M | re.S):
+                # A header line closes the section collected so far
+                if ch_section:
+                    changelog.sections.append(ch_section)
+                ch_section = line + '\n'
+            elif ch_section:
+                ch_section += line + '\n'
+            else:
+                raise ChangelogError("First line in changelog is invalid")
+        if ch_section:
+            changelog.sections.append(ch_section)
+        return changelog
+
+    def raw_parse_file(self, changelog):
+        """Parse changelog file - only splits out raw changelog sections.
+
+        @param changelog: path of the changelog file
+        @return: see L{raw_parse_string}
+        @raise ChangelogError: if the file cannot be read or its content is
+            rejected by L{raw_parse_string}
+        """
+        try:
+            with open(changelog) as ch_file:
+                return self.raw_parse_string(ch_file.read())
+        except IOError as err:
+            raise ChangelogError("Unable to read changelog file: %s" % err)
+
+    def _parse_section_header(self, text):
+        """Parse one changelog section header
+
+        @param text: header line without the leading asterisk
+        @return: new section, without entries, having the parsed header
+        @rtype: L{_ChangelogSection}
+        @raise ChangelogError: if the timestamp or the name part is invalid
+        """
+        # Try to split out time stamp and "changelog name"
+        match = re.match(self.header_split_re, text, re.M)
+        if not match:
+            raise ChangelogError("Unable to parse changelog header: %s" % text)
+        try:
+            time = datetime.datetime.strptime(match.group('ch_time'),
+                                              "%a %b %d %Y")
+        except ValueError:
+            raise ChangelogError("Unable to parse changelog header: invalid "
+                                 "timestamp '%s'" % match.group('ch_time'))
+        # Parse "name" part which consists of name and/or email and an optional
+        # revision
+        name_text = match.group('ch_name')
+        match = re.match(self.header_name_split_re, name_text)
+        if not match:
+            raise ChangelogError("Unable to parse changelog header: invalid "
+                                 "name / revision '%s'" % name_text)
+        kwargs = match.groupdict()
+        return _ChangelogSection(self._pkgpolicy, time=time, **kwargs)
+
+    def _create_entry(self, author, text):
+        """Create a new changelog entry"""
+        return _ChangelogEntry(self._pkgpolicy, author=author, text=text)
+
+    def _parse_section_entries(self, text, default_author):
+        """Parse the entries of a section body
+
+        A line matching C{body_name_re} changes the author of the entries
+        that follow it. A line starting with a dash begins a new entry and
+        any other line continues the current one.
+
+        @param text: body of the section, i.e. everything after the header
+        @param default_author: author of entries preceding any author line
+        @return: the parsed entries in the order they appear
+        @rtype: C{list} of L{_ChangelogEntry}
+        """
+        entries = []
+        entry_text = []
+        author = default_author
+        for line in text.splitlines():
+            match = re.match(self.body_name_re, line)
+            if match:
+                if entry_text:
+                    entries.append(self._create_entry(author, entry_text))
+                    # Start afresh, otherwise the entry just stored would be
+                    # added once more under the new author
+                    entry_text = []
+                author = match.group('name')
+            elif line.startswith("-"):
+                if entry_text:
+                    entries.append(self._create_entry(author, entry_text))
+                entry_text = [line]
+            else:
+                if not entry_text:
+                    gbp.log.info("First changelog entry (%s) is garbled, "
+                                 "entries should start with a dash ('-')" %
+                                 line)
+                entry_text.append(line)
+        if entry_text:
+            entries.append(self._create_entry(author, entry_text))
+
+        return entries
+
+    def parse_section(self, text):
+        """Parse one section
+
+        @param text: raw text of the section, header line included
+        @return: section with parsed header and entries
+        @rtype: L{_ChangelogSection}
+        @raise ChangelogError: if the header of the section is invalid
+        """
+        # Check that the first line(s) look like a changelog header
+        match = re.match(self.section_split_re, text, re.M | re.S)
+        if not match:
+            # Empty text has no first line to show in the error message
+            lines = text.splitlines()
+            raise ChangelogError("Doesn't look like changelog header: %s..." %
+                                 (lines[0] if lines else ""))
+        # Parse header
+        section = self._parse_section_header(match.group('ch_header'))
+        header = section.header
+        # Parse entries
+        default_author = header['name'] if 'name' in header else header['email']
+        for entry in self._parse_section_entries(match.group('ch_body'),
+                                                 default_author):
+            section.append_entry(entry)
+
+        return section
+
index d5b096b..62d98f9 100644 (file)
@@ -143,3 +143,18 @@ class RpmPkgPolicy(PkgPolicy):
                 return version
         return None
 
+    class Changelog(object):
+        """Container for changelog related policy settings"""
+
+        # Regexps for splitting/parsing the changelog section (of
+        # Tizen / Fedora style changelogs)
+        # Matches the first line of a section, i.e. a line starting with '*'
+        section_match_re =  r'^\*'
+        # Splits a section into its header line (ch_header, without the
+        # leading '*') and everything after that line (ch_body)
+        section_split_re = r'^\*\s*(?P<ch_header>\S.*?)$\n(?P<ch_body>.*)'
+        # Splits a header into a timestamp ending in a four-digit year
+        # (ch_time) and the text following it (ch_name)
+        header_split_re = r'(?P<ch_time>\S.*\s[0-9]{4})\s+(?P<ch_name>\S.*$)'
+        # Splits the text after the timestamp into name, <email> and an
+        # optional revision that may be preceded by a dash
+        header_name_split_re = r'(?P<name>[^<]*)\s+<(?P<email>[^>]+)>((\s*-)?\s+(?P<revision>\S+))?$'
+        # Matches a "[name]" marker at the start of a line of the section body
+        body_name_re = r'\[(?P<name>.*)\]'
+
+        # Changelog header format (when writing out changelog)
+        header_format = "* %(time)s %(name)s <%(email)s> %(revision)s"
+        # strftime() format of the time stamp put into the header
+        header_time_format = "%a %b %d %Y"
+        # NOTE(review): presumably the format of the header revision; nothing
+        # in the code of this change reads it - confirm against the callers
+        header_rev_format = "%(version)s"
diff --git a/tests/test_rpm_changelog.py b/tests/test_rpm_changelog.py
new file mode 100644 (file)
index 0000000..2973293
--- /dev/null
@@ -0,0 +1,226 @@
+# vim: set fileencoding=utf-8 :
+#
+# (C) 2014 Intel Corporation <markus.lehtonen@linux.intel.com>
+#    This program is free software; you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation; either version 2 of the License, or
+#    (at your option) any later version.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with this program; if not, write to the Free Software
+#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+"""Test RPM changelog classes and parsing"""
+
+from datetime import datetime
+from nose.tools import assert_raises, eq_, ok_  # pylint: disable=E0611
+from tempfile import NamedTemporaryFile
+
+from gbp.rpm.changelog import _ChangelogHeader, _ChangelogEntry
+from gbp.rpm.changelog import _ChangelogSection, Changelog
+from gbp.rpm.changelog import ChangelogParser, ChangelogError
+from gbp.rpm.policy import RpmPkgPolicy
+
+
+class TestChangelogHeader(object):
+    """Test the _ChangelogHeader class"""
+
+    def test_str_format(self):
+        """Basic test for header"""
+        # No leading zeros in the integer literals: they denote octal numbers
+        # in Python 2 and are a syntax error in Python 3
+        time = datetime(2014, 1, 29, 12, 13, 14)
+        header = _ChangelogHeader(RpmPkgPolicy, time, name="John Doe",
+                                  email="user@host.com", revision="1")
+        eq_(str(header), "* Wed Jan 29 2014 John Doe <user@host.com> 1\n")
+
+    def test_str_format_err(self):
+        """Test missing properties"""
+        time = datetime(2014, 1, 29, 12, 13, 14)
+        # The email required by the header format is left out
+        header = _ChangelogHeader(RpmPkgPolicy, time, name="John", revision="1")
+        with assert_raises(ChangelogError):
+            str(header)
+
+    def test_container(self):
+        """Test the container methods of the class"""
+        header = _ChangelogHeader(RpmPkgPolicy, datetime(2014, 1, 1), name="N",
+                                  revision="1")
+        # Test __getitem__()
+        eq_(header['name'], "N")
+        eq_(header['email'], None)
+        # Test __contains__()
+        ok_('name' in header)
+        ok_('foo' not in header)
+
+
+class TestChangelogEntry(object):
+    """Test the _ChangelogEntry class"""
+
+    def test_str_format(self):
+        """An entry is printed as its text followed by a newline"""
+        raw_text = "- foo\n  bar"
+        entry = _ChangelogEntry(RpmPkgPolicy, text=raw_text, author="John Doe")
+        eq_(str(entry), "- foo\n  bar\n")
+
+
+class TestChangelogSection(object):
+    """Test the _ChangelogSection class"""
+
+    def setup(self):
+        """Initialize test: create a section having one entry"""
+        # No leading zeros in the integer literals: they denote octal numbers
+        # in Python 2 and are a syntax error in Python 3
+        time = datetime(2014, 1, 29, 12, 13, 14)
+        self.default_sect = _ChangelogSection(RpmPkgPolicy, time, name="J. D.",
+                                              email="u@h", revision="1")
+        entry = _ChangelogEntry(RpmPkgPolicy, "J. D.", "- my change")
+        self.default_sect.entries = [entry]
+
+    def test_str_format(self):
+        """Basic test"""
+        section = self.default_sect
+        eq_(str(section), "* Wed Jan 29 2014 J. D. <u@h> 1\n- my change\n\n")
+
+    def test_append_entry(self):
+        """Test append_entry() method"""
+        section = self.default_sect
+        entry = _ChangelogEntry(RpmPkgPolicy, author="",
+                                text="- another\n  change")
+        new_entry = section.append_entry(entry)
+        eq_(str(section), "* Wed Jan 29 2014 J. D. <u@h> 1\n- my change\n"
+                          "- another\n  change\n\n")
+        eq_(new_entry, section.entries[-1])
+
+    def test_set_header(self):
+        """Test set_header() method"""
+        section = self.default_sect
+        time = datetime(2014, 1, 30)
+        section.set_header(time=time, name="Jane", email="u@h", revision="1.1")
+        eq_(str(section), "* Thu Jan 30 2014 Jane <u@h> 1.1\n- my change\n\n")
+
+class TestChangelogParser(object):
+    """Test the default changelog parser"""
+
+    cl_default_style = """\
+* Wed Jan 29 2014 Markus Lehtonen <markus.lehtonen@linux.intel.com> 0.3-1
+- Version bump
+- Drop foo.patch
+
+* Tue Jan 28 2014 Markus Lehtonen <markus.lehtonen@linux.intel.com> 0.2
+- Update to 0.2
+
+* Mon Jan 27 2014 Markus Lehtonen <markus.lehtonen@linux.intel.com> 0.1
+- Initial version
+"""
+    cl_with_authors = """\
+* Wed Jan 29 2014 Markus Lehtonen <markus.lehtonen@linux.intel.com> 0.3-1
+[Markus Lehtonen]
+- Version bump
+[John Doe]
+- Bug fix
+"""
+    # Invalid timestamp / name
+    cl_broken_header_1 = """\
+* Wed Jan 29 2014Markus Lehtonen <markus.lehtonen@linux.intel.com> 0.3-1
+- Version bump
+"""
+    # Whitespace before the asterisk in the header
+    cl_broken_header_2 = """\
+ * Wed Jan 29 2014 Markus Lehtonen <markus.lehtonen@linux.intel.com> 0.3-1
+- Version bump
+"""
+    # Invalid timestamp
+    cl_broken_header_3 = """\
+* Wed Jan 32 2014 Markus Lehtonen <markus.lehtonen@linux.intel.com> 0.3-1
+- Version bump
+"""
+    # Missing email
+    cl_broken_header_4 = """\
+* Wed Jan 29 2014 Markus Lehtonen 0.3-1
+- Version bump
+"""
+    # Garbage before section header
+    cl_broken_header_5 = """\
+---garbage---
+* Wed Jan 29 2014 Markus Lehtonen <markus.lehtonen@linux.intel.com> 0.3-1
+- Version bump
+"""
+
+    # One parser instance, using the default RPM policy, serves all tests
+    parser = ChangelogParser(RpmPkgPolicy)
+
+    def test_parse_changelog(self):
+        """Basic tests for successful parsing"""
+        # Raw parsing of changelog
+        changelog = self.parser.raw_parse_string(self.cl_default_style)
+        eq_(len(changelog.sections), 3)
+
+        # Check that re-creating the changelog doesn't mangle it
+        eq_(str(changelog), self.cl_default_style)
+
+        # Parse and check section
+        section = self.parser.parse_section(changelog.sections[0])
+
+        eq_(section.header['time'], datetime(2014, 1, 29))
+        eq_(section.header['name'], "Markus Lehtonen")
+        eq_(section.header['email'], "markus.lehtonen@linux.intel.com")
+        eq_(section.header['revision'], "0.3-1")
+
+        # Check that re-creating section doesn't mangle it
+        eq_(str(section), changelog.sections[0])
+
+    def test_parse_authors(self):
+        """Test parsing of authors from changelog entries"""
+        section = self.parser.parse_section(self.cl_with_authors)
+        eq_(section.entries[0].author, "Markus Lehtonen")
+        eq_(section.entries[1].author, "John Doe")
+
+    def test_parse_changelog_file(self):
+        """Basic tests for parsing a file"""
+        # The context manager closes (and thereby removes) the temporary
+        # file even if parsing or one of the checks below fails
+        with NamedTemporaryFile() as tmpfile:
+            # Create file and parse it
+            tmpfile.write(self.cl_default_style)
+            tmpfile.file.flush()
+            changelog = self.parser.raw_parse_file(tmpfile.name)
+            # Check parsing results
+            eq_(len(changelog.sections), 3)
+            eq_(str(changelog), self.cl_default_style)
+
+    def test_parse_section_fail(self):
+        """Basic tests for failures of changelog section parsing"""
+        with assert_raises(ChangelogError):
+            self.parser.parse_section(self.cl_broken_header_1)
+
+        with assert_raises(ChangelogError):
+            self.parser.parse_section(self.cl_broken_header_2)
+
+        with assert_raises(ChangelogError):
+            self.parser.parse_section(self.cl_broken_header_3)
+
+        with assert_raises(ChangelogError):
+            self.parser.parse_section(self.cl_broken_header_4)
+
+    def test_parse_changelog_fail(self):
+        """Basic tests for changelog parsing failures"""
+        with assert_raises(ChangelogError):
+            self.parser.raw_parse_string(self.cl_broken_header_5)
+
+
+class TestChangelog(object):
+    """Unit tests for the Changelog class"""
+
+    def basic_test(self):
+        """Test basic initialization"""
+        changelog = Changelog(RpmPkgPolicy)
+        eq_(str(changelog), "")
+
+    def test_add_section(self):
+        """Test the add_section() method"""
+        changelog = Changelog(RpmPkgPolicy)
+        # No leading zero in the integer literal: it denotes an octal number
+        # in Python 2 and is a syntax error in Python 3
+        time = datetime(2014, 1, 30)
+        new_section = changelog.add_section(time=time, name="Jane Doe",
+                                            email="j@doe.com", revision="1.2")
+        eq_(str(changelog), "* Thu Jan 30 2014 Jane Doe <j@doe.com> 1.2\n\n")
+        eq_(new_section, changelog.sections[0])