New Deltarepo
author Tomas Mlcoch <tmlcoch@redhat.com>
Thu, 10 Oct 2013 13:30:09 +0000 (15:30 +0200)
committer Tomas Mlcoch <tmlcoch@redhat.com>
Thu, 10 Oct 2013 13:54:22 +0000 (15:54 +0200)
deltarepo/deltarepo.py
deltarepo/deltarepo/__init__.py
deltarepo/deltarepo/applicator.py [new file with mode: 0644]
deltarepo/deltarepo/common.py [new file with mode: 0644]
deltarepo/deltarepo/delta_plugins.py [new file with mode: 0644]
deltarepo/deltarepo/errors.py [new file with mode: 0644]
deltarepo/deltarepo/generator.py [new file with mode: 0644]

diff --git a/deltarepo/deltarepo.py b/deltarepo/deltarepo.py
index 6ddffa4..aa02454 100755 (executable)
@@ -9,6 +9,10 @@ import deltarepo
 
 LOG_FORMAT = "%(message)s"
 
+# TODO:
+# - Support for different types of compression (?)
+
+
 def parse_options():
     parser = OptionParser("usage: %prog [options] <first_repo> <second_repo>\n" \
                           "       %prog --apply <repo> <delta_repo>")
@@ -24,12 +28,12 @@ def parse_options():
                       help="Output directory.", default="./")
 
     group = OptionGroup(parser, "Delta generation")
-    group.add_option("--skip", action="append", metavar="DATATYPE",
-                     help="Skip delta on the DATATYPE. Could be specified "\
-                     "multiple times. (E.g., --skip=comps)")
-    group.add_option("--do-only", action="append", metavar="DATATYPE",
-                     help="Do delta only for the DATATYPE. Could be specified "\
-                     "multiple times. (E.g., --do-only=primary)")
+    #group.add_option("--skip", action="append", metavar="DATATYPE",
+    #                 help="Skip delta on the DATATYPE. Could be specified "\
+    #                 "multiple times. (E.g., --skip=comps)")
+    #group.add_option("--do-only", action="append", metavar="DATATYPE",
+    #                 help="Do delta only for the DATATYPE. Could be specified "\
+    #                 "multiple times. (E.g., --do-only=primary)")
     group.add_option("-t", "--id-type", action="store", metavar="HASHTYPE",
                      help="Hash function for the ids (RepoId and DeltaRepoId). " \
                      "Default is sha256.", default="sha256")
@@ -38,8 +42,8 @@ def parse_options():
     group = OptionGroup(parser, "Delta application")
     group.add_option("-a", "--apply", action="store_true",
                      help="Enable delta application mode.")
-    group.add_option("-d", "--database", action="store_true",
-                     help="Gen database.")
+    #group.add_option("-d", "--database", action="store_true",
+    #                 help="Gen database.")
     parser.add_option_group(group)
 
     options, args = parser.parse_args()
@@ -97,16 +101,18 @@ if __name__ == "__main__":
 
     logger = setup_logging(options.quiet, options.verbose)
 
-    generator = deltarepo.DeltaRepoGenerator(id_type=options.id_type,
-                                             logger=logger)
-
-    # TODO: check if repo is really delta repo (must has a repoid and removed.xml)
-
     if options.apply:
         # Applying delta
-        generator.applydelta(args[0], args[1], out_path=options.outputdir,
-                             database=options.database)
+        da = deltarepo.DeltaRepoApplicator(args[0],
+                                           args[1],
+                                           out_path=options.outputdir,
+                                           logger=logger)
+        da.apply()
     else:
         # Do delta
-        generator.gendelta(args[0], args[1], out_path=options.outputdir,
-                           do_only=options.do_only, skip=options.skip)
+        dg = deltarepo.DeltaRepoGenerator(args[0],
+                                          args[1],
+                                          out_path=options.outputdir,
+                                          logger=logger,
+                                          repoid_type=options.id_type)
+        dg.gen()
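
The command line tool now only parses options and delegates the real work to the
two new classes. A minimal sketch of driving the same API directly from Python
follows; the repository paths and the logger setup are illustrative only, not
part of this patch:

    import logging
    import deltarepo

    logger = logging.getLogger("deltarepo")
    logger.addHandler(logging.StreamHandler())
    logger.setLevel(logging.DEBUG)

    # Generate a delta repository from two snapshots of a repo
    dg = deltarepo.DeltaRepoGenerator("/repos/old", "/repos/new",
                                      out_path="/repos/deltas/",
                                      logger=logger,
                                      repoid_type="sha256")
    dg.gen()

    # Reconstruct the new repo from the old one plus the generated delta
    da = deltarepo.DeltaRepoApplicator("/repos/old", "/repos/deltas/delta",
                                       out_path="/repos/rebuilt/",
                                       logger=logger)
    da.apply()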
diff --git a/deltarepo/deltarepo/__init__.py b/deltarepo/deltarepo/__init__.py
index 3533420..fad0c58 100644 (file)
@@ -8,719 +8,16 @@ Copyright (C) 2013   Tomas Mlcoch
 
 """
 
-import os
-import shutil
-import hashlib
-import logging
-import xml.dom.minidom
-from lxml import etree
 import createrepo_c as cr
+from deltarepo.common import LoggingInterface, Metadata, RemovedXml
+from deltarepo.applicator import DeltaRepoApplicator
+from deltarepo.generator import DeltaRepoGenerator
+from deltarepo.delta_plugins import PLUGINS
+from deltarepo.errors import DeltaRepoError, DeltaRepoPluginError
 
 __all__ = ['VERSION', 'VERBOSE_VERSION', 'DeltaRepoError',
-           'DeltaRepoGenerator']
+           'DeltaRepoPluginError', 'DeltaRepoGenerator',
+           'DeltaRepoApplicator']
 
 VERSION = "0.0.1"
 VERBOSE_VERSION = "%s (createrepo_c: %s)" % (VERSION, cr.VERSION)
-
-class DeltaRepoError(Exception):
-    pass
-
-class LoggingInterface(object):
-    def __init__(self, logger=None):
-        if logger is None:
-            logger = logging.getLogger()
-            logger.disabled = True
-        self.logger = logger
-
-    def _debug(self, msg):
-        self.logger.debug(msg)
-
-    def _info(self, msg):
-        self.logger.info(msg)
-
-    def _warning(self, msg):
-        self.logger.warning(msg)
-
-    def _error(self, msg):
-        self.logger.error(msg)
-
-    def _critical(self, msg):
-        self.logger.critical(msg)
-
-class DeltaModule(LoggingInterface):
-
-    def __init__(self, id_type=None, logger=None):
-        LoggingInterface.__init__(self, logger)
-
-        if id_type is None:
-            id_type = "sha256"
-        self.id_type = id_type
-
-    def _path(self, path, record):
-        """Return path to the repodata file."""
-        return os.path.join(path, record.location_href)
-
-    def _pkg_id_tuple(self, pkg):
-        """Return tuple identifying a package in repodata.
-        (pkgId, location_href, location_base)"""
-        return (pkg.pkgId, pkg.location_href, pkg.location_base)
-
-    def _pkg_id_str(self, pkg):
-        """Return string identifying a package in repodata.
-        This strings are used for the RepoId calculation."""
-        if not pkg.pkgId:
-            self._warning("Missing pkgId in a package!")
-        if not pkg.location_href:
-            self._warning("Missing location_href at package %s %s" % \
-                          (pkg.name, pkg.pkgId))
-        return "%s%s%s" % (pkg.pkgId or '',
-                           pkg.location_href or '',
-                           pkg.location_base or '')
-
-class MainDeltaModule(DeltaModule):
-
-    def apply(self, pri_old_fn, pri_delta_fn, pri_f, pri_db, fil_old_fn,
-              fil_delta_fn, fil_f, fil_db,oth_old_fn, oth_delta_fn, oth_f,
-              oth_db, removed):
-
-        removed_packages = set() # set of pkgIds (hashes)
-        all_packages = {}        # dict { 'pkgId': pkg }
-
-        old_repoid_strings = []
-        new_repoid_strings = []
-
-        def old_pkgcb(pkg):
-            old_repoid_strings.append(self._pkg_id_str(pkg))
-            if pkg.location_href in removed.packages:
-                if removed.packages[pkg.location_href] == pkg.location_base:
-                    # This package won't be in new metadata
-                    return
-            new_repoid_strings.append(self._pkg_id_str(pkg))
-            all_packages[pkg.pkgId] = pkg
-
-        def delta_pkgcb(pkg):
-            new_repoid_strings.append(self._pkg_id_str(pkg))
-            all_packages[pkg.pkgId] = pkg
-
-        do_primary_files = 1
-        if fil_f and fil_delta_fn and fil_old_fn:
-            do_primary_files = 0
-
-        cr.xml_parse_primary(pri_old_fn, pkgcb=old_pkgcb,
-                             do_files=do_primary_files)
-        cr.xml_parse_primary(pri_delta_fn, pkgcb=delta_pkgcb,
-                             do_files=do_primary_files)
-
-        # Calculate RepoIds
-        old_repo_id = ""
-        new_repo_id = ""
-
-        h = hashlib.new(self.id_type)
-        old_repoid_strings.sort()
-        for i in old_repoid_strings:
-            h.update(i)
-        old_repo_id = h.hexdigest()
-
-        h = hashlib.new(self.id_type)
-        new_repoid_strings.sort()
-        for i in new_repoid_strings:
-            h.update(i)
-        new_repo_id = h.hexdigest()
-
-        # Sort packages
-        def cmp_pkgs(x, y):
-            # Compare only by filename
-            ret = cmp(os.path.basename(x.location_href),
-                      os.path.basename(y.location_href))
-            if ret != 0:
-                return ret
-
-            # Compare by full location_href path
-            return  cmp(x.location_href, y.location_href)
-
-        all_packages_sorted = sorted(all_packages.values(), cmp=cmp_pkgs)
-
-        def newpkgcb(pkgId, name, arch):
-            return all_packages.get(pkgId, None)
-
-        # Parse filelists
-        if fil_f and fil_delta_fn and fil_old_fn:
-            cr.xml_parse_filelists(fil_old_fn, newpkgcb=newpkgcb)
-            cr.xml_parse_filelists(fil_delta_fn, newpkgcb=newpkgcb)
-
-        # Parse other
-        if oth_f and oth_delta_fn and oth_old_fn:
-            cr.xml_parse_other(oth_old_fn, newpkgcb=newpkgcb)
-            cr.xml_parse_other(oth_delta_fn, newpkgcb=newpkgcb)
-
-        num_of_packages = len(all_packages_sorted)
-
-        # Write out primary
-        pri_f.set_num_of_pkgs(num_of_packages)
-        for pkg in all_packages_sorted:
-            pri_f.add_pkg(pkg)
-            if pri_db:
-                pri_db.add_pkg(pkg)
-
-        # Write out filelists
-        if fil_f:
-            fil_f.set_num_of_pkgs(num_of_packages)
-            for pkg in all_packages_sorted:
-                fil_f.add_pkg(pkg)
-                if fil_db:
-                    fil_db.add_pkg(pkg)
-
-        # Write out other
-        if oth_f:
-            oth_f.set_num_of_pkgs(num_of_packages)
-            for pkg in all_packages_sorted:
-                oth_f.add_pkg(pkg)
-                if oth_db:
-                    oth_db.add_pkg(pkg)
-
-        return (old_repo_id, new_repo_id)
-
-    def do(self, pri_old_fn, pri_new_fn, pri_f,
-           fil_new_fn, fil_f, oth_new_fn, oth_f, removed):
-
-        old_packages = set()
-        added_packages = {}         # dict { 'pkgId': pkg }
-        added_packages_ids = []     # list of package ids
-
-        old_repoid_strings = []
-        new_repoid_strings = []
-
-        def old_pkgcb(pkg):
-            old_packages.add(self._pkg_id_tuple(pkg))
-            old_repoid_strings.append(self._pkg_id_str(pkg))
-
-        def new_pkgcb(pkg):
-            new_repoid_strings.append(self._pkg_id_str(pkg))
-            pkg_id_tuple = self._pkg_id_tuple(pkg)
-            if not pkg_id_tuple in old_packages:
-                # This package is only in new repodata
-                added_packages[pkg.pkgId] = pkg
-                added_packages_ids.append(pkg.pkgId)
-            else:
-                # This package is also in the old repodata
-                old_packages.remove(pkg_id_tuple)
-
-        do_new_primary_files = 1
-        if fil_f and fil_new_fn:
-            # All files will be parsed from filelists
-            do_new_primary_files = 0
-
-        cr.xml_parse_primary(pri_old_fn, pkgcb=old_pkgcb, do_files=0)
-        cr.xml_parse_primary(pri_new_fn, pkgcb=new_pkgcb,
-                             do_files=do_new_primary_files)
-
-        # Calculate RepoIds
-        old_repo_id = ""
-        new_repo_id = ""
-
-        h = hashlib.new(self.id_type)
-        old_repoid_strings.sort()
-        for i in old_repoid_strings:
-            h.update(i)
-        old_repo_id = h.hexdigest()
-
-        h = hashlib.new(self.id_type)
-        new_repoid_strings.sort()
-        for i in new_repoid_strings:
-            h.update(i)
-        new_repo_id = h.hexdigest()
-
-        removed_pkgs = sorted(old_packages)
-        for _, location_href, location_base in removed_pkgs:
-            removed.add_pkg_locations(location_href, location_base)
-
-        num_of_packages = len(added_packages)
-
-        # Filelists and Other cb
-        def newpkgcb(pkgId, name, arch):
-            return added_packages.get(pkgId, None)
-
-        # Write out filelists delta
-        if fil_f and fil_new_fn:
-            cr.xml_parse_filelists(fil_new_fn, newpkgcb=newpkgcb)
-            fil_f.set_num_of_pkgs(num_of_packages)
-            for pkgid in added_packages_ids:
-                fil_f.add_pkg(added_packages[pkgid])
-            fil_f.close()
-
-        # Write out other delta
-        if oth_f and oth_new_fn:
-            cr.xml_parse_other(oth_new_fn, newpkgcb=newpkgcb)
-            oth_f.set_num_of_pkgs(num_of_packages)
-            for pkgid in added_packages_ids:
-                oth_f.add_pkg(added_packages[pkgid])
-            oth_f.close()
-
-        # Write out primary delta
-        # Note: Writing of primary delta has to be after parsing of filelists
-        # Otherway cause missing files if do_new_primary_files was 0
-        pri_f.set_num_of_pkgs(num_of_packages)
-        for pkgid in added_packages_ids:
-            pri_f.add_pkg(added_packages[pkgid])
-        pri_f.close()
-
-        return (old_repo_id, new_repo_id)
-
-class RemovedXml(object):
-    def __init__(self):
-        self.packages = {}  # { location_href: location_base }
-        self.files = {}     # { location_href: location_base or Null }
-
-    def __str__(self):
-        print self.packages
-        print self.files
-
-    def add_pkg(self, pkg):
-        self.packages[pkg.location_href] = pkg.location_base
-
-    def add_pkg_locations(self, location_href, location_base):
-        self.packages[location_href] = location_base
-
-    def add_record(self, rec):
-        self.files[rec.location_href] = rec.location_base
-
-    def xml_dump(self):
-        xmltree = etree.Element("removed")
-        packages = etree.SubElement(xmltree, "packages")
-        for href, base in self.packages.iteritems():
-            attrs = {}
-            if href: attrs['href'] = href
-            if base: attrs['base'] = base
-            if not attrs: continue
-            etree.SubElement(packages, "location", attrs)
-        files = etree.SubElement(xmltree, "files")
-        for href, base in self.files.iteritems():
-            attrs = {}
-            if href: attrs['href'] = href
-            if base: attrs['base'] = base
-            if not attrs: continue
-            etree.SubElement(files, "location", attrs)
-        return etree.tostring(xmltree,
-                              pretty_print=True,
-                              encoding="UTF-8",
-                              xml_declaration=True)
-
-    def xml_parse(self, path):
-        dom = xml.dom.minidom.parse(path)
-
-        packages = dom.getElementsByTagName("packages")
-        if packages:
-            for loc in packages[0].getElementsByTagName("location"):
-                href = loc.getAttribute("href")
-                base = loc.getAttribute("base")
-                if not href:
-                    continue
-                if not base:
-                    base = None
-                self.packages[href] = base
-
-        files = dom.getElementsByTagName("files")
-        if files:
-            for loc in files[0].getElementsByTagName("location"):
-                href = loc.getAttribute("href")
-                base = loc.getAttribute("base")
-                if not href:
-                    continue
-                if not base:
-                    base = None
-                self.files[href] = base
-
-class DeltaRepoGenerator(LoggingInterface):
-    """Object for generating of DeltaRepositories."""
-
-    def __init__(self, id_type=None, logger=None):
-        LoggingInterface.__init__(self, logger)
-
-        # id_type is type of checksum used for RepoId and
-        # DeltaRepoId calculation
-        if id_type is None:
-            id_type = "sha256"
-        self.id_type = id_type
-
-        # checksum_type is checksum type used for the repomd records.
-        self.checksum_type = cr.SHA256
-
-    def _fn_without_checksum(self, path):
-        """Strip checksum from a record filename"""
-        path = os.path.basename(path)
-        return path.rsplit('-')[-1]
-
-    def applydelta(self, old_path, delta_path, out_path=None, database=False):
-        removedxml = RemovedXml()
-        hash_in_the_name = False
-
-        # Prepare variables with paths
-        old_repodata_path = os.path.join(old_path, "repodata/")
-        delta_repodata_path = os.path.join(delta_path, "repodata/")
-
-        old_repomd_path = os.path.join(old_repodata_path, "repomd.xml")
-        delta_repomd_path = os.path.join(delta_repodata_path, "repomd.xml")
-
-        # Prepare Repomd objects
-        old_repomd = cr.Repomd(old_repomd_path)
-        delta_repomd = cr.Repomd(delta_repomd_path)
-        new_repomd = cr.Repomd()
-
-        # Check if delta id correspond with used repo
-        if not delta_repomd.repoid or len(delta_repomd.repoid.split('-')) != 2:
-            raise DeltaRepoError("Bad DeltaRepoId")
-
-        self.id_type = delta_repomd.repoid_type
-
-        old_id, new_id = delta_repomd.repoid.split('-')
-
-        self._debug("Delta %s -> %s" % (old_id, new_id))
-
-        if old_repomd.repoid_type == delta_repomd.repoid_type:
-            if old_repomd.repoid and old_repomd.repoid != old_id:
-                raise DeltaRepoError("Not suitable delta for current repo " \
-                        "(Expected: %s Real: %s)" % (old_id, old_repomd.repoid))
-        else:
-            self._debug("Different repoid types repo: %s vs delta: %s" % \
-                    (old_repomd.repoid_type, delta_repomd.repoid_type))
-
-        # Prepare output path
-        new_path = os.path.join(out_path, ".repodata/")
-        new_repodata_path = os.path.join(new_path, "repodata/")
-        os.mkdir(new_path)
-        os.mkdir(new_repodata_path)
-
-        # Apply repomd delta
-        new_repomd.set_revision(delta_repomd.revision)
-        for tag in delta_repomd.distro_tags:
-            new_repomd.add_distro_tag(tag[1], tag[0])
-        for tag in delta_repomd.repo_tags:
-            new_repomd.add_repo_tag(tag)
-        for tag in delta_repomd.content_tags:
-            new_repomd.add_content_tag(tag)
-
-        old_records = dict([(record.type, record) for record in old_repomd.records ])
-        delta_records = dict([(record.type, record) for record in delta_repomd.records ])
-        old_record_types = set(old_records.keys())
-        delta_record_types = set(delta_records.keys())
-        deleted_repomd_record_types = old_record_types - delta_record_types
-        added_repomd_record_types = delta_record_types - old_record_types
-
-        # Prepare removedxml
-        if "removed" in delta_records:
-            removedxml_path = os.path.join(delta_path,
-                                delta_records["removed"].location_href)
-            removedxml.xml_parse(removedxml_path)
-        else:
-            self._warning("\"removed\" record is missing in repomd.xml "\
-                          "of delta repo")
-
-        # Important sanity check (repo without primary is definitely bad)
-        if not "primary" in old_records or not "primary" in delta_records:
-            raise DeltaRepoError("Missing primary metadata")
-
-        # Detect type of checksum in the delta repomd.xml
-        self.checksum_type = cr.checksum_type(delta_records["primary"].checksum_type)
-        if self.checksum_type == cr.UNKNOWN_CHECKSUM:
-            raise DeltaRepoError("Unknown checksum type detected: %s" % \
-                    delta_records["primary"].checksum_type)
-
-        # Detection if use unique md filenames
-        if delta_records["primary"].location_href.split("primary")[0] != "":
-            hash_in_the_name = True
-
-        # Apply delta on primary, filelists and other
-        pri_old_fn = os.path.join(old_path, old_records["primary"].location_href)
-        pri_delta_fn = os.path.join(delta_path, delta_records["primary"].location_href)
-        pri_out_fn = os.path.join(new_repodata_path, "primary.xml.gz")
-        pri_out_f_stat = cr.ContentStat(self.checksum_type)
-        pri_out_f = cr.PrimaryXmlFile(pri_out_fn, cr.GZ_COMPRESSION)
-        pri_db_fn = None
-        pri_db = None
-        if database:
-            pri_db_fn = os.path.join(new_repodata_path, "primary.sqlite")
-            pri_db = cr.PrimarySqlite(pri_db_fn)
-
-        fil_old_fn = None
-        fil_delta_fn = None
-        fil_out_fn = None
-        fil_out_f_stat = None
-        fil_out_f = None
-        fil_db_fn = None
-        fil_db = None
-        if ("filelists" in delta_records):
-            fil_old_fn = os.path.join(old_path, old_records["filelists"].location_href)
-            fil_delta_fn = os.path.join(delta_path, delta_records["filelists"].location_href)
-            fil_out_fn = os.path.join(new_repodata_path, "filelists.xml.gz")
-            fil_out_f_stat = cr.ContentStat(self.checksum_type)
-            fil_out_f = cr.FilelistsXmlFile(fil_out_fn, cr.GZ_COMPRESSION)
-            if database:
-                fil_db_fn = os.path.join(new_repodata_path, "filelists.sqlite")
-                fil_db = cr.FilelistsSqlite(fil_db_fn)
-
-        oth_old_fn = None
-        oth_delta_fn = None
-        oth_out_fn = None
-        oth_out_f_stat = None
-        oth_out_f = None
-        oth_db_fn = None
-        oth_db = None
-        if ("other" in delta_records):
-            oth_old_fn = os.path.join(old_path, old_records["other"].location_href)
-            oth_delta_fn = os.path.join(delta_path, delta_records["other"].location_href)
-            oth_out_fn = os.path.join(new_repodata_path, "other.xml.gz")
-            oth_out_f_stat = cr.ContentStat(self.checksum_type)
-            oth_out_f = cr.OtherXmlFile(oth_out_fn, cr.GZ_COMPRESSION)
-            if database:
-                oth_db_fn = os.path.join(new_repodata_path, "other.sqlite")
-                oth_db = cr.OtherSqlite(oth_db_fn)
-
-        deltamodule = MainDeltaModule(id_type=self.id_type,
-                                      logger=self.logger)
-        ids = deltamodule.apply(pri_old_fn, pri_delta_fn, pri_out_f, pri_db,
-                                fil_old_fn, fil_delta_fn, fil_out_f, fil_db,
-                                oth_old_fn, oth_delta_fn, oth_out_f, oth_db,
-                                removedxml)
-
-        pri_out_f.close()
-        fil_out_f.close()
-        oth_out_f.close()
-
-        # Check returned IDs
-        cold_id, cnew_id = ids  # Calculated ids
-
-        if cold_id != old_id:
-            raise DeltaRepoError("Calculated old RepoId doesn't match!")
-
-        if cnew_id != new_id:
-            raise DeltaRepoError("Calculated new RepoId doesn't match!")
-
-        self._debug("RepoIds match")
-
-        # Prepare repomd.xml records
-        pri_rec = cr.RepomdRecord("primary", pri_out_fn)
-        pri_rec.load_contentstat(pri_out_f_stat)
-        pri_rec.fill(self.checksum_type)
-        if hash_in_the_name:
-            pri_rec.rename_file()
-        new_repomd.set_record(pri_rec)
-
-        if database:
-            pri_db.dbinfo_update(pri_rec.checksum)
-            pri_db.close()
-            pri_db_stat = cr.ContentStat(self.checksum_type)
-            pri_db_compressed = os.path.join(pri_db_fn+".bz2")
-            cr.compress_file(pri_db_fn, None, cr.BZ2, pri_db_stat)
-            os.remove(pri_db_fn)
-            pri_db_rec = cr.RepomdRecord("primary_db", pri_db_compressed)
-            pri_db_rec.load_contentstat(pri_db_stat)
-            pri_db_rec.fill(self.checksum_type)
-            if hash_in_the_name:
-                pri_db_rec.rename_file()
-            new_repomd.set_record(pri_db_rec)
-
-        if fil_out_fn:
-            fil_rec = cr.RepomdRecord("filelists", fil_out_fn)
-            fil_rec.load_contentstat(fil_out_f_stat)
-            fil_rec.fill(self.checksum_type)
-            if hash_in_the_name:
-                fil_rec.rename_file()
-            new_repomd.set_record(fil_rec)
-
-        if database:
-            fil_db.dbinfo_update(fil_rec.checksum)
-            fil_db.close()
-            fil_db_stat = cr.ContentStat(self.checksum_type)
-            fil_db_compressed = os.path.join(fil_db_fn+".bz2")
-            cr.compress_file(fil_db_fn, None, cr.BZ2, fil_db_stat)
-            os.remove(fil_db_fn)
-            fil_db_rec = cr.RepomdRecord("primary_db", fil_db_compressed)
-            fil_db_rec.load_contentstat(fil_db_stat)
-            fil_db_rec.fill(self.checksum_type)
-            if hash_in_the_name:
-                fil_db_rec.rename_file()
-            new_repomd.set_record(fil_db_rec)
-
-
-        if oth_out_fn:
-            oth_rec = cr.RepomdRecord("other", oth_out_fn)
-            oth_rec.load_contentstat(oth_out_f_stat)
-            oth_rec.fill(self.checksum_type)
-            if hash_in_the_name:
-                oth_rec.rename_file()
-            new_repomd.set_record(oth_rec)
-
-        if database:
-            oth_db.dbinfo_update(oth_rec.checksum)
-            oth_db.close()
-            oth_db_stat = cr.ContentStat(self.checksum_type)
-            oth_db_compressed = os.path.join(oth_db_fn+".bz2")
-            cr.compress_file(oth_db_fn, None, cr.BZ2, oth_db_stat)
-            os.remove(oth_db_fn)
-            oth_db_rec = cr.RepomdRecord("primary_db", oth_db_compressed)
-            oth_db_rec.load_contentstat(oth_db_stat)
-            oth_db_rec.fill(self.checksum_type)
-            if hash_in_the_name:
-                oth_db_rec.rename_file()
-            new_repomd.set_record(oth_db_rec)
-
-
-        # Write out repomd.xml
-        new_repomd.set_repoid(ids[1], self.id_type)
-        new_repomd_path = os.path.join(new_repodata_path, "repomd.xml")
-        new_repomd_xml = new_repomd.xml_dump()
-        self._debug("Writing repomd.xml")
-        open(new_repomd_path, "w").write(new_repomd_xml)
-
-        # Final move
-        final_destination = os.path.join(out_path, "repodata/")
-        if os.path.exists(final_destination):
-            self._warning("Destination dir already exists! Removing %s" % \
-                          final_destination)
-            shutil.rmtree(final_destination)
-        self._info("Moving %s -> %s" % (new_path, final_destination))
-        os.rename(new_path, final_destination)
-
-    def gendelta(self, old_path, new_path, out_path=None,
-                 do_only=None, skip=None):
-        removedxml = RemovedXml()
-        hash_in_the_name = False
-
-        # Prepare variables with paths
-        new_repodata_path = os.path.join(new_path, "repodata/")
-        old_repodata_path = os.path.join(old_path, "repodata/")
-
-        old_repomd_path = os.path.join(old_repodata_path, "repomd.xml")
-        new_repomd_path = os.path.join(new_repodata_path, "repomd.xml")
-
-        # Prepare Repomd objects
-        old_repomd = cr.Repomd(old_repomd_path)
-        new_repomd = cr.Repomd(new_repomd_path)
-        delta_repomd = cr.Repomd()
-
-        # Prepare output path
-        delta_path = os.path.join(out_path, ".deltarepo/")
-        delta_repodata_path = os.path.join(delta_path, "repodata/")
-        os.mkdir(delta_path)
-        os.mkdir(delta_repodata_path)
-
-        # Do repomd delta
-        delta_repomd.set_revision(new_repomd.revision)
-        for tag in new_repomd.distro_tags:
-            delta_repomd.add_distro_tag(tag[1], tag[0])
-        for tag in new_repomd.repo_tags:
-            delta_repomd.add_repo_tag(tag)
-        for tag in new_repomd.content_tags:
-            delta_repomd.add_content_tag(tag)
-
-        old_records = dict([(record.type, record) for record in old_repomd.records ])
-        new_records = dict([(record.type, record) for record in new_repomd.records ])
-        old_record_types = set(old_records.keys())
-        new_record_types = set(new_records.keys())
-        deleted_repomd_record_types = old_record_types - new_record_types
-        added_repomd_record_types = new_record_types - old_record_types
-
-        # Important sanity check (repo without primary is definitely bad)
-        if not "primary" in old_records or not "primary" in new_records:
-            raise DeltaRepoError("Missing primary metadata")
-
-        # Detect type of checksum in the new repomd.xml
-        self.checksum_type = cr.checksum_type(new_records["primary"].checksum_type)
-        if self.checksum_type == cr.UNKNOWN_CHECKSUM:
-            raise DeltaRepoError("Unknown checksum type detected: %s" % \
-                    new_records["primary"].checksum_type)
-
-        # Detection if use unique md filenames
-        if new_records["primary"].location_href.split("primary")[0] != "":
-            hash_in_the_name = True
-
-        # Do deltas for the "primary", "filelists" and "other"
-        pri_old_fn = os.path.join(old_path, old_records["primary"].location_href)
-        pri_new_fn = os.path.join(new_path, new_records["primary"].location_href)
-        pri_out_fn = os.path.join(delta_repodata_path, "primary.xml.gz")
-        pri_out_f_stat = cr.ContentStat(self.checksum_type)
-        pri_out_f = cr.PrimaryXmlFile(pri_out_fn, cr.GZ_COMPRESSION)
-
-        fil_new_fn = None
-        fil_out_fn = None
-        fil_out_f_stat = None
-        fil_out_f = None
-        if ("filelists" in new_records):
-            fil_new_fn = os.path.join(new_path, new_records["filelists"].location_href)
-            fil_out_fn = os.path.join(delta_repodata_path, "filelists.xml.gz")
-            fil_out_f_stat = cr.ContentStat(self.checksum_type)
-            fil_out_f = cr.FilelistsXmlFile(fil_out_fn, cr.GZ_COMPRESSION)
-
-        oth_new_fn = None
-        out_out_fn = None
-        oth_out_f_stat = None
-        oth_out_f = None
-        if ("other" in new_records):
-            oth_new_fn = os.path.join(new_path, new_records["other"].location_href)
-            oth_out_fn = os.path.join(delta_repodata_path, "other.xml.gz")
-            oth_out_f_stat = cr.ContentStat(self.checksum_type)
-            oth_out_f = cr.OtherXmlFile(oth_out_fn, cr.GZ_COMPRESSION)
-
-        deltamodule = MainDeltaModule(id_type=self.id_type,
-                                      logger=self.logger)
-        ids = deltamodule.do(pri_old_fn, pri_new_fn, pri_out_f, fil_new_fn,
-                             fil_out_f, oth_new_fn, oth_out_f, removedxml)
-
-        # Prepare repomd.xml records
-        pri_rec = cr.RepomdRecord("primary", pri_out_fn)
-        pri_rec.load_contentstat(pri_out_f_stat)
-        pri_rec.fill(self.checksum_type)
-        if hash_in_the_name:
-            pri_rec.rename_file()
-        delta_repomd.set_record(pri_rec)
-
-        if fil_out_fn:
-            fil_rec = cr.RepomdRecord("filelists", fil_out_fn)
-            fil_rec.load_contentstat(fil_out_f_stat)
-            fil_rec.fill(self.checksum_type)
-            if hash_in_the_name:
-                fil_rec.rename_file()
-            delta_repomd.set_record(fil_rec)
-
-        if oth_out_fn:
-            oth_rec = cr.RepomdRecord("other", oth_out_fn)
-            oth_rec.load_contentstat(oth_out_f_stat)
-            oth_rec.fill(self.checksum_type)
-            if hash_in_the_name:
-                oth_rec.rename_file()
-            delta_repomd.set_record(oth_rec)
-
-        # Write out removed.xml
-        # TODO: Compression via compression wrapper
-        removedxml_path = os.path.join(delta_repodata_path, "removed.xml")
-        #removedxml_path_gz = os.path.join(delta_repodata_path, "removed.xml.gz")
-        removedxml_xml = removedxml.xml_dump()
-        self._debug("Writing removed.xml")
-        open(removedxml_path, "w").write(removedxml_xml)
-        stat = cr.ContentStat(self.checksum_type)
-        #cr.compress_file(removedxml_path, removedxml_path_gz, cr.GZ, stat)
-        #os.remove(removedxml_path)
-        #removedxml_rec = cr.RepomdRecord("removed", removedxml_path_gz)
-        removedxml_rec = cr.RepomdRecord("removed", removedxml_path)
-        removedxml_rec.load_contentstat(stat)
-        removedxml_rec.fill(self.checksum_type)
-        if hash_in_the_name:
-            removedxml_rec.rename_file()
-        delta_repomd.set_record(removedxml_rec)
-
-        # Write out repomd.xml
-        deltarepoid = "%s-%s" % ids
-        delta_repomd.set_repoid(deltarepoid, self.id_type)
-        delta_repomd_path = os.path.join(delta_repodata_path, "repomd.xml")
-        delta_repomd_xml = delta_repomd.xml_dump()
-        self._debug("Writing repomd.xml")
-        open(delta_repomd_path, "w").write(delta_repomd_xml)
-
-        # Final move
-        final_destination = os.path.join(out_path, "%s-%s" % ids)
-        if os.path.exists(final_destination):
-            self._warning("Destination dir already exists! Removing %s" % \
-                          final_destination)
-            shutil.rmtree(final_destination)
-        self._info("Moving %s -> %s" % (delta_path, final_destination))
-        os.rename(delta_path, final_destination)
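
With the package split, deltarepo/__init__.py now just re-exports the public
API, so callers only need the top-level package. A small hedged example of the
import surface and error handling (the paths are placeholders):

    import deltarepo
    from deltarepo import DeltaRepoApplicator, DeltaRepoError

    print deltarepo.VERBOSE_VERSION

    try:
        DeltaRepoApplicator("/repos/old", "/repos/delta",
                            out_path="/repos/old").apply()
    except DeltaRepoError, err:
        print "Delta application failed: %s" % err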
diff --git a/deltarepo/deltarepo/applicator.py b/deltarepo/deltarepo/applicator.py
new file mode 100644 (file)
index 0000000..110bbf1
--- /dev/null
@@ -0,0 +1,297 @@
+"""
+DeltaRepo package for Python.
+This is the library for generation, application and handling of
+DeltaRepositories.
+The library is built on the createrepo_c library and is a part of it.
+
+Copyright (C) 2013   Tomas Mlcoch
+
+"""
+
+import os
+import shutil
+import createrepo_c as cr
+from deltarepo.common import LoggingInterface, Metadata, RemovedXml
+from deltarepo.delta_plugins import PLUGINS
+from deltarepo.errors import DeltaRepoError, DeltaRepoPluginError
+
+__all__ = ['DeltaRepoApplicator']
+
+class DeltaRepoApplicator(LoggingInterface):
+
+    def __init__(self,
+                 old_repo_path,
+                 delta_repo_path,
+                 out_path=None,
+                 logger=None):
+
+        # Initialization
+
+        LoggingInterface.__init__(self, logger)
+
+        self.repoid_type = None
+        self.unique_md_filenames = False
+        self.databases = False
+        self.removedxmlobj = RemovedXml()
+
+        self.out_path = out_path or "./"
+
+        self.final_path = os.path.join(self.out_path, "repodata")
+
+        self.new_repo_path = self.out_path
+        self.new_repodata_path = os.path.join(self.new_repo_path, ".repodata/")
+        self.new_repomd_path = os.path.join(self.new_repodata_path, "repomd.xml")
+
+        self.old_repo_path = old_repo_path
+        self.old_repodata_path = os.path.join(self.old_repo_path, "repodata/")
+        self.old_repomd_path = os.path.join(self.old_repodata_path, "repomd.xml")
+
+        self.delta_repo_path = delta_repo_path
+        self.delta_repodata_path = os.path.join(self.delta_repo_path, "repodata/")
+        self.delta_repomd_path = os.path.join(self.delta_repodata_path, "repomd.xml")
+
+        # Prepare repomd objects
+        self.old_repomd = cr.Repomd(self.old_repomd_path)
+        self.delta_repomd = cr.Repomd(self.delta_repomd_path)
+        self.new_repomd = cr.Repomd()
+
+        # Check if the delta repo id corresponds to the old repo id
+        if not self.delta_repomd.repoid or \
+                len(self.delta_repomd.repoid.split('-')) != 2:
+            raise DeltaRepoError("Bad DeltaRepoId")
+
+        self.repoid_type_str = self.delta_repomd.repoid_type
+        self.old_id, self.new_id = self.delta_repomd.repoid.split('-')
+        self._debug("Delta %s -> %s" % (self.old_id, self.new_id))
+
+        if self.old_repomd.repoid_type == self.delta_repomd.repoid_type:
+            if self.old_repomd.repoid and self.old_repomd.repoid != self.old_id:
+                raise DeltaRepoError("Not suitable delta for current repo " \
+                        "(Expected: {0} Real: {1})".format(
+                            self.old_id, self.old_repomd.repoid))
+        else:
+            self._debug("Different repoid types repo: {0} vs delta: {1}".format(
+                    self.old_repomd.repoid_type, self.delta_repomd.repoid_type))
+
+        # Use revision and tags
+        self.new_repomd.set_revision(self.delta_repomd.revision)
+        for tag in self.delta_repomd.distro_tags:
+            self.new_repomd.add_distro_tag(tag[1], tag[0])
+        for tag in self.delta_repomd.repo_tags:
+            self.new_repomd.add_repo_tag(tag)
+        for tag in self.delta_repomd.content_tags:
+            self.new_repomd.add_content_tag(tag)
+
+        # Load records
+        self.old_records = {}
+        self.delta_records = {}
+        for record in self.old_repomd.records:
+            self.old_records[record.type] = record
+        for record in self.delta_repomd.records:
+            self.delta_records[record.type] = record
+
+        old_record_types = set(self.old_records.keys())
+        delta_record_types = set(self.delta_records.keys())
+
+        self.deleted_repomd_record_types = old_record_types - delta_record_types
+        self.added_repomd_record_types = delta_record_types - old_record_types
+
+        # Important sanity checks (repo without primary is definitely bad)
+        if not "primary" in self.old_records:
+            raise DeltaRepoError("Missing \"primary\" metadata in old repo")
+
+        if not "primary" in self.delta_records:
+            raise DeltaRepoError("Missing \"primary\" metadata in delta repo")
+
+        # Detect type of checksum in the delta repomd.xml
+        self.checksum_type = cr.checksum_type(self.delta_records["primary"].checksum_type)
+        if self.checksum_type == cr.UNKNOWN_CHECKSUM:
+            raise DeltaRepoError("Unknown checksum type used in delta repo: %s" % \
+                    self.delta_records["primary"].checksum_type)
+
+        # Detect whether unique md filenames are used
+        if self.delta_records["primary"].location_href.split("primary")[0] != "":
+            self.unique_md_filenames = True
+
+        # Load removedxml
+        self.removedxml_path = None
+        if "removed" in self.delta_records:
+            self.removedxml_path = os.path.join(self.delta_repo_path,
+                                   self.delta_records["removed"].location_href)
+            self.removedxmlobj.xml_parse(self.removedxml_path)
+        else:
+            self._warning("\"removed\" record is missing in repomd.xml "\
+                          "of delta repo")
+
+        # Prepare bundle
+        self.bundle = {}
+        self.bundle["repoid_type_str"] = self.repoid_type_str
+        self.bundle["removed_obj"] = self.removedxmlobj
+        self.bundle["unique_md_filenames"] = self.unique_md_filenames
+
+    def _new_metadata(self, metadata_type):
+        """Return Metadata Object for the metadata_type or None"""
+
+        if metadata_type not in self.delta_records:
+            return None
+
+        metadata = Metadata(metadata_type)
+
+        # Build delta filename
+        metadata.delta_fn = os.path.join(self.delta_repo_path,
+                            self.delta_records[metadata_type].location_href)
+
+        # Build old filename
+        if metadata_type in self.old_records:
+            metadata.old_fn = os.path.join(self.old_repo_path,
+                            self.old_records[metadata_type].location_href)
+
+        # Set output directory
+        metadata.out_dir = self.new_repodata_path
+
+        # Determine checksum type
+        metadata.checksum_type = cr.checksum_type(
+                self.delta_records[metadata_type].checksum_type)
+
+        # Determine compression type
+        metadata.compression_type = cr.detect_compression(metadata.delta_fn)
+        if (metadata.compression_type == cr.UNKNOWN_COMPRESSION):
+            raise DeltaRepoError("Cannot detect compression type for {0}".format(
+                    metadata.delta_fn))
+
+        return metadata
+
+    def apply(self):
+
+        # Prepare output path
+        os.mkdir(self.new_repodata_path)
+
+        processed_metadata = set()
+        used_plugins = set()
+        plugin_used = True
+
+        while plugin_used:
+            # Keep iterating over the plugins until a pass uses none of them
+            plugin_used = False
+
+            for plugin in PLUGINS:
+
+                # Use only plugins that haven't been already used
+                if plugin in used_plugins:
+                    continue
+
+                # Check which metadata this plugin wants to process
+                conflicting_metadata = set(plugin.METADATA) & processed_metadata
+                if conflicting_metadata:
+                    message = "Plugin {0}: Error - Plugin want to process " \
+                              "already processed metadata {1}".format(
+                               plugin.NAME, conflicting_metadata)
+                    self._error(message)
+                    raise DeltaRepoError(message)
+
+                # Prepare metadata for the plugin
+                metadata_objects = {}
+                for metadata_name in plugin.METADATA:
+                    metadata_object = self._new_metadata(metadata_name)
+                    if metadata_object is not None:
+                        metadata_objects[metadata_name] = metadata_object
+
+                # Skip the plugin if none of its supported metadata is available
+                if not metadata_objects:
+                    self._debug("Plugin {0}: Skipped - None of the supported " \
+                                "metadata {1} is available".format(
+                                plugin.NAME, plugin.METADATA))
+                    used_plugins.add(plugin)
+                    continue
+
+                # Check if the bundle contains everything the plugin needs
+                required_bundle_keys = set(plugin.APPLY_REQUIRED_BUNDLE_KEYS)
+                bundle_keys = set(self.bundle.keys())
+                if not required_bundle_keys.issubset(bundle_keys):
+                    self._debug("Plugin {0}: Skipped - Bundle keys {1} "\
+                                "are not available".format(plugin.NAME,
+                                (required_bundle_keys - bundle_keys)))
+                    continue
+
+                # Use the plugin
+                self._debug("Plugin {0}: Active".format(plugin.NAME))
+                plugin_instance = plugin()
+                plugin_instance.apply(metadata_objects, self.bundle)
+
+                # Check which bundle keys were added by the plugin
+                new_bundle_keys = set(self.bundle.keys())
+                diff = new_bundle_keys - bundle_keys
+                if diff != set(plugin.APPLY_BUNDLE_CONTRIBUTION):
+                    message = "Plugin {0}: Error - Plugin should add: {1} " \
+                               "bundle items but add: {2}".format(
+                               plugin.NAME, plugin.APPLY_BUNDLE_CONTRIBUTION,
+                               list(diff))
+                    self._error(message)
+                    raise DeltaRepoError(message)
+
+                # Put repomd records from the processed metadata into the new repomd
+                for md in metadata_objects.values():
+                    self._debug("Plugin {0}: Processed \"{1}\" delta record "\
+                                "which produced:".format(
+                                plugin.NAME, md.metadata_type))
+                    for repomd_record in md.generated_repomd_records:
+                        self._debug(" - {0}".format(repomd_record.type))
+                        self.new_repomd.set_record(repomd_record)
+
+                # Organization stuff
+                processed_metadata.update(set(metadata_objects.keys()))
+                used_plugins.add(plugin)
+                plugin_used = True
+
+        # TODO:
+        # Process the rest of the metadata files
+
+        self._debug("Used plugins: {0}".format([p.NAME for p in used_plugins]))
+
+        # Check if calculated repoids match
+        self._debug("Checking expected repoids")
+
+        if "old_repoid" in self.bundle:
+            if self.old_id != self.bundle["old_repoid"]:
+                message = "Repoid of the \"{0}\" repository doesn't match "\
+                          "the real repoid ({1} != {2}).".format(
+                           self.old_repo_path, self.old_id,
+                           self.bundle["old_repoid"])
+                self._error(message)
+                raise DeltaRepoError(message)
+            else:
+                self._debug("Repoid of the old repo matches ({0})".format(
+                            self.old_id))
+        else:
+            self._warning("\"old_repoid\" item is missing in bundle.")
+
+        if "new_repoid" in self.bundle:
+            if self.new_id != self.bundle["new_repoid"]:
+                message = "Repoid of the \"{0}\" repository doesn't match "\
+                          "the real repoid ({1} != {2}).".format(
+                           self.new_repo_path, self.new_id,
+                           self.bundle["new_repoid"])
+                self._error(message)
+                raise DeltaRepoError(message)
+            else:
+                self._debug("Repoid of the new repo matches ({0})".format(
+                            self.new_id))
+        else:
+            self._warning("\"new_repoid\" item is missing in bundle.")
+
+        # Prepare and write out the new repomd.xml
+        self._debug("Preparing repomd.xml ...")
+        self.new_repomd.set_repoid(self.new_id, self.repoid_type_str)
+        new_repomd_xml = self.new_repomd.xml_dump()
+
+        self._debug("Writing repomd.xml ...")
+        open(self.new_repomd_path, "w").write(new_repomd_xml)
+
+        # Final move
+        if os.path.exists(self.final_path):
+            self._warning("Destination dir already exists! Removing %s" % \
+                          self.final_path)
+            shutil.rmtree(self.final_path)
+        self._debug("Moving %s -> %s" % (self.new_repodata_path, self.final_path))
+        os.rename(self.new_repodata_path, self.final_path)
+
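
The applicator no longer hardcodes any metadata handling itself; apply()
repeatedly walks PLUGINS, hands each plugin the Metadata objects it declares in
METADATA plus the shared bundle dict, and collects the generated repomd records.
A hypothetical plugin conforming to that contract might look roughly like this
(the "group" metadata type and the registration via PLUGINS.append() are
assumptions for illustration, not part of the patch):

    from deltarepo.delta_plugins import DeltaRepoPlugin, PLUGINS

    class GroupsDeltaPlugin(DeltaRepoPlugin):
        NAME = "GroupsDeltaPlugin"
        VERSION = 1
        METADATA = ["group"]                     # metadata types it handles
        APPLY_REQUIRED_BUNDLE_KEYS = ["unique_md_filenames"]
        APPLY_BUNDLE_CONTRIBUTION = []           # adds nothing to the bundle

        def apply(self, metadata, bundle):
            md = metadata["group"]
            # Rebuild md.new_fn from md.old_fn and md.delta_fn here and
            # append the resulting cr.RepomdRecord objects to
            # md.generated_repomd_records so the applicator picks them up.
            pass

    PLUGINS.append(GroupsDeltaPlugin)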
diff --git a/deltarepo/deltarepo/common.py b/deltarepo/deltarepo/common.py
new file mode 100644 (file)
index 0000000..c6316d5
--- /dev/null
@@ -0,0 +1,122 @@
+import os
+import tempfile
+import logging
+import xml.dom.minidom
+import createrepo_c as cr
+from lxml import etree
+
+class LoggingInterface(object):
+    def __init__(self, logger=None):
+        if logger is None:
+            logger = logging.getLogger()
+            logger.disabled = True
+        self.logger = logger
+
+    def _debug(self, msg):
+        self.logger.debug(msg)
+
+    def _info(self, msg):
+        self.logger.info(msg)
+
+    def _warning(self, msg):
+        self.logger.warning(msg)
+
+    def _error(self, msg):
+        self.logger.error(msg)
+
+    def _critical(self, msg):
+        self.logger.critical(msg)
+
+class RemovedXml(object):
+    def __init__(self):
+        self.packages = {}  # { location_href: location_base }
+        self.files = {}     # { location_href: location_base or None }
+
+    def __str__(self):
+        return "%s\n%s" % (self.packages, self.files)
+
+    def add_pkg(self, pkg):
+        self.packages[pkg.location_href] = pkg.location_base
+
+    def add_pkg_locations(self, location_href, location_base):
+        self.packages[location_href] = location_base
+
+    def add_record(self, rec):
+        self.files[rec.location_href] = rec.location_base
+
+    def xml_dump(self):
+        xmltree = etree.Element("removed")
+        packages = etree.SubElement(xmltree, "packages")
+        for href, base in self.packages.iteritems():
+            attrs = {}
+            if href: attrs['href'] = href
+            if base: attrs['base'] = base
+            if not attrs: continue
+            etree.SubElement(packages, "location", attrs)
+        files = etree.SubElement(xmltree, "files")
+        for href, base in self.files.iteritems():
+            attrs = {}
+            if href: attrs['href'] = href
+            if base: attrs['base'] = base
+            if not attrs: continue
+            etree.SubElement(files, "location", attrs)
+        return etree.tostring(xmltree,
+                              pretty_print=True,
+                              encoding="UTF-8",
+                              xml_declaration=True)
+
+    def xml_parse(self, path):
+
+        _, tmp_path = tempfile.mkstemp()
+        cr.decompress_file(path, tmp_path, cr.AUTO_DETECT_COMPRESSION)
+        dom = xml.dom.minidom.parse(tmp_path)
+        os.remove(tmp_path)
+
+        packages = dom.getElementsByTagName("packages")
+        if packages:
+            for loc in packages[0].getElementsByTagName("location"):
+                href = loc.getAttribute("href")
+                base = loc.getAttribute("base")
+                if not href:
+                    continue
+                if not base:
+                    base = None
+                self.packages[href] = base
+
+        files = dom.getElementsByTagName("files")
+        if files:
+            for loc in files[0].getElementsByTagName("location"):
+                href = loc.getAttribute("href")
+                base = loc.getAttribute("base")
+                if not href:
+                    continue
+                if not base:
+                    base = None
+                self.files[href] = base
+
+class Metadata(object):
+    """Metadata file"""
+
+    def __init__(self, metadata_type):
+
+        self.metadata_type = metadata_type
+
+        # Paths
+        self.old_fn = None
+        self.delta_fn = None
+        self.new_fn = None
+
+        self.out_dir = None
+
+        # Settings
+        self.checksum_type = None
+        self.compression_type = None
+
+        # Records
+
+        # List of new repomd records related to this delta file.
+        # It is a list because some delta files can lead to multiple output
+        # files. E.g. a delta of primary.xml generates a new primary.xml
+        # and a primary.sqlite as well.
+        self.generated_repomd_records = []
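
RemovedXml is the shared helper behind removed.xml: the generator records the
locations of packages that disappear between the two repos, and the applicator
parses that file back to know what to drop. A short sketch of the round trip
using the API shown above (file names and package paths are made up):

    import createrepo_c as cr
    from deltarepo.common import RemovedXml

    removed = RemovedXml()
    removed.add_pkg_locations("Packages/foo-1.0-1.noarch.rpm", None)
    removed.add_pkg_locations("Packages/bar-2.3-1.x86_64.rpm",
                              "http://example.com/other-base/")

    open("removed.xml", "w").write(removed.xml_dump())

    # xml_parse() decompresses the file first, so a compressed copy
    # can be read back the same way
    cr.compress_file("removed.xml", "removed.xml.gz", cr.GZ,
                     cr.ContentStat(cr.SHA256))
    parsed = RemovedXml()
    parsed.xml_parse("removed.xml.gz")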
diff --git a/deltarepo/deltarepo/delta_plugins.py b/deltarepo/deltarepo/delta_plugins.py
new file mode 100644 (file)
index 0000000..c675a7b
--- /dev/null
@@ -0,0 +1,476 @@
+import os
+import os.path
+import hashlib
+import createrepo_c as cr
+from deltarepo.errors import DeltaRepoError, DeltaRepoPluginError
+
+PLUGINS = []
+
+class DeltaRepoPlugin(object):
+
+    # Plugin name
+    NAME = ""
+
+    # Plugin version (integer number!)
+    VERSION = 1
+
+    # List of metadata types this plugin takes care of.
+    # The plugin HAS TO do deltas for each of the listed metadata and be able
+    # to apply deltas to them!
+    METADATA = []
+
+    # It is highly recommended for a plugin to be maximally independent of
+    # other plugins and of metadata not specified in METADATA.
+    # But some kind of dependency mechanism can be implemented via
+    # *_REQUIRED_BUNDLE_KEYS and *_BUNDLE_CONTRIBUTION.
+
+    # List of bundle keys that have to be filled before
+    # apply() method of this plugin should be called
+    APPLY_REQUIRED_BUNDLE_KEYS = []
+
+    # List of bundle keys this plugin adds during the apply() method call
+    APPLY_BUNDLE_CONTRIBUTION = []
+
+    # List of bundle keys that have to be filled before
+    # gen() method of this plugin should be called
+    GEN_REQUIRED_BUNDLE_KEYS = []
+
+    # List of bundle keys this plugin adds during the gen() method call
+    GEN_BUNDLE_CONTRIBUTION = []
+
+    # If two plugins want to add the same key to the bundle,
+    # an exception is raised.
+    # If a plugin requires a key that isn't provided by any of the registered
+    # plugins, an exception is raised.
+    # If a plugin adds a key to the bundle that is not listed in
+    # BUNDLE_CONTRIBUTION, an exception is raised.
+
+    def __init__(self):
+        pass
+
+    def apply(self, metadata, bundle):
+        """
+        :arg metadata: Dict with the available metadata listed in METADATA.
+            The key is the metadata type (e.g. "primary", "filelists", ...),
+            the value is a Metadata object.
+            This method is called only if at least one of the metadata types
+            listed in METADATA is found in the delta repomd.
+        :arg bundle: Dict with various metadata.
+
+        The apply method has to:
+         * Raise DeltaRepoPluginError if something is wrong
+         * Build a new filename for each metadata and store it
+           in the Metadata object.
+         * 
+        """
+        raise NotImplementedError("Not implemented")
+
+    def gen(self, metadata, bundle):
+        raise NotImplementedError("Not implemented")
+
+class MainDeltaRepoPlugin(DeltaRepoPlugin):
+
+    NAME = "MainDeltaPlugin"
+    VERSION = 1
+    METADATA = ["primary", "filelists", "other"]
+    APPLY_REQUIRED_BUNDLE_KEYS = ["repoid_type_str",
+                                  "removed_obj",
+                                  "unique_md_filenames"]
+    APPLY_BUNDLE_CONTRIBUTION = ["old_repoid", "new_repoid"]
+    GEN_REQUIRED_BUNDLE_KEYS = ["repoid_type_str",
+                                "removed_obj",
+                                "unique_md_filenames"]
+    GEN_BUNDLE_CONTRIBUTION = ["old_repoid", "new_repoid"]
+
+    def _path(self, path, record):
+        """Return path to the repodata file."""
+        return os.path.join(path, record.location_href)
+
+    def _pkg_id_tuple(self, pkg):
+        """Return tuple identifying a package in repodata.
+        (pkgId, location_href, location_base)"""
+        return (pkg.pkgId, pkg.location_href, pkg.location_base)
+
+    def _pkg_id_str(self, pkg):
+        """Return string identifying a package in repodata.
+        These strings are used for the RepoId calculation."""
+        if not pkg.pkgId:
+            self._warning("Missing pkgId in a package!")
+        if not pkg.location_href:
+            self._warning("Missing location_href at package %s %s" % \
+                          (pkg.name, pkg.pkgId))
+        return "%s%s%s" % (pkg.pkgId or '',
+                           pkg.location_href or '',
+                           pkg.location_base or '')
+
+    def apply(self, metadata, bundle):
+
+        # Get info from bundle
+        removed_obj = bundle["removed_obj"]
+        repoid_type_str = bundle["repoid_type_str"]
+        unique_md_filenames = bundle["unique_md_filenames"]
+
+        # Check input arguments
+        if "primary" not in metadata:
+            raise DeltaRepoPluginError("Primary metadata missing")
+
+        pri_md = metadata.get("primary")
+        fil_md = metadata.get("filelists")
+        oth_md = metadata.get("other")
+
+        # Build and prepare destination paths
+        # (And store them in the same Metadata object)
+        def prepare_paths_in_metadata(md, xmlclass, dbclass):
+            if md is None:
+                return
+
+            suffix = cr.compression_suffix(md.compression_type) or ""
+            md.new_fn = os.path.join(md.out_dir,
+                                     "{0}.xml{1}".format(
+                                     md.metadata_type, suffix))
+            md.new_f_stat = cr.ContentStat(md.checksum_type)
+            md.new_f = xmlclass(md.new_fn,
+                                md.compression_type,
+                                md.new_f_stat)
+            md.db_fn = os.path.join(md.out_dir, "{0}.sqlite".format(
+                                    md.metadata_type))
+            md.db = dbclass(md.db_fn)
+
+        # Primary
+        prepare_paths_in_metadata(pri_md,
+                                  cr.PrimaryXmlFile,
+                                  cr.PrimarySqlite)
+
+        # Filelists
+        prepare_paths_in_metadata(fil_md,
+                                  cr.FilelistsXmlFile,
+                                  cr.FilelistsSqlite)
+
+        # Other
+        prepare_paths_in_metadata(oth_md,
+                                  cr.OtherXmlFile,
+                                  cr.OtherSqlite)
+
+        # Apply delta
+
+        removed_packages = set() # set of pkgIds (hashes)
+        all_packages = {}        # dict { 'pkgId': pkg }
+
+        old_repoid_strings = []
+        new_repoid_strings = []
+
+        def old_pkgcb(pkg):
+            old_repoid_strings.append(self._pkg_id_str(pkg))
+            if pkg.location_href in removed_obj.packages:
+                if removed_obj.packages[pkg.location_href] == pkg.location_base:
+                    # This package won't be in new metadata
+                    return
+            new_repoid_strings.append(self._pkg_id_str(pkg))
+            all_packages[pkg.pkgId] = pkg
+
+        def delta_pkgcb(pkg):
+            new_repoid_strings.append(self._pkg_id_str(pkg))
+            all_packages[pkg.pkgId] = pkg
+
+        do_primary_files = 1
+        if fil_md and fil_md.delta_fn and fil_md.old_fn:
+            # Don't read files from primary if there is filelists.xml
+            do_primary_files = 0
+
+        cr.xml_parse_primary(pri_md.old_fn, pkgcb=old_pkgcb,
+                             do_files=do_primary_files)
+        cr.xml_parse_primary(pri_md.delta_fn, pkgcb=delta_pkgcb,
+                             do_files=do_primary_files)
+
+        # Calculate RepoIds
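+        # A repoid is the checksum (of type repoid_type_str) of the sorted
+        # per-package id strings returned by _pkg_id_str().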
+        old_repoid = ""
+        new_repoid = ""
+
+        h = hashlib.new(repoid_type_str)
+        old_repoid_strings.sort()
+        for i in old_repoid_strings:
+            h.update(i)
+        old_repoid = h.hexdigest()
+
+        h = hashlib.new(repoid_type_str)
+        new_repoid_strings.sort()
+        for i in new_repoid_strings:
+            h.update(i)
+        new_repoid = h.hexdigest()
+
+        bundle["old_repoid"] = old_repoid
+        bundle["new_repoid"] = new_repoid
+
+        # Sort packages
+        def cmp_pkgs(x, y):
+            # Compare only by filename
+            ret = cmp(os.path.basename(x.location_href),
+                      os.path.basename(y.location_href))
+            if ret != 0:
+                return ret
+
+            # Compare by full location_href path
+            return cmp(x.location_href, y.location_href)
+
+        all_packages_sorted = sorted(all_packages.values(), cmp=cmp_pkgs)
+
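+        # newpkgcb lets the filelists/other parsers attach their data to the
+        # package objects already collected from primary; pkgIds for which it
+        # returns None are skipped by the parser.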
+        def newpkgcb(pkgId, name, arch):
+            return all_packages.get(pkgId, None)
+
+        # Parse filelists
+        if fil_md and fil_md.delta_fn and fil_md.old_fn:
+            cr.xml_parse_filelists(fil_md.old_fn, newpkgcb=newpkgcb)
+            cr.xml_parse_filelists(fil_md.delta_fn, newpkgcb=newpkgcb)
+
+        # Parse other
+        if oth_md and oth_md.delta_fn and oth_md.old_fn:
+            cr.xml_parse_other(oth_md.old_fn, newpkgcb=newpkgcb)
+            cr.xml_parse_other(oth_md.delta_fn, newpkgcb=newpkgcb)
+
+        num_of_packages = len(all_packages_sorted)
+
+        # Write out primary
+        pri_md.new_f.set_num_of_pkgs(num_of_packages)
+        for pkg in all_packages_sorted:
+            pri_md.new_f.add_pkg(pkg)
+            if pri_md.db:
+                pri_md.db.add_pkg(pkg)
+
+        # Write out filelists
+        if fil_md and fil_md.new_f:
+            fil_md.new_f.set_num_of_pkgs(num_of_packages)
+            for pkg in all_packages_sorted:
+                fil_md.new_f.add_pkg(pkg)
+                if fil_md.db:
+                    fil_md.db.add_pkg(pkg)
+
+        # Write out other
+        if oth_md and oth_md.new_f:
+            oth_md.new_f.set_num_of_pkgs(num_of_packages)
+            for pkg in all_packages_sorted:
+                oth_md.new_f.add_pkg(pkg)
+                if oth_md.db:
+                    oth_md.db.add_pkg(pkg)
+
+        # Finish metadata
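+        # finish_metadata closes the XML file, creates its repomd record and,
+        # if a sqlite database was generated, compresses the database and
+        # creates a repomd record for it as well.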
+        def finish_metadata(md):
+            if md is None:
+                return
+
+            # Close XML file
+            md.new_f.close()
+
+            # Prepare repomd record of xml file
+            rec = cr.RepomdRecord(md.metadata_type, md.new_fn)
+            rec.load_contentstat(md.new_f_stat)
+            rec.fill(md.checksum_type)
+            if unique_md_filenames:
+                rec.rename_file()
+
+            md.generated_repomd_records.append(rec)
+
+            # Prepare database
+            if hasattr(md, "db") and md.db:
+                md.db.dbinfo_update(rec.checksum)
+                md.db.close()
+                db_stat = cr.ContentStat(md.checksum_type)
+                db_compressed = md.db_fn+".bz2"
+                cr.compress_file(md.db_fn, None, cr.BZ2, db_stat)
+                os.remove(md.db_fn)
+
+                # Prepare repomd record of database file
+                db_rec = cr.RepomdRecord("{0}_db".format(md.metadata_type),
+                                         db_compressed)
+                db_rec.load_contentstat(db_stat)
+                db_rec.fill(md.checksum_type)
+                if unique_md_filenames:
+                    db_rec.rename_file()
+
+                md.generated_repomd_records.append(db_rec)
+
+        # Finish the metadata and generate their repomd records
+
+        finish_metadata(pri_md)
+        finish_metadata(fil_md)
+        finish_metadata(oth_md)
+
+    def gen(self, metadata, bundle):
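+        """Generate the delta: write out only the packages added in the new
+        repodata and record locations of the removed packages in the
+        removed.xml object from the bundle."""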
+
+        # Get info from bundle
+        removed_obj = bundle["removed_obj"]
+        repoid_type_str = bundle["repoid_type_str"]
+        unique_md_filenames = bundle["unique_md_filenames"]
+
+        # Check input arguments
+        if "primary" not in metadata:
+            raise DeltaRepoPluginError("Primary metadata missing")
+
+        pri_md = metadata.get("primary")
+        fil_md = metadata.get("filelists")
+        oth_md = metadata.get("other")
+
+        # Build and prepare destination paths
+        # (And store them in the same Metadata object)
+        def prepare_paths_in_metadata(md, xmlclass, dbclass):
+            if md is None:
+                return
+
+            suffix = cr.compression_suffix(md.compression_type) or ""
+            md.delta_fn = os.path.join(md.out_dir,
+                                     "{0}.xml{1}".format(
+                                     md.metadata_type, suffix))
+            md.delta_f_stat = cr.ContentStat(md.checksum_type)
+            md.delta_f = xmlclass(md.delta_fn,
+                                  md.compression_type,
+                                  md.delta_f_stat)
+            # Database for delta repo is redundant
+            #md.db_fn = os.path.join(md.out_dir, "{0}.sqlite".format(
+            #                        md.metadata_type))
+            #md.db = dbclass(md.db_fn)
+
+        # Primary
+        prepare_paths_in_metadata(pri_md,
+                                  cr.PrimaryXmlFile,
+                                  cr.PrimarySqlite)
+
+        # Filelists
+        prepare_paths_in_metadata(fil_md,
+                                  cr.FilelistsXmlFile,
+                                  cr.FilelistsSqlite)
+
+        # Other
+        prepare_paths_in_metadata(oth_md,
+                                  cr.OtherXmlFile,
+                                  cr.OtherSqlite)
+
+        # Gen delta
+
+        old_packages = set()
+        added_packages = {}         # dict { 'pkgId': pkg }
+        added_packages_ids = []     # list of package ids
+
+        old_repoid_strings = []
+        new_repoid_strings = []
+
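+        # old_pkgcb collects the id tuples of packages in the old primary;
+        # new_pkgcb keeps only the packages missing from the old repodata
+        # and drops the tuples of packages present in both.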
+        def old_pkgcb(pkg):
+            old_packages.add(self._pkg_id_tuple(pkg))
+            old_repoid_strings.append(self._pkg_id_str(pkg))
+
+        def new_pkgcb(pkg):
+            new_repoid_strings.append(self._pkg_id_str(pkg))
+            pkg_id_tuple = self._pkg_id_tuple(pkg)
+            if not pkg_id_tuple in old_packages:
+                # This package is only in new repodata
+                added_packages[pkg.pkgId] = pkg
+                added_packages_ids.append(pkg.pkgId)
+            else:
+                # This package is also in the old repodata
+                old_packages.remove(pkg_id_tuple)
+
+        do_new_primary_files = 1
+        if fil_md and fil_md.delta_f and fil_md.new_fn:
+            # All files will be parsed from filelists
+            do_new_primary_files = 0
+
+        cr.xml_parse_primary(pri_md.old_fn, pkgcb=old_pkgcb, do_files=0)
+        cr.xml_parse_primary(pri_md.new_fn, pkgcb=new_pkgcb,
+                             do_files=do_new_primary_files)
+
+        # Calculate RepoIds
+        old_repoid = ""
+        new_repoid = ""
+
+        h = hashlib.new(repoid_type_str)
+        old_repoid_strings.sort()
+        for i in old_repoid_strings:
+            h.update(i)
+        old_repoid = h.hexdigest()
+
+        h = hashlib.new(repoid_type_str)
+        new_repoid_strings.sort()
+        for i in new_repoid_strings:
+            h.update(i)
+        new_repoid = h.hexdigest()
+
+        bundle["old_repoid"] = old_repoid
+        bundle["new_repoid"] = new_repoid
+
+        # Prepare list of removed packages
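+        # Whatever is left in old_packages was not seen in the new primary,
+        # i.e. these are the removed packages.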
+        removed_pkgs = sorted(old_packages)
+        for _, location_href, location_base in removed_pkgs:
+            removed_obj.add_pkg_locations(location_href, location_base)
+
+        num_of_packages = len(added_packages)
+
+        # Filelists and Other cb
+        def newpkgcb(pkgId, name, arch):
+            return added_packages.get(pkgId, None)
+
+        # Write out filelists delta
+        if fil_md and fil_md.delta_f and fil_md.new_fn:
+            cr.xml_parse_filelists(fil_md.new_fn, newpkgcb=newpkgcb)
+            fil_md.delta_f.set_num_of_pkgs(num_of_packages)
+            for pkgid in added_packages_ids:
+                fil_md.delta_f.add_pkg(added_packages[pkgid])
+            fil_md.delta_f.close()
+
+        # Write out other delta
+        if oth_md and oth_md.delta_f and oth_md.new_fn:
+            cr.xml_parse_other(oth_md.new_fn, newpkgcb=newpkgcb)
+            oth_md.delta_f.set_num_of_pkgs(num_of_packages)
+            for pkgid in added_packages_ids:
+                oth_md.delta_f.add_pkg(added_packages[pkgid])
+            oth_md.delta_f.close()
+
+        # Write out primary delta
+        # Note: The primary delta has to be written after the filelists are
+        # parsed, otherwise the file lists would be missing when
+        # do_new_primary_files was 0
+        pri_md.delta_f.set_num_of_pkgs(num_of_packages)
+        for pkgid in added_packages_ids:
+            pri_md.delta_f.add_pkg(added_packages[pkgid])
+        pri_md.delta_f.close()
+
+        # Finish metadata
+        def finish_metadata(md):
+            if md is None:
+                return
+
+            # Close XML file
+            md.delta_f.close()
+
+            # Prepare repomd record of xml file
+            rec = cr.RepomdRecord(md.metadata_type, md.delta_fn)
+            rec.load_contentstat(md.delta_f_stat)
+            rec.fill(md.checksum_type)
+            if unique_md_filenames:
+                rec.rename_file()
+
+            md.generated_repomd_records.append(rec)
+
+            # Prepare database
+            if hasattr(md, "db") and md.db:
+                md.db.dbinfo_update(rec.checksum)
+                md.db.close()
+                db_stat = cr.ContentStat(md.checksum_type)
+                db_compressed = md.db_fn+".bz2"
+                cr.compress_file(md.db_fn, None, cr.BZ2, db_stat)
+                os.remove(md.db_fn)
+
+                # Prepare repomd record of database file
+                db_rec = cr.RepomdRecord("{0}_db".format(md.metadata_type),
+                                         db_compressed)
+                db_rec.load_contentstat(db_stat)
+                db_rec.fill(md.checksum_type)
+                if unique_md_filenames:
+                    db_rec.rename_file()
+
+                md.generated_repomd_records.append(db_rec)
+
+        # Finish the metadata and generate their repomd records
+
+        finish_metadata(pri_md)
+        finish_metadata(fil_md)
+        finish_metadata(oth_md)
+
+
+PLUGINS.append(MainDeltaRepoPlugin)
diff --git a/deltarepo/deltarepo/errors.py b/deltarepo/deltarepo/errors.py
new file mode 100644 (file)
index 0000000..9a0012b
--- /dev/null
@@ -0,0 +1,8 @@
+
+__all__ = ["DeltaRepoError", "DeltaRepoPluginError"]
+
+class DeltaRepoError(Exception):
+    pass
+
+class DeltaRepoPluginError(DeltaRepoError):
+    pass
diff --git a/deltarepo/deltarepo/generator.py b/deltarepo/deltarepo/generator.py
new file mode 100644 (file)
index 0000000..d89d924
--- /dev/null
@@ -0,0 +1,310 @@
+"""
+DeltaRepo package for Python.
+This is the library for generation, application and handling of
+DeltaRepositories.
+The library is built on top of the createrepo_c library and is a part of it.
+
+Copyright (C) 2013   Tomas Mlcoch
+
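+Example usage (a minimal sketch; the repository paths are placeholders):
+
+    from deltarepo.generator import DeltaRepoGenerator
+
+    # Generate a delta repository between an old and a new repository
+    generator = DeltaRepoGenerator("/path/to/old_repo",
+                                   "/path/to/new_repo",
+                                   out_path="/path/to/output_dir/")
+    generator.gen()
+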
+"""
+
+import os
+import shutil
+import createrepo_c as cr
+from deltarepo.common import LoggingInterface, Metadata, RemovedXml
+from deltarepo.delta_plugins import PLUGINS
+from deltarepo.errors import DeltaRepoError, DeltaRepoPluginError
+
+__all__ = ['DeltaRepoGenerator']
+
+class DeltaRepoGenerator(LoggingInterface):
+
+    def __init__(self,
+                 old_repo_path,
+                 new_repo_path,
+                 out_path=None,
+                 logger=None,
+                 repoid_type="sha256",
+                 compression_type="xz"):
+
+        # Initialization
+
+        LoggingInterface.__init__(self, logger)
+
+        self.out_path = out_path or "./"
+
+        self.final_path = os.path.join(self.out_path, "repodata")
+
+        self.new_repo_path = new_repo_path
+        self.new_repodata_path = os.path.join(self.new_repo_path, "repodata/")
+        self.new_repomd_path = os.path.join(self.new_repodata_path, "repomd.xml")
+
+        self.old_repo_path = old_repo_path
+        self.old_repodata_path = os.path.join(self.old_repo_path, "repodata/")
+        self.old_repomd_path = os.path.join(self.old_repodata_path, "repomd.xml")
+
+        self.delta_repo_path = self.out_path
+        self.delta_repodata_path = os.path.join(self.delta_repo_path, ".repodata/")
+        self.delta_repomd_path = os.path.join(self.delta_repodata_path, "repomd.xml")
+
+        # Repoid type
+        self.repoid_type_str = repoid_type or "sha256"
+        self.compression_type_str = compression_type or "xz"
+        self.compression_type = cr.compression_type(self.compression_type_str)
+
+        # Prepare Repomd objects
+        self.old_repomd = cr.Repomd(self.old_repomd_path)
+        self.new_repomd = cr.Repomd(self.new_repomd_path)
+        self.delta_repomd = cr.Repomd()
+
+        # Use revision and tags
+        self.delta_repomd.set_revision(self.new_repomd.revision)
+        for tag in self.new_repomd.distro_tags:
+            self.delta_repomd.add_distro_tag(tag[1], tag[0])
+        for tag in self.new_repomd.repo_tags:
+            self.delta_repomd.add_repo_tag(tag)
+        for tag in self.new_repomd.content_tags:
+            self.delta_repomd.add_content_tag(tag)
+
+        # Load records
+        self.old_records = {}
+        self.new_records = {}
+        for record in self.old_repomd.records:
+            self.old_records[record.type] = record
+        for record in self.new_repomd.records:
+            self.new_records[record.type] = record
+
+        old_record_types = set(self.old_records.keys())
+        new_record_types = set(self.new_records.keys())
+
+        self.deleted_repomd_record_types = old_record_types - new_record_types
+        self.added_repomd_record_types = new_record_types - old_record_types
+
+        # Important sanity checks (repo without primary is definitely bad)
+        if not "primary" in self.old_records:
+            raise DeltaRepoError("Missing \"primary\" metadata in old repo")
+
+        if not "primary" in self.new_records:
+            raise DeltaRepoError("Missing \"primary\" metadata in new repo")
+
+        # Detect type of checksum in the new repomd.xml (global)
+        self.checksum_type = cr.checksum_type(self.new_records["primary"].checksum_type)
+        if self.checksum_type == cr.UNKNOWN_CHECKSUM:
+            raise DeltaRepoError("Unknown checksum type used in new repo: %s" % \
+                    self.new_records["primary"].checksum_type)
+
+        # TODO: Is it necessary to detect the checksum type here, when it is
+        # detected again for each record anyway?
+
+        # Detect whether unique (hash-prefixed) md filenames are used
+        self.unique_md_filenames = False
+        if self.new_records["primary"].location_href.split("primary")[0] != "":
+            self.unique_md_filenames = True
+
+        self.old_id = self.old_repomd.repoid
+        self.new_id = self.new_repomd.repoid
+
+        # Prepare removed xml object
+        self.removedxmlobj = RemovedXml()
+
+        # Prepare bundle
+        self.bundle = {}
+        self.bundle["repoid_type_str"] = self.repoid_type_str
+        self.bundle["removed_obj"] = self.removedxmlobj
+        self.bundle["unique_md_filenames"] = self.unique_md_filenames
+
+    def _new_metadata(self, metadata_type):
+        """Return Metadata Object for the metadata_type or None"""
+
+        if metadata_type not in self.new_records:
+            return None
+
+        metadata = Metadata(metadata_type)
+
+        # Build new filename
+        metadata.new_fn = os.path.join(self.new_repo_path,
+                            self.new_records[metadata_type].location_href)
+
+        # Build old filename
+        if metadata_type in self.old_records:
+            metadata.old_fn = os.path.join(self.old_repo_path,
+                            self.old_records[metadata_type].location_href)
+
+        # Set output directory
+        metadata.out_dir = self.delta_repodata_path
+
+        # Determine checksum type (for this specific repodata)
+        # The detected checksum type should probably be the same as the
+        # globally detected one, but that is not guaranteed
+        metadata.checksum_type = cr.checksum_type(
+                self.new_records[metadata_type].checksum_type)
+
+        # Determine compression type
+        metadata.compression_type = cr.detect_compression(metadata.new_fn)
+        if (metadata.compression_type == cr.UNKNOWN_COMPRESSION):
+            raise DeltaRepoError("Cannot detect compression type for {0}".format(
+                    metadata.new_fn))
+
+        return metadata
+
+    def gen(self):
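+        """Generate the delta repodata into a temporary .repodata/ directory
+        and move it to <out_path>/repodata when finished."""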
+
+        # Prepare output path
+        os.mkdir(self.delta_repodata_path)
+
+        processed_metadata = set()
+        used_plugins = set()
+        plugin_used = True
+
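+        # Plugins are applied in passes: each pass runs every not-yet-used
+        # plugin whose supported metadata and required bundle keys are
+        # available, and the loop repeats until a pass uses no plugin.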
+        while plugin_used:
+            # Reset the flag; it is set again whenever a plugin is used in this pass
+            plugin_used = False
+
+            for plugin in PLUGINS:
+
+                # Use only plugins that haven't been already used
+                if plugin in used_plugins:
+                    continue
+
+                # Check which metadata this plugin wants to process
+                conflicting_metadata = set(plugin.METADATA) & processed_metadata
+                if conflicting_metadata:
+                    message = "Plugin {0}: Error - Plugin want to process " \
+                              "already processed metadata {1}".format(
+                               plugin.NAME, conflicting_metadata)
+                    self._error(message)
+                    raise DeltaRepoError(message)
+
+                # Prepare metadata for the plugin
+                metadata_objects = {}
+                for metadata_name in plugin.METADATA:
+                    metadata_object = self._new_metadata(metadata_name)
+                    if metadata_object is not None:
+                        metadata_objects[metadata_name] = metadata_object
+
+                # Skip plugin if no supported metadata available
+                if not metadata_objects:
+                    self._debug("Plugin {0}: Skipped - None of supported " \
+                                "metadata {1} available".format(
+                                plugin.NAME, plugin.METADATA))
+                    used_plugins.add(plugin)
+                    continue
+
+                # Check if the bundle contains everything the plugin needs
+                required_bundle_keys = set(plugin.GEN_REQUIRED_BUNDLE_KEYS)
+                bundle_keys = set(self.bundle.keys())
+                if not required_bundle_keys.issubset(bundle_keys):
+                    self._debug("Plugin {0}: Skipped - Bundle keys {1} "\
+                                "are not available".format(plugin.NAME,
+                                (required_bundle_keys - bundle_keys)))
+                    continue
+
+                # Use the plugin
+                self._debug("Plugin {0}: Active".format(plugin.NAME))
+                plugin_instance = plugin()
+                plugin_instance.gen(metadata_objects, self.bundle)
+
+                # Check which bundle keys were added by the plugin
+                new_bundle_keys = set(self.bundle.keys())
+                diff = new_bundle_keys - bundle_keys
+                if diff != set(plugin.GEN_BUNDLE_CONTRIBUTION):
+                    message = "Plugin {0}: Error - Plugin should add: {1} " \
+                               "bundle items but add: {2}".format(
+                               plugin.NAME, plugin.GEN_BUNDLE_CONTRIBUTION,
+                               list(diff))
+                    self._error(message)
+                    raise DeltaRepoError(message)
+
+                # Put repomd records from the processed metadata into the delta repomd
+                for md in metadata_objects.values():
+                    self._debug("Plugin {0}: Processed \"{1}\" delta record "\
+                                "which produced:".format(
+                                plugin.NAME, md.metadata_type))
+                    for repomd_record in md.generated_repomd_records:
+                        self._debug(" - {0}".format(repomd_record.type))
+                        self.delta_repomd.set_record(repomd_record)
+
+                # Organization stuff
+                processed_metadata.update(set(metadata_objects.keys()))
+                used_plugins.add(plugin)
+                plugin_used = True
+
+        # TODO:
+        # Process rest of the metadata files
+
+        # Write out removed.xml
+        self._debug("Writing removed.xml ...")
+        removedxml_xml = self.removedxmlobj.xml_dump()
+        removedxml_path = os.path.join(self.delta_repodata_path, "removed.xml")
+
+        stat = None
+        if (self.compression_type != cr.UNKNOWN_COMPRESSION):
+            removedxml_path += cr.compression_suffix(self.compression_type)
+            stat = cr.ContentStat(self.checksum_type)
+            f = cr.CrFile(removedxml_path, cr.MODE_WRITE,
+                          self.compression_type, stat)
+            f.write(removedxml_xml)
+            f.close()
+        else:
+            open(removedxml_path, "w").write(removedxml_xml)
+
+        removedxml_rec = cr.RepomdRecord("removed", removedxml_path)
+        if stat is not None:
+            removedxml_rec.load_contentstat(stat)
+        removedxml_rec.fill(self.checksum_type)
+        if self.unique_md_filenames:
+            removedxml_rec.rename_file()
+        self.delta_repomd.set_record(removedxml_rec)
+
+        # Check if calculated repoids match
+        self._debug("Checking expected repoids")
+
+        if not "new_repoid" in self.bundle or not "old_repoid" in self.bundle:
+            message = "\"new_repoid\" or \"old_repoid\" is missing in bundle"
+            self._error(message)
+            raise DeltaRepoError(message)
+
+        if self.old_id:
+            if self.old_id != self.bundle["old_repoid"]:
+                message = "Repoid of the \"{0}\" repository doesn't match "\
+                          "the real repoid ({1} != {2}).".format(
+                           self.old_repo_path, self.old_id,
+                           self.bundle["old_repoid"])
+                self._error(message)
+                raise DeltaRepoError(message)
+            else:
+                self._debug("Repoid of the old repo matches ({0})".format(
+                            self.old_id))
+        else:
+            self._debug("Repoid of the \"{0}\" is not part of its "\
+                        "repomd".format(self.old_repo_path))
+
+        if self.new_id:
+            if self.new_id != self.bundle["new_repoid"]:
+                message = "Repoid of the \"{0}\" repository doesn't match "\
+                          "the real repoid ({1} != {2}).".format(
+                           self.new_repo_path, self.new_id,
+                           self.bundle["new_repoid"])
+                self._error(message)
+                raise DeltaRepoError(message)
+            else:
+                self._debug("Repoid of the new repo matches ({0})".format(
+                            self.new_id))
+        else:
+            self._debug("Repoid of the \"{0}\" is not part of its "\
+                        "repomd".format(self.new_repo_path))
+
+        # Prepare and write out the new repomd.xml
+        self._debug("Preparing repomd.xml ...")
+        deltarepoid = "{0}-{1}".format(self.bundle["old_repoid"],
+                                       self.bundle["new_repoid"])
+        self.delta_repomd.set_repoid(deltarepoid, self.repoid_type_str)
+        delta_repomd_xml = self.delta_repomd.xml_dump()
+
+        self._debug("Writing repomd.xml ...")
+        open(self.delta_repomd_path, "w").write(delta_repomd_xml)
+
+        # Final move
+        if os.path.exists(self.final_path):
+            self._warning("Destination dir already exists! Removing %s" % \
+                          self.final_path)
+            shutil.rmtree(self.final_path)
+        self._debug("Moving %s -> %s" % (self.delta_repodata_path, self.final_path))
+        os.rename(self.delta_repodata_path, self.final_path)
+