deltarepo: ...
author Tomas Mlcoch <tmlcoch@redhat.com>
Tue, 11 Jun 2013 15:10:33 +0000 (17:10 +0200)
committer Tomas Mlcoch <tmlcoch@redhat.com>
Tue, 11 Jun 2013 15:10:33 +0000 (17:10 +0200)
deltarepo/README.md [new file with mode: 0644]
deltarepo/deltarepo.py
deltarepo/deltarepo/__init__.py

diff --git a/deltarepo/README.md b/deltarepo/README.md
new file mode 100644 (file)
index 0000000..b62f639
--- /dev/null
+++ b/deltarepo/README.md
@@ -0,0 +1,5 @@
+# Usage examples
+
+    ./deltarepo.py repo1 repo2 -v
+
+    ./deltarepo.py --apply repo1 delta/
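
The two invocations above map onto the Python API this commit fleshes out: gendelta for the first, applydelta for the second. A minimal sketch of the same flow through the module, assuming the repositories sit in the working directory (the directory names are just the README's placeholders):

    import logging
    import deltarepo

    logging.basicConfig(level=logging.DEBUG)
    gen = deltarepo.DeltaRepoGenerator(id_type="sha256",
                                       logger=logging.getLogger("deltarepo"))

    # ./deltarepo.py repo1 repo2 -v  ->  write the delta repo under out_path
    gen.gendelta("repo1", "repo2", out_path=".")

    # ./deltarepo.py --apply repo1 delta/  ->  reconstruct the new repodata/
    gen.applydelta("repo1", "delta/", out_path=".")
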
diff --git a/deltarepo/deltarepo.py b/deltarepo/deltarepo.py
index 6cf480cba48f2a2a33e1986c46d66eda82df6f26..9e14ee8fcddf1885a3f2c165c681eaefe491023a 100755 (executable)
--- a/deltarepo/deltarepo.py
+++ b/deltarepo/deltarepo.py
@@ -95,12 +95,15 @@ if __name__ == "__main__":
 
     logger = setup_logging(options.quiet, options.verbose)
 
+    generator = deltarepo.DeltaRepoGenerator(id_type=options.id_type,
+                                             logger=logger)
+
+    # TODO: check if repo is really a delta repo (must have a repoid and removed.xml)
+
     if options.apply:
         # Applying delta
-        pass
+        generator.applydelta(args[0], args[1], out_path=options.outputdir)
     else:
         # Do delta
-        generator = deltarepo.DeltaRepoGenerator(id_type=options.id_type,
-                                                 logger=logger)
         generator.gendelta(args[0], args[1], out_path=options.outputdir,
                            do_only=options.do_only, skip=options.skip)
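
The TODO above asks for an up-front sanity check before --apply runs. One possible shape for it, assuming the repoid ends up in repomd.xml as a <repoid> tag (which is what set_repoid in this commit produces) and that a delta repo always carries a "removed" record (the record name applydelta looks for); the helper name is invented:

    import os
    import xml.dom.minidom

    def looks_like_delta_repo(path):
        repomd = os.path.join(path, "repodata", "repomd.xml")
        if not os.path.isfile(repomd):
            return False
        dom = xml.dom.minidom.parse(repomd)
        record_types = [e.getAttribute("type")
                        for e in dom.getElementsByTagName("data")]
        # A delta repo must carry a repoid and a removed.xml record
        return (bool(dom.getElementsByTagName("repoid"))
                and "removed" in record_types)
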
diff --git a/deltarepo/deltarepo/__init__.py b/deltarepo/deltarepo/__init__.py
index 982a5be641cacbcd48d6952de675aa4851ed9400..0f40feb5989593958a67b028b0c8ea98b19961e8 100644 (file)
--- a/deltarepo/deltarepo/__init__.py
+++ b/deltarepo/deltarepo/__init__.py
@@ -12,10 +12,12 @@ import os
 import shutil
 import hashlib
 import logging
+import xml.dom.minidom
 from lxml import etree
 import createrepo_c as cr
 
-__all__ = ['VERSION', 'VERBOSE_VERSION', 'DeltaRepoGenerator']
+__all__ = ['VERSION', 'VERBOSE_VERSION', 'DeltaRepoError',
+           'DeltaRepoGenerator']
 
 VERSION = "0.0.1"
 VERBOSE_VERSION = "%s (createrepo_c: %s)" % (VERSION, cr.VERSION)
@@ -23,12 +25,115 @@ VERBOSE_VERSION = "%s (createrepo_c: %s)" % (VERSION, cr.VERSION)
 class DeltaRepoError(Exception):
     pass
 
-class DeltaModule(object):
+class LoggingInterface(object):
+    def __init__(self, logger=None):
+        if logger is None:
+            logger = logging.getLogger()
+            logger.disabled = True
+        self.logger = logger
+
+    def _debug(self, msg):
+        self.logger.debug(msg)
+
+    def _info(self, msg):
+        self.logger.info(msg)
+
+    def _warning(self, msg):
+        self.logger.warning(msg)
+
+    def _error(self, msg):
+        self.logger.error(msg)
+
+    def _critical(self, msg):
+        self.logger.critical(msg)
+
+class DeltaModule(LoggingInterface):
+
+    def __init__(self, id_type=None, logger=None):
+        LoggingInterface.__init__(self, logger)
+
+        if id_type is None:
+            id_type = "sha256"
+        self.id_type = id_type
 
     def _path(self, path, record):
         return os.path.join(path, record.location_href)
 
 class MainDeltaModule(DeltaModule):
+
+    def apply(self, pri_old_fn, pri_delta_fn, pri_f, fil_old_fn,
+              fil_delta_fn, fil_f, oth_old_fn, oth_delta_fn, oth_f, removed):
+
+        removed_packages = set() # set of pkgIds (hashes)
+        all_packages = {}        # dict { 'pkgId': pkg }
+
+        def old_pkgcb(pkg):
+            if pkg.location_href in removed.packages:
+                if removed.packages[pkg.location_href] == pkg.location_base:
+                    # This package won't be in new metadata
+                    return
+            all_packages[pkg.pkgId] = pkg
+
+        def delta_pkgcb(pkg):
+            all_packages[pkg.pkgId] = pkg
+
+        do_primary_files = 1
+        if fil_f and fil_delta_fn and fil_old_fn:
+            do_primary_files = 0
+
+        cr.xml_parse_primary(pri_old_fn, pkgcb=old_pkgcb,
+                             do_files=do_primary_files)
+        cr.xml_parse_primary(pri_delta_fn, pkgcb=delta_pkgcb,
+                             do_files=do_primary_files)
+
+        # Sort packages
+        def cmp_pkgs(x, y):
+            # Compare only by filename
+            ret = cmp(os.path.basename(x.location_href),
+                      os.path.basename(y.location_href))
+            if ret != 0:
+                return ret
+
+            # Compare by full location_href path
+            return cmp(x.location_href, y.location_href)
+
+        all_packages_sorted = sorted(all_packages.values(), cmp=cmp_pkgs)
+
+        def newpkgcb(pkgId, name, arch):
+            return all_packages.get(pkgId, None)
+
+        # Parse filelists
+        if fil_f and fil_delta_fn and fil_old_fn:
+            cr.xml_parse_filelists(fil_old_fn, newpkgcb=newpkgcb)
+            cr.xml_parse_filelists(fil_delta_fn, newpkgcb=newpkgcb)
+
+        # Parse other
+        if oth_f and oth_delta_fn and oth_old_fn:
+            cr.xml_parse_other(oth_old_fn, newpkgcb=newpkgcb)
+            cr.xml_parse_other(oth_delta_fn, newpkgcb=newpkgcb)
+
+        num_of_packages = len(all_packages_sorted)
+
+        # Write out primary
+        pri_f.set_num_of_pkgs(num_of_packages)
+        for pkg in all_packages_sorted:
+            pri_f.add_pkg(pkg)
+        pri_f.close()
+
+        # Write out filelists
+        if fil_f:
+            fil_f.set_num_of_pkgs(num_of_packages)
+            for pkg in all_packages_sorted:
+                fil_f.add_pkg(pkg)
+            fil_f.close()
+
+        # Write out other
+        if oth_f:
+            oth_f.set_num_of_pkgs(num_of_packages)
+            for pkg in all_packages_sorted:
+                oth_f.add_pkg(pkg)
+            oth_f.close()
+
     def do(self, pri_old_fn, pri_new_fn, pri_f,
            fil_new_fn, fil_f, oth_new_fn, oth_f, removed):
 
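
The core of apply() above is a merge: keep every old package that removed.xml does not list, then overlay the packages carried by the delta. The same logic reduced to plain dicts instead of createrepo_c package objects (all names and values here are stand-ins, and the location_base check is omitted for brevity):

    old_pkgs = {"id-a": "a.rpm", "id-b": "b.rpm"}  # from the old primary.xml
    delta_pkgs = {"id-c": "c.rpm"}                 # from the delta's primary.xml
    removed_hrefs = set(["b.rpm"])                 # from removed.xml

    merged = dict((pid, href) for pid, href in old_pkgs.items()
                  if href not in removed_hrefs)
    merged.update(delta_pkgs)                      # delta entries win
    assert sorted(merged.values()) == ["a.rpm", "c.rpm"]
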
@@ -36,21 +141,55 @@ class MainDeltaModule(DeltaModule):
         added_packages = {}         # dict { 'pkgId': pkg }
         added_packages_ids = []     # list of package ids
 
+        old_repoid_strings = []
+        new_repoid_strings = []
+
         def old_pkgcb(pkg):
-            old_packages.add((pkg.pkgId, pkg.location_href, pkg.location_base))
+            pkg_id_tuple = (pkg.pkgId, pkg.location_href, pkg.location_base)
+            old_packages.add(pkg_id_tuple)
+            pkg_id_string = "%s%s%s" % (pkg.pkgId,
+                                        pkg.location_href,
+                                        pkg.location_base or '')
+            old_repoid_strings.append(pkg_id_string)
 
         def new_pkgcb(pkg):
-            pkg_tuple = (pkg.pkgId, pkg.location_href, pkg.location_base)
-            if not pkg_tuple in old_packages:
+            pkg_id_tuple = (pkg.pkgId, pkg.location_href, pkg.location_base)
+            pkg_id_string = "%s%s%s" % (pkg.pkgId,
+                                        pkg.location_href,
+                                        pkg.location_base or '')
+            new_repoid_strings.append(pkg_id_string)
+            if not pkg_id_tuple in old_packages:
                 # This package is only in new repodata
                 added_packages[pkg.pkgId] = pkg
                 added_packages_ids.append(pkg.pkgId)
             else:
                 # This package is also in the old repodata
-                old_packages.remove(pkg_tuple)
+                old_packages.remove(pkg_id_tuple)
+
+        do_new_primary_files = 1
+        if fil_f and fil_new_fn:
+            # All files will be parsed from filelists
+            do_new_primary_files = 0
 
-        cr.xml_parse_primary(pri_old_fn, pkgcb=old_pkgcb)
-        cr.xml_parse_primary(pri_new_fn, pkgcb=new_pkgcb)
+        cr.xml_parse_primary(pri_old_fn, pkgcb=old_pkgcb, do_files=0)
+        cr.xml_parse_primary(pri_new_fn, pkgcb=new_pkgcb,
+                             do_files=do_new_primary_files)
+
+        # Calculate RepoIds
+        old_repo_id = ""
+        new_repo_id = ""
+
+        h = hashlib.new(self.id_type)
+        old_repoid_strings.sort()
+        for i in old_repoid_strings:
+            h.update(i)
+        old_repo_id = h.hexdigest()
+
+        h = hashlib.new(self.id_type)
+        new_repoid_strings.sort()
+        for i in new_repoid_strings:
+            h.update(i)
+        new_repo_id = h.hexdigest()
 
         removed_pkgs = sorted(old_packages)
         for _, location_href, location_base in removed_pkgs:
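
The repoid computed above is just a hash over the sorted per-package strings pkgId + location_href + (location_base or ''). Reproducing the construction by hand (the package tuples are invented):

    import hashlib

    pkgs = [  # (pkgId, location_href, location_base)
        ("beef01", "Packages/b-1.0-1.noarch.rpm", None),
        ("cafe02", "Packages/a-1.0-1.noarch.rpm", None),
    ]
    id_strings = sorted("%s%s%s" % (pid, href, base or '')
                        for pid, href, base in pkgs)
    h = hashlib.new("sha256")   # self.id_type, "sha256" by default
    for s in id_strings:
        h.update(s)
    repo_id = h.hexdigest()
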
@@ -58,14 +197,6 @@ class MainDeltaModule(DeltaModule):
 
         num_of_packages = len(added_packages)
 
-        # Write out primary delta
-        pri_f.set_num_of_pkgs(num_of_packages)
-        for pkgid in added_packages_ids:
-            pri_f.add_pkg(added_packages[pkgid])
-        pri_f.close()
-
-        ",".join(("a", "b"))
-
         # Filelists and Other cb
         def newpkgcb(pkgId, name, arch):
             return added_packages.get(pkgId, None)
@@ -86,6 +217,16 @@ class MainDeltaModule(DeltaModule):
                 oth_f.add_pkg(added_packages[pkgid])
             oth_f.close()
 
+        # Write out primary delta
+        # Note: The primary delta has to be written after the filelists are
+        # parsed, otherwise files would be missing when do_new_primary_files was 0
+        pri_f.set_num_of_pkgs(num_of_packages)
+        for pkgid in added_packages_ids:
+            pri_f.add_pkg(added_packages[pkgid])
+        pri_f.close()
+
+        return (old_repo_id, new_repo_id)
+
 class RemovedXml(object):
     def __init__(self):
         self.packages = {}  # { location_href: location_base }
@@ -126,30 +267,29 @@ class RemovedXml(object):
                               xml_declaration=True)
 
     def xml_parse(self, path):
-        # TODO: parsing for RemovedXml
-        pass
-
-class LoggingInterface(object):
-    def __init__(self, logger=None):
-        if logger is None:
-            logger = logging.getLogger()
-            logger.disabled = True
-        self.logger = logger
-
-    def _debug(self, msg):
-        self.logger.debug(msg)
-
-    def _info(self, msg):
-        self.logger.info(msg)
-
-    def _warning(self, msg):
-        self.logger.warning(msg)
-
-    def _error(self, msg):
-        self.logger.error(msg)
-
-    def _critical(self, msg):
-        self.logger.critical(msg)
+        dom = xml.dom.minidom.parse(path)
+
+        packages = dom.getElementsByTagName("packages")
+        if packages:
+            for loc in packages[0].getElementsByTagName("location"):
+                href = loc.getAttribute("href")
+                base = loc.getAttribute("base")
+                if not href:
+                    continue
+                if not base:
+                    base = None
+                self.packages[href] = base
+
+        files = dom.getElementsByTagName("files")
+        if files:
+            for loc in files[0].getElementsByTagName("location"):
+                href = loc.getAttribute("href")
+                base = loc.getAttribute("base")
+                if not href:
+                    continue
+                if not base:
+                    base = None
+                self.files[href] = base
 
 class DeltaRepoGenerator(LoggingInterface):
     """Object for generating of DeltaRepositories."""
@@ -166,20 +306,146 @@ class DeltaRepoGenerator(LoggingInterface):
         path = os.path.basename(path)
         return path.rsplit('-')[-1]
 
+    def applydelta(self, old_path, delta_path, out_path=None):
+        removedxml = RemovedXml()
+        hash_in_the_name = False
+
+        # Prepare variables with paths
+        old_repodata_path = os.path.join(old_path, "repodata/")
+        delta_repodata_path = os.path.join(delta_path, "repodata/")
+
+        old_repomd_path = os.path.join(old_repodata_path, "repomd.xml")
+        delta_repomd_path = os.path.join(delta_repodata_path, "repomd.xml")
+
+        # Prepare Repomd objects
+        old_repomd = cr.Repomd(old_repomd_path)
+        delta_repomd = cr.Repomd(delta_repomd_path)
+        new_repomd = cr.Repomd()
+
+        # Prepare output path
+        new_path = os.path.join(out_path, ".repodata/")
+        new_repodata_path = os.path.join(new_path, "repodata/")
+        os.mkdir(new_path)
+        os.mkdir(new_repodata_path)
+
+        # Apply repomd delta
+        new_repomd.set_revision(delta_repomd.revision)
+        for tag in delta_repomd.distro_tags:
+            new_repomd.add_distro_tag(tag[1], tag[0])
+        for tag in delta_repomd.repo_tags:
+            new_repomd.add_repo_tag(tag)
+        for tag in delta_repomd.content_tags:
+            new_repomd.add_content_tag(tag)
+
+        old_records = dict([(record.type, record) for record in old_repomd.records ])
+        delta_records = dict([(record.type, record) for record in delta_repomd.records ])
+        old_record_types = set(old_records.keys())
+        delta_record_types = set(delta_records.keys())
+        deleted_repomd_record_types = old_record_types - delta_record_types
+        added_repomd_record_types = delta_record_types - old_record_types
+        # TODO: Skip removed record
+
+        # Prepare removedxml
+        if "removed" in delta_records:
+            removedxml_path = os.path.join(delta_path,
+                                delta_records["removed"].location_href)
+            removedxml.xml_parse(removedxml_path)
+        else:
+            self._warning("\"removed\" record is missing in repomd.xml "\
+                          "of the delta repo")
+
+        # Apply delta on primary, filelists and other
+        if not "primary" in old_records or not "primary" in delta_records:
+            raise DeltaRepoError("Missing primary metadata")
+
+        if delta_records["primary"].location_href.split("primary")[0] != "":
+            hash_in_the_name = True
+
+        pri_old_fn = os.path.join(old_path, old_records["primary"].location_href)
+        pri_delta_fn = os.path.join(delta_path, delta_records["primary"].location_href)
+        pri_out_fn = os.path.join(new_repodata_path, "primary.xml.gz")
+        pri_out_f_stat = cr.ContentStat(cr.SHA256)
+        pri_out_f = cr.PrimaryXmlFile(pri_out_fn, cr.GZ_COMPRESSION)
+
+        fil_old_fn = None
+        fil_delta_fn = None
+        fil_out_fn = None
+        fil_out_f_stat = None
+        fil_out_f = None
+        if ("filelists" in delta_records):
+            fil_old_fn = os.path.join(old_path, old_records["filelists"].location_href)
+            fil_delta_fn = os.path.join(delta_path, delta_records["filelists"].location_href)
+            fil_out_fn = os.path.join(new_repodata_path, "filelists.xml.gz")
+            fil_out_f_stat = cr.ContentStat(cr.SHA256)
+            fil_out_f = cr.FilelistsXmlFile(fil_out_fn, cr.GZ_COMPRESSION)
+
+        oth_old_fn = None
+        oth_delta_fn = None
+        oth_out_fn = None
+        oth_out_f_stat = None
+        oth_out_f = None
+        if ("other" in delta_records):
+            oth_old_fn = os.path.join(old_path, old_records["other"].location_href)
+            oth_delta_fn = os.path.join(delta_path, delta_records["other"].location_href)
+            oth_out_fn = os.path.join(new_repodata_path, "other.xml.gz")
+            oth_out_f_stat = cr.ContentStat(cr.SHA256)
+            oth_out_f = cr.OtherXmlFile(oth_out_fn, cr.GZ_COMPRESSION)
+
+        deltamodule = MainDeltaModule(id_type=self.id_type, logger=self.logger)
+        deltamodule.apply(pri_old_fn, pri_delta_fn, pri_out_f, fil_old_fn,
+                          fil_delta_fn, fil_out_f, oth_old_fn, oth_delta_fn,
+                          oth_out_f, removedxml)
+
+        # Prepare repomd.xml records
+        pri_rec = cr.RepomdRecord("primary", pri_out_fn)
+        pri_rec.load_contentstat(pri_out_f_stat)
+        pri_rec.fill(cr.SHA256)
+        if hash_in_the_name:
+            pri_rec.rename_file()
+        new_repomd.set_record(pri_rec)
+
+        if fil_out_fn:
+            fil_rec = cr.RepomdRecord("filelists", fil_out_fn)
+            fil_rec.load_contentstat(fil_out_f_stat)
+            fil_rec.fill(cr.SHA256)
+            if hash_in_the_name:
+                fil_rec.rename_file()
+            new_repomd.set_record(fil_rec)
+
+        if oth_out_fn:
+            oth_rec = cr.RepomdRecord("other", oth_out_fn)
+            oth_rec.load_contentstat(oth_out_f_stat)
+            oth_rec.fill(cr.SHA256)
+            if hash_in_the_name:
+                oth_rec.rename_file()
+            new_repomd.set_record(oth_rec)
+
+        # Write out repomd.xml
+        deltarepoid = "TODO"  # TODO
+        new_repomd.set_repoid(deltarepoid, self.id_type)
+        new_repomd_path = os.path.join(new_repodata_path, "repomd.xml")
+        new_repomd_xml = new_repomd.xml_dump()
+        self._debug("Writing repomd.xml")
+        open(new_repomd_path, "w").write(new_repomd_xml)
+
+        # Final move
+        final_destination = os.path.join(out_path, "repodata/")
+        if os.path.exists(final_destination):
+            self._warning("Destination dir already exists! Removing %s" % \
+                          final_destination)
+            shutil.rmtree(final_destination)
+        self._info("Moving %s -> %s" % (new_path, final_destination))
+        os.rename(new_path, final_destination)
+
     def gendelta(self, old_path, new_path, out_path=None,
                  do_only=None, skip=None):
         removedxml = RemovedXml()
+        hash_in_the_name = False
 
         # Prepare variables with paths
         new_repodata_path = os.path.join(new_path, "repodata/")
         old_repodata_path = os.path.join(old_path, "repodata/")
 
-        if not os.path.isdir(new_repodata_path):
-            raise IOError("Directory %s doesn't exists" % new_repodata_path)
-
-        if not os.path.isdir(old_repodata_path):
-            raise IOError("Directory %s doesn't exists" % old_repodata_path)
-
         old_repomd_path = os.path.join(old_repodata_path, "repomd.xml")
         new_repomd_path = os.path.join(new_repodata_path, "repomd.xml")
 
@@ -210,14 +476,13 @@ class DeltaRepoGenerator(LoggingInterface):
         deleted_repomd_record_types = old_record_types - new_record_types
         added_repomd_record_types = new_record_types - old_record_types
 
-        delta_data = {  # Data shared between delta modules
-                "removedxml": removedxml,
-            }
-
         # Do deltas for the "primary", "filelists" and "other"
         if not "primary" in old_records or not "primary" in new_records:
             raise DeltaRepoError("Missing primary metadata")
 
+        if new_records["primary"].location_href.split("primary")[0] != "":
+            hash_in_the_name = True
+
         pri_old_fn = os.path.join(old_path, old_records["primary"].location_href)
         pri_new_fn = os.path.join(new_path, new_records["primary"].location_href)
         pri_out_fn = os.path.join(delta_repodata_path, "primary.xml.gz")
@@ -245,42 +510,64 @@ class DeltaRepoGenerator(LoggingInterface):
             oth_out_f = cr.OtherXmlFile(oth_out_fn, cr.GZ_COMPRESSION)
 
         deltamodule = MainDeltaModule()
-        deltamodule.do(pri_old_fn, pri_new_fn, pri_out_f, fil_new_fn,
-                       fil_out_f, oth_new_fn, oth_out_f, removedxml)
+        ids = deltamodule.do(pri_old_fn, pri_new_fn, pri_out_f, fil_new_fn,
+                             fil_out_f, oth_new_fn, oth_out_f, removedxml)
 
+        # Prepare repomd.xml records
         pri_rec = cr.RepomdRecord("primary", pri_out_fn)
-        # TODO: Function for this
-        pri_rec.size_open = pri_out_f_stat.size
-        pri_rec.checksum_open = pri_out_f_stat.checksum
-        pri_rec.checksum_open_type = cr.checksum_name_str(pri_out_f_stat.checksum_type)
+        pri_rec.load_contentstat(pri_out_f_stat)
         pri_rec.fill(cr.SHA256)
+        if hash_in_the_name:
+            pri_rec.rename_file()
         delta_repomd.set_record(pri_rec)
 
         if fil_out_fn:
             fil_rec = cr.RepomdRecord("filelists", fil_out_fn)
+            fil_rec.load_contentstat(fil_out_f_stat)
             fil_rec.fill(cr.SHA256)
+            if hash_in_the_name:
+                fil_rec.rename_file()
             delta_repomd.set_record(fil_rec)
 
         if oth_out_fn:
             oth_rec = cr.RepomdRecord("other", oth_out_fn)
+            oth_rec.load_contentstat(oth_out_f_stat)
             oth_rec.fill(cr.SHA256)
+            if hash_in_the_name:
+                oth_rec.rename_file()
             delta_repomd.set_record(oth_rec)
 
         # Write out removed.xml
-        # TODO: Compressed!!
+        # TODO: Compression via compression wrapper
         removedxml_path = os.path.join(delta_repodata_path, "removed.xml")
+        #removedxml_path_gz = os.path.join(delta_repodata_path, "removed.xml.gz")
         removedxml_xml = removedxml.xml_dump()
+        self._debug("Writing removed.xml")
         open(removedxml_path, "w").write(removedxml_xml)
+        stat = cr.ContentStat(cr.SHA256)
+        #cr.compress_file(removedxml_path, removedxml_path_gz, cr.GZ, stat)
+        #os.remove(removedxml_path)
+        #removedxml_rec = cr.RepomdRecord("removed", removedxml_path_gz)
         removedxml_rec = cr.RepomdRecord("removed", removedxml_path)
+        removedxml_rec.load_contentstat(stat)
         removedxml_rec.fill(cr.SHA256)
+        if hash_in_the_name:
+            removedxml_rec.rename_file()
         delta_repomd.set_record(removedxml_rec)
 
         # Write out repomd.xml
-        #deltarepoid = "%s-%s" % (old_repomd.repoid, new_repomd.repoid)
-        # RepoId must be calculated during primary delta calculation
-        deltarepoid = "xxx"
+        deltarepoid = "%s-%s" % ids
         delta_repomd.set_repoid(deltarepoid, self.id_type)
         delta_repomd_path = os.path.join(delta_repodata_path, "repomd.xml")
         delta_repomd_xml = delta_repomd.xml_dump()
+        self._debug("Writing repomd.xml")
         open(delta_repomd_path, "w").write(delta_repomd_xml)
 
+        # Final move
+        final_destination = os.path.join(out_path, "%s-%s" % ids)
+        if os.path.exists(final_destination):
+            self._warning("Destination dir already exists! Removing %s" % \
+                          final_destination)
+            shutil.rmtree(final_destination)
+        self._info("Moving %s -> %s" % (delta_path, final_destination))
+        os.rename(delta_path, final_destination)
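
Finally, the record bookkeeping that now repeats for primary, filelists and other: load_contentstat() attaches the open (uncompressed) size and checksum gathered while writing, fill() computes the size and checksum of the compressed file, and rename_file() switches to the hash-in-the-name layout detected via hash_in_the_name. A condensed sketch of the pattern, assuming the XmlFile writer accepts the ContentStat as a constructor argument (in this snapshot the stat is created but not yet handed to the writer, so its open-checksum fields would stay empty):

    import createrepo_c as cr

    stat = cr.ContentStat(cr.SHA256)
    pri_f = cr.PrimaryXmlFile("repodata/primary.xml.gz",
                              cr.GZ_COMPRESSION, stat)
    # ... pri_f.set_num_of_pkgs(n); pri_f.add_pkg(pkg) for each package ...
    pri_f.close()

    rec = cr.RepomdRecord("primary", "repodata/primary.xml.gz")
    rec.load_contentstat(stat)   # open-size / open-checksum from writing
    rec.fill(cr.SHA256)          # size / checksum of the compressed file
    rec.rename_file()            # -> <sha256>-primary.xml.gz
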