deltarepo: ...
author Tomas Mlcoch <tmlcoch@redhat.com>
Wed, 12 Jun 2013 13:57:55 +0000 (15:57 +0200)
committer Tomas Mlcoch <tmlcoch@redhat.com>
Wed, 12 Jun 2013 13:57:55 +0000 (15:57 +0200)
deltarepo/deltarepo.py
deltarepo/deltarepo/__init__.py

index 9e14ee8fcddf1885a3f2c165c681eaefe491023a..6ddffa42a39fd7f6b532d3b6ba288df020d3453c 100755 (executable)
@@ -18,16 +18,16 @@ def parse_options():
                       help="Run in quiet mode.")
     parser.add_option("-v", "--verbose", action="store_true",
                       help="Run in verbose mode.")
-    parser.add_option("-l", "--list-datatypes", action="store_true",
-                      help="List datatypes for which delta is supported.")
+    #parser.add_option("-l", "--list-datatypes", action="store_true",
+    #                  help="List datatypes for which delta is supported.")
     parser.add_option("-o", "--outputdir", action="store", metavar="DIR",
                       help="Output directory.", default="./")
 
     group = OptionGroup(parser, "Delta generation")
-    group.add_option("-s", "--skip", action="append", metavar="DATATYPE",
+    group.add_option("--skip", action="append", metavar="DATATYPE",
                      help="Skip delta on the DATATYPE. Could be specified "\
                      "multiple times. (E.g., --skip=comps)")
-    group.add_option("-d", "--do-only", action="append", metavar="DATATYPE",
+    group.add_option("--do-only", action="append", metavar="DATATYPE",
                      help="Do delta only for the DATATYPE. Could be specified "\
                      "multiple times. (E.g., --do-only=primary)")
     group.add_option("-t", "--id-type", action="store", metavar="HASHTYPE",
@@ -38,6 +38,8 @@ def parse_options():
     group = OptionGroup(parser, "Delta application")
     group.add_option("-a", "--apply", action="store_true",
                      help="Enable delta application mode.")
+    group.add_option("-d", "--database", action="store_true",
+                     help="Gen database.")
     parser.add_option_group(group)
 
     options, args = parser.parse_args()
@@ -102,7 +104,8 @@ if __name__ == "__main__":
 
     if options.apply:
         # Applying delta
-        generator.applydelta(args[0], args[1], out_path=options.outputdir)
+        generator.applydelta(args[0], args[1], out_path=options.outputdir,
+                             database=options.database)
     else:
         # Do delta
         generator.gendelta(args[0], args[1], out_path=options.outputdir,
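
Note: the short aliases -s and -d are dropped from --skip/--do-only above so
that -d can be reused for the new --database switch (--list-datatypes is
commented out for now). The flag is threaded straight through to applydelta().
A minimal usage sketch (repo paths are hypothetical):

    # apply a delta and also generate the sqlite databases
    generator = DeltaRepoGenerator(logger=logger)
    generator.applydelta("old_repo/", "delta_repo/", out_path="./",
                         database=True)
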
index 0f40feb5989593958a67b028b0c8ea98b19961e8..5b3d7632032a634d62e6c300c05af837d3a46954 100644 (file)
@@ -57,24 +57,49 @@ class DeltaModule(LoggingInterface):
         self.id_type = id_type
 
     def _path(self, path, record):
+        """Return path to the repodata file."""
         return os.path.join(path, record.location_href)
 
+    def _pkg_id_tuple(self, pkg):
+        """Return tuple identifying a package in repodata.
+        (pkgId, location_href, location_base)"""
+        return (pkg.pkgId, pkg.location_href, pkg.location_base)
+
+    def _pkg_id_str(self, pkg):
+        """Return string identifying a package in repodata.
+        This strings are used for the RepoId calculation."""
+        if not pkg.pkgId:
+            self._warning("Missing pkgId in a package!")
+        if not pkg.location_href:
+            self._warning("Missing location_href at package %s %s" % \
+                          pkg.name, pkg.pkgId)
+        return "%s%s%s" % (pkg.pkgId or '',
+                           pkg.location_href or '',
+                           pkg.location_base or '')
+
 class MainDeltaModule(DeltaModule):
 
-    def apply(self, pri_old_fn, pri_delta_fn, pri_f, fil_old_fn,
-              fil_delta_fn, fil_f, oth_old_fn, oth_delta_fn, oth_f, removed):
+    def apply(self, pri_old_fn, pri_delta_fn, pri_f, pri_db, fil_old_fn,
+              fil_delta_fn, fil_f, fil_db, oth_old_fn, oth_delta_fn, oth_f,
+              oth_db, removed):
 
         removed_packages = set() # set of pkgIds (hashes)
-        all_packages = {}        # dicst { 'pkgId': pkg }
+        all_packages = {}        # dict { 'pkgId': pkg }
+
+        old_repoid_strings = []
+        new_repoid_strings = []
 
         def old_pkgcb(pkg):
+            old_repoid_strings.append(self._pkg_id_str(pkg))
             if pkg.location_href in removed.packages:
                 if removed.packages[pkg.location_href] == pkg.location_base:
                     # This package won't be in new metadata
                     return
+            new_repoid_strings.append(self._pkg_id_str(pkg))
             all_packages[pkg.pkgId] = pkg
 
         def delta_pkgcb(pkg):
+            new_repoid_strings.append(self._pkg_id_str(pkg))
             all_packages[pkg.pkgId] = pkg
 
         do_primary_files = 1
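
Note: _pkg_id_str() above concatenates pkgId, location_href and location_base,
substituting '' for missing fields, and warns when pkgId or location_href is
absent. A sketch of the value it yields, with made-up field contents:

    # hypothetical package fields
    pkgId, href, base = "abc123", "Packages/foo-1.0.rpm", None
    id_str = "%s%s%s" % (pkgId or '', href or '', base or '')
    # id_str == "abc123Packages/foo-1.0.rpm"
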
@@ -86,6 +111,22 @@ class MainDeltaModule(DeltaModule):
         cr.xml_parse_primary(pri_delta_fn, pkgcb=delta_pkgcb,
                              do_files=do_primary_files)
 
+        # Calculate RepoIds
+        old_repo_id = ""
+        new_repo_id = ""
+
+        h = hashlib.new(self.id_type)
+        old_repoid_strings.sort()
+        for i in old_repoid_strings:
+            h.update(i)
+        old_repo_id = h.hexdigest()
+
+        h = hashlib.new(self.id_type)
+        new_repoid_strings.sort()
+        for i in new_repoid_strings:
+            h.update(i)
+        new_repo_id = h.hexdigest()
+
         # Sort packages
         def cmp_pkgs(x, y):
             # Compare only by filename
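
Note: a RepoId is the hex digest (using self.id_type) of the per-package id
strings hashed in sorted order, so the result is independent of parsing order.
The same calculation as a standalone sketch (Python 2, matching the
surrounding code; id_strings would hold the _pkg_id_str() values):

    import hashlib

    def calculate_repoid(id_strings, id_type="sha256"):
        h = hashlib.new(id_type)
        for s in sorted(id_strings):  # sort first -> order-independent digest
            h.update(s)
        return h.hexdigest()
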
@@ -118,21 +159,26 @@ class MainDeltaModule(DeltaModule):
         pri_f.set_num_of_pkgs(num_of_packages)
         for pkg in all_packages_sorted:
             pri_f.add_pkg(pkg)
-        pri_f.close()
+            if pri_db:
+                pri_db.add_pkg(pkg)
 
         # Write out filelists
         if fil_f:
             fil_f.set_num_of_pkgs(num_of_packages)
             for pkg in all_packages_sorted:
                 fil_f.add_pkg(pkg)
-            fil_f.close()
+                if fil_db:
+                    fil_db.add_pkg(pkg)
 
         # Write out other
         if oth_f:
             oth_f.set_num_of_pkgs(num_of_packages)
             for pkg in all_packages_sorted:
                 oth_f.add_pkg(pkg)
-            oth_f.close()
+                if oth_db:
+                    oth_db.add_pkg(pkg)
+
+        return (old_repo_id, new_repo_id)
 
     def do(self, pri_old_fn, pri_new_fn, pri_f,
            fil_new_fn, fil_f, oth_new_fn, oth_f, removed):
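
Note: apply() no longer closes the XML output files itself; the caller in
applydelta() closes them after apply() returns (see the close() calls added
below) and receives the calculated (old_repo_id, new_repo_id) pair for
verification. Caller-side sketch:

    ids = deltamodule.apply(...)   # returns (old_repo_id, new_repo_id)
    pri_out_f.close()
    cold_id, cnew_id = ids
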
@@ -145,19 +191,12 @@ class MainDeltaModule(DeltaModule):
         new_repoid_strings = []
 
         def old_pkgcb(pkg):
-            pkg_id_tuple = (pkg.pkgId, pkg.location_href, pkg.location_base)
-            old_packages.add(pkg_id_tuple)
-            pkg_id_string = "%s%s%s" % (pkg.pkgId,
-                                        pkg.location_href,
-                                        pkg.location_base or '')
-            old_repoid_strings.append(pkg_id_string)
+            old_packages.add(self._pkg_id_tuple(pkg))
+            old_repoid_strings.append(self._pkg_id_str(pkg))
 
         def new_pkgcb(pkg):
-            pkg_id_tuple = (pkg.pkgId, pkg.location_href, pkg.location_base)
-            pkg_id_string = "%s%s%s" % (pkg.pkgId,
-                                        pkg.location_href,
-                                        pkg.location_base or '')
-            new_repoid_strings.append(pkg_id_string)
+            new_repoid_strings.append(self._pkg_id_str(pkg))
+            pkg_id_tuple = self._pkg_id_tuple(pkg)
             if not pkg_id_tuple in old_packages:
                 # This package is only in new repodata
                 added_packages[pkg.pkgId] = pkg
@@ -297,16 +336,21 @@ class DeltaRepoGenerator(LoggingInterface):
     def __init__(self, id_type=None, logger=None):
         LoggingInterface.__init__(self, logger)
 
+        # id_type is the type of checksum used for RepoId and
+        # DeltaRepoId calculation
         if id_type is None:
             id_type = "sha256"
         self.id_type = id_type
 
+        # checksum_type is the checksum type used for the repomd records.
+        self.checksum_type = cr.SHA256
+
     def _fn_without_checksum(self, path):
         """Strip checksum from a record filename"""
         path = os.path.basename(path)
         return path.rsplit('-')[-1]
 
-    def applydelta(self, old_path, delta_path, out_path=None):
+    def applydelta(self, old_path, delta_path, out_path=None, database=False):
         removedxml = RemovedXml()
         hash_in_the_name = False
 
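
Note: the generator now distinguishes two hash settings: id_type is a hashlib
algorithm name used for RepoId/DeltaRepoId digests, while checksum_type is a
createrepo_c constant used for repomd record checksums. A sketch of the
distinction (defaults as set above):

    gen = DeltaRepoGenerator()                 # id_type defaults to "sha256"
    h = hashlib.new(gen.id_type)               # RepoId hashing
    stat = cr.ContentStat(gen.checksum_type)   # repomd records (cr.SHA256)
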
@@ -322,6 +366,24 @@ class DeltaRepoGenerator(LoggingInterface):
         delta_repomd = cr.Repomd(delta_repomd_path)
         new_repomd = cr.Repomd()
 
+        # Check whether the DeltaRepoId corresponds to the repo in use
+        if not delta_repomd.repoid or len(delta_repomd.repoid.split('-')) != 2:
+            raise DeltaRepoError("Bad DeltaRepoId")
+
+        self.id_type = delta_repomd.repoid_type
+
+        old_id, new_id = delta_repomd.repoid.split('-')
+
+        self._debug("Delta %s -> %s" % (old_id, new_id))
+
+        if old_repomd.repoid_type == delta_repomd.repoid_type:
+            if old_repomd.repoid and old_repomd.repoid != old_id:
+                raise DeltaRepoError("Not suitable delta for current repo " \
+                        "(Expected: %s Real: %s)" % (old_id, old_repomd.repoid))
+        else:
+            self._debug("Different repoid types repo: %s vs delta: %s" % \
+                    (old_repomd.repoid_type, delta_repomd.repoid_type))
+
         # Prepare output path
         new_path = os.path.join(out_path, ".repodata/")
         new_repodata_path = os.path.join(new_path, "repodata/")
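
Note: a DeltaRepoId is expected to have the form "<old RepoId>-<new RepoId>".
Since hex digests contain no '-', splitting on it is unambiguous. A minimal
sketch of the check performed above (id values abridged/hypothetical):

    deltarepoid = "1afc9e...b1-77d3f2...9c"   # "<old>-<new>"
    if len(deltarepoid.split('-')) != 2:
        raise DeltaRepoError("Bad DeltaRepoId")
    old_id, new_id = deltarepoid.split('-')
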
@@ -343,7 +405,6 @@ class DeltaRepoGenerator(LoggingInterface):
         delta_record_types = set(delta_records.keys())
         deleted_repomd_record_types = old_record_types - delta_record_types
         added_repomd_record_types = delta_record_types - old_record_types
-        # TODO: Skip removed record
 
         # Prepare removedxml
         if "removed" in delta_records:
@@ -354,75 +415,158 @@ class DeltaRepoGenerator(LoggingInterface):
             self._warning("\"removed\" record is missing in repomd.xml "\
                           "of delta repo")
 
-        # Apply delta on primary, filelists and other
+        # Important sanity check (repo without primary is definitely bad)
         if not "primary" in old_records or not "primary" in delta_records:
             raise DeltaRepoError("Missing primary metadata")
 
+        # Detect type of checksum in the delta repomd.xml
+        self.checksum_type = cr.checksum_type(delta_records["primary"].checksum_type)
+        if self.checksum_type == cr.UNKNOWN_CHECKSUM:
+            raise DeltaRepoError("Unknown checksum type detected: %s" % \
+                    new_records["primary"].checksum_type)
+
+        # Detect whether unique (hash-prefixed) md filenames are used
         if delta_records["primary"].location_href.split("primary")[0] != "":
             hash_in_the_name = True
 
+        # Apply delta on primary, filelists and other
         pri_old_fn = os.path.join(old_path, old_records["primary"].location_href)
         pri_delta_fn = os.path.join(delta_path, delta_records["primary"].location_href)
         pri_out_fn = os.path.join(new_repodata_path, "primary.xml.gz")
-        pri_out_f_stat = cr.ContentStat(cr.SHA256)
+        pri_out_f_stat = cr.ContentStat(self.checksum_type)
         pri_out_f = cr.PrimaryXmlFile(pri_out_fn, cr.GZ_COMPRESSION)
+        pri_db_fn = None
+        pri_db = None
+        if database:
+            pri_db_fn = os.path.join(new_repodata_path, "primary.sqlite")
+            pri_db = cr.PrimarySqlite(pri_db_fn)
 
         fil_old_fn = None
         fil_delta_fn = None
         fil_out_fn = None
         fil_out_f_stat = None
         fil_out_f = None
+        fil_db_fn = None
+        fil_db = None
         if ("filelists" in delta_records):
             fil_old_fn = os.path.join(old_path, old_records["filelists"].location_href)
             fil_delta_fn = os.path.join(delta_path, delta_records["filelists"].location_href)
             fil_out_fn = os.path.join(new_repodata_path, "filelists.xml.gz")
-            fil_out_f_stat = cr.ContentStat(cr.SHA256)
+            fil_out_f_stat = cr.ContentStat(self.checksum_type)
             fil_out_f = cr.FilelistsXmlFile(fil_out_fn, cr.GZ_COMPRESSION)
+            if database:
+                fil_db_fn = os.path.join(new_repodata_path, "filelists.sqlite")
+                fil_db = cr.FilelistsSqlite(fil_db_fn)
 
         oth_old_fn = None
         oth_delta_fn = None
-        out_out_fn = None
+        oth_out_fn = None
         oth_out_f_stat = None
         oth_out_f = None
+        oth_db_fn = None
+        oth_db = None
         if ("other" in delta_records):
             oth_old_fn = os.path.join(old_path, old_records["other"].location_href)
             oth_delta_fn = os.path.join(delta_path, delta_records["other"].location_href)
             oth_out_fn = os.path.join(new_repodata_path, "other.xml.gz")
-            oth_out_f_stat = cr.ContentStat(cr.SHA256)
+            oth_out_f_stat = cr.ContentStat(self.checksum_type)
             oth_out_f = cr.OtherXmlFile(oth_out_fn, cr.GZ_COMPRESSION)
+            if database:
+                oth_db_fn = os.path.join(new_repodata_path, "other.sqlite")
+                oth_db = cr.OtherSqlite(oth_db_fn)
+
+        deltamodule = MainDeltaModule(id_type=self.id_type,
+                                      logger=self.logger)
+        ids = deltamodule.apply(pri_old_fn, pri_delta_fn, pri_out_f, pri_db,
+                                fil_old_fn, fil_delta_fn, fil_out_f, fil_db,
+                                oth_old_fn, oth_delta_fn, oth_out_f, oth_db,
+                                removedxml)
+
+        pri_out_f.close()
+        if fil_out_f:
+            fil_out_f.close()
+        if oth_out_f:
+            oth_out_f.close()
+
+        # Check returned IDs
+        cold_id, cnew_id = ids  # Calculated ids
 
-        deltamodule = MainDeltaModule(id_type=self.id_type, logger=self.logger)
-        deltamodule.apply(pri_old_fn, pri_delta_fn, pri_out_f, fil_old_fn,
-                          fil_delta_fn, fil_out_f, oth_old_fn, oth_delta_fn,
-                          oth_out_f, removedxml)
+        if cold_id != old_id:
+            raise DeltaRepoError("Calculated old RepoId doesn't match!")
+
+        if cnew_id != new_id:
+            raise DeltaRepoError("Calculated new RepoId doesn't match!")
+
+        self._debug("RepoIds match")
 
         # Prepare repomd.xml records
         pri_rec = cr.RepomdRecord("primary", pri_out_fn)
         pri_rec.load_contentstat(pri_out_f_stat)
-        pri_rec.fill(cr.SHA256)
+        pri_rec.fill(self.checksum_type)
         if hash_in_the_name:
             pri_rec.rename_file()
         new_repomd.set_record(pri_rec)
 
+        if database:
+            pri_db.dbinfo_update(pri_rec.checksum)
+            pri_db.close()
+            pri_db_stat = cr.ContentStat(self.checksum_type)
+            pri_db_compressed = pri_db_fn + ".bz2"
+            cr.compress_file(pri_db_fn, None, cr.BZ2, pri_db_stat)
+            os.remove(pri_db_fn)
+            pri_db_rec = cr.RepomdRecord("primary_db", pri_db_compressed)
+            pri_db_rec.load_contentstat(pri_db_stat)
+            pri_db_rec.fill(self.checksum_type)
+            if hash_in_the_name:
+                pri_db_rec.rename_file()
+            new_repomd.set_record(pri_db_rec)
+
         if fil_out_fn:
             fil_rec = cr.RepomdRecord("filelists", fil_out_fn)
             fil_rec.load_contentstat(fil_out_f_stat)
-            fil_rec.fill(cr.SHA256)
+            fil_rec.fill(self.checksum_type)
             if hash_in_the_name:
                 fil_rec.rename_file()
             new_repomd.set_record(fil_rec)
 
+        if database and fil_db:
+            fil_db.dbinfo_update(fil_rec.checksum)
+            fil_db.close()
+            fil_db_stat = cr.ContentStat(self.checksum_type)
+            fil_db_compressed = fil_db_fn + ".bz2"
+            cr.compress_file(fil_db_fn, None, cr.BZ2, fil_db_stat)
+            os.remove(fil_db_fn)
+            fil_db_rec = cr.RepomdRecord("primary_db", fil_db_compressed)
+            fil_db_rec.load_contentstat(fil_db_stat)
+            fil_db_rec.fill(self.checksum_type)
+            if hash_in_the_name:
+                fil_db_rec.rename_file()
+            new_repomd.set_record(fil_db_rec)
+
         if oth_out_fn:
             oth_rec = cr.RepomdRecord("other", oth_out_fn)
             oth_rec.load_contentstat(oth_out_f_stat)
-            oth_rec.fill(cr.SHA256)
+            oth_rec.fill(self.checksum_type)
             if hash_in_the_name:
                 oth_rec.rename_file()
             new_repomd.set_record(oth_rec)
 
+        if database and oth_db:
+            oth_db.dbinfo_update(oth_rec.checksum)
+            oth_db.close()
+            oth_db_stat = cr.ContentStat(self.checksum_type)
+            oth_db_compressed = oth_db_fn + ".bz2"
+            cr.compress_file(oth_db_fn, None, cr.BZ2, oth_db_stat)
+            os.remove(oth_db_fn)
+            oth_db_rec = cr.RepomdRecord("primary_db", oth_db_compressed)
+            oth_db_rec.load_contentstat(oth_db_stat)
+            oth_db_rec.fill(self.checksum_type)
+            if hash_in_the_name:
+                oth_db_rec.rename_file()
+            new_repomd.set_record(oth_db_rec)
+
         # Write out repomd.xml
-        deltarepoid = "TODO"  # TODO
-        new_repomd.set_repoid(deltarepoid, self.id_type)
+        new_repomd.set_repoid(ids[1], self.id_type)
         new_repomd_path = os.path.join(new_repodata_path, "repomd.xml")
         new_repomd_xml = new_repomd.xml_dump()
         self._debug("Writing repomd.xml")
@@ -476,17 +620,25 @@ class DeltaRepoGenerator(LoggingInterface):
         deleted_repomd_record_types = old_record_types - new_record_types
         added_repomd_record_types = new_record_types - old_record_types
 
-        # Do deltas for the "primary", "filelists" and "other"
+        # Important sanity check (repo without primary is definitely bad)
         if not "primary" in old_records or not "primary" in new_records:
             raise DeltaRepoError("Missing primary metadata")
 
+        # Detect type of checksum in the new repomd.xml
+        self.checksum_type = cr.checksum_type(new_records["primary"].checksum_type)
+        if self.checksum_type == cr.UNKNOWN_CHECKSUM:
+            raise DeltaRepoError("Unknown checksum type detected: %s" % \
+                    new_records["primary"].checksum_type)
+
+        # Detect whether unique (hash-prefixed) md filenames are used
         if new_records["primary"].location_href.split("primary")[0] != "":
             hash_in_the_name = True
 
+        # Do deltas for the "primary", "filelists" and "other"
         pri_old_fn = os.path.join(old_path, old_records["primary"].location_href)
         pri_new_fn = os.path.join(new_path, new_records["primary"].location_href)
         pri_out_fn = os.path.join(delta_repodata_path, "primary.xml.gz")
-        pri_out_f_stat = cr.ContentStat(cr.SHA256)
+        pri_out_f_stat = cr.ContentStat(self.checksum_type)
         pri_out_f = cr.PrimaryXmlFile(pri_out_fn, cr.GZ_COMPRESSION)
 
         fil_new_fn = None
@@ -496,7 +648,7 @@ class DeltaRepoGenerator(LoggingInterface):
         if ("filelists" in new_records):
             fil_new_fn = os.path.join(new_path, new_records["filelists"].location_href)
             fil_out_fn = os.path.join(delta_repodata_path, "filelists.xml.gz")
-            fil_out_f_stat = cr.ContentStat(cr.SHA256)
+            fil_out_f_stat = cr.ContentStat(self.checksum_type)
             fil_out_f = cr.FilelistsXmlFile(fil_out_fn, cr.GZ_COMPRESSION)
 
         oth_new_fn = None
@@ -506,17 +658,18 @@ class DeltaRepoGenerator(LoggingInterface):
         if ("other" in new_records):
             oth_new_fn = os.path.join(new_path, new_records["other"].location_href)
             oth_out_fn = os.path.join(delta_repodata_path, "other.xml.gz")
-            oth_out_f_stat = cr.ContentStat(cr.SHA256)
+            oth_out_f_stat = cr.ContentStat(self.checksum_type)
             oth_out_f = cr.OtherXmlFile(oth_out_fn, cr.GZ_COMPRESSION)
 
-        deltamodule = MainDeltaModule()
+        deltamodule = MainDeltaModule(id_type=self.id_type,
+                                      logger=self.logger)
         ids = deltamodule.do(pri_old_fn, pri_new_fn, pri_out_f, fil_new_fn,
                              fil_out_f, oth_new_fn, oth_out_f, removedxml)
 
         # Prepare repomd.xml records
         pri_rec = cr.RepomdRecord("primary", pri_out_fn)
         pri_rec.load_contentstat(pri_out_f_stat)
-        pri_rec.fill(cr.SHA256)
+        pri_rec.fill(self.checksum_type)
         if hash_in_the_name:
             pri_rec.rename_file()
         delta_repomd.set_record(pri_rec)
@@ -524,7 +677,7 @@ class DeltaRepoGenerator(LoggingInterface):
         if fil_out_fn:
             fil_rec = cr.RepomdRecord("filelists", fil_out_fn)
             fil_rec.load_contentstat(fil_out_f_stat)
-            fil_rec.fill(cr.SHA256)
+            fil_rec.fill(self.checksum_type)
             if hash_in_the_name:
                 fil_rec.rename_file()
             delta_repomd.set_record(fil_rec)
@@ -532,7 +685,7 @@ class DeltaRepoGenerator(LoggingInterface):
         if oth_out_fn:
             oth_rec = cr.RepomdRecord("other", oth_out_fn)
             oth_rec.load_contentstat(oth_out_f_stat)
-            oth_rec.fill(cr.SHA256)
+            oth_rec.fill(self.checksum_type)
             if hash_in_the_name:
                 oth_rec.rename_file()
             delta_repomd.set_record(oth_rec)
@@ -544,13 +697,13 @@ class DeltaRepoGenerator(LoggingInterface):
         removedxml_xml = removedxml.xml_dump()
         self._debug("Writing removed.xml")
         open(removedxml_path, "w").write(removedxml_xml)
-        stat = cr.ContentStat(cr.SHA256)
+        stat = cr.ContentStat(self.checksum_type)
         #cr.compress_file(removedxml_path, removedxml_path_gz, cr.GZ, stat)
         #os.remove(removedxml_path)
         #removedxml_rec = cr.RepomdRecord("removed", removedxml_path_gz)
         removedxml_rec = cr.RepomdRecord("removed", removedxml_path)
         removedxml_rec.load_contentstat(stat)
-        removedxml_rec.fill(cr.SHA256)
+        removedxml_rec.fill(self.checksum_type)
         if hash_in_the_name:
             removedxml_rec.rename_file()
         delta_repomd.set_record(removedxml_rec)