Deltarepo: Several bugfixes
author    Tomas Mlcoch <tmlcoch@redhat.com>
          Mon, 16 Dec 2013 14:46:55 +0000 (15:46 +0100)
committer Tomas Mlcoch <tmlcoch@redhat.com>
          Mon, 16 Dec 2013 14:50:21 +0000 (15:50 +0100)
deltarepo/acceptance_tests/repos/repo1/foobar-1 [new file with mode: 0644]
deltarepo/acceptance_tests/test.sh
deltarepo/deltarepo/applicator.py
deltarepo/deltarepo/delta_plugins.py
deltarepo/deltarepo/generator.py
deltarepo/deltarepo/util.py [new file with mode: 0644]

diff --git a/deltarepo/acceptance_tests/repos/repo1/foobar-1 b/deltarepo/acceptance_tests/repos/repo1/foobar-1
new file mode 100644 (file)
index 0000000..76fc659
--- /dev/null
@@ -0,0 +1 @@
+a content
\ No newline at end of file
diff --git a/deltarepo/acceptance_tests/test.sh b/deltarepo/acceptance_tests/test.sh
index a279a41..a2180b5 100755 (executable)
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-DELTAREPO="../deltarepo.py --debug"
+DELTAREPO="../deltarepo.py"
 
 MY_DIR=`dirname $0`
 
@@ -127,9 +127,6 @@ testcase01 $REPO1 $REPO2_NODATABASE
 testcase01 $REPO1 $REPO3
 testcase01 $REPO1 $REPO3_MD5
 
-testcase01 $REPO1_ONLY_PRI_FIL $REPO2_INCOMPLETE
-testcase01 $REPO1_ONLY_PRI_FIL $REPO2_INCOMPLETE_2
-
 testcase01 $REPO2 $REPO1
 testcase01 $REPO2 $REPO2_INCOMPLETE
 testcase01 $REPO2 $REPO2_INCOMPLETE_2
@@ -210,6 +207,8 @@ function testcase01_that_should_fail {
 
 testcase01_that_should_fail $REPO1_ONLY_PRI_FIL $REPO2
 testcase01_that_should_fail $REPO1_ONLY_PRI_FIL $REPO2_NODATABASE
+testcase01_that_should_fail $REPO1_ONLY_PRI_FIL $REPO2_INCOMPLETE
+testcase01_that_should_fail $REPO1_ONLY_PRI_FIL $REPO2_INCOMPLETE_2
 testcase01_that_should_fail $REPO1_ONLY_PRI_FIL $REPO3
 testcase01_that_should_fail $REPO1_ONLY_PRI_FIL $REPO3_MD5
 testcase01_that_should_fail $REPO1_ONLY_PRI_FIL $REPO1_ONLY_PRI_FIL
@@ -267,6 +266,8 @@ testcase01_that_should_succeed $REPO1_ONLY_PRI_FIL $REPO2_NODATABASE
 testcase01_that_should_succeed $REPO1_ONLY_PRI_FIL $REPO3
 testcase01_that_should_succeed $REPO1_ONLY_PRI_FIL $REPO3_MD5
 testcase01_that_should_succeed $REPO1_ONLY_PRI_FIL $REPO1_ONLY_PRI_FIL
+testcase01_that_should_succeed $REPO1_ONLY_PRI_FIL $REPO2_INCOMPLETE
+testcase01_that_should_succeed $REPO1_ONLY_PRI_FIL $REPO2_INCOMPLETE_2
 testcase01_that_should_succeed $REPO1 $REPO1_ONLY_PRI_FIL
 testcase01_that_should_succeed $REPO2 $REPO1_ONLY_PRI_FIL
 testcase01_that_should_succeed $REPO2_INCOMPLETE $REPO1_ONLY_PRI_FIL
diff --git a/deltarepo/deltarepo/applicator.py b/deltarepo/deltarepo/applicator.py
index 8c00a11..4688a58 100644 (file)
@@ -107,17 +107,14 @@ class DeltaRepoApplicator(LoggingInterface):
         if not "primary" in self.old_records:
             raise DeltaRepoError("Missing \"primary\" metadata in old repo")
 
-        if not "primary" in self.delta_records:
-            raise DeltaRepoError("Missing \"primary\" metadata in delta repo")
-
         # Detect type of checksum in the delta repomd.xml
-        self.checksum_type = cr.checksum_type(self.delta_records["primary"].checksum_type)
+        self.checksum_type = cr.checksum_type(self.delta_records["deltametadata"].checksum_type)
         if self.checksum_type == cr.UNKNOWN_CHECKSUM:
             raise DeltaRepoError("Unknown checksum type used in delta repo: %s" % \
-                    self.delta_records["primary"].checksum_type)
+                    self.delta_records["deltametadata"].checksum_type)
 
         # Detection if use unique md filenames
-        if self.delta_records["primary"].location_href.split("primary")[0] != "":
+        if self.delta_records["deltametadata"].location_href.split("deltametadata")[0] != "":
             self.unique_md_filenames = True
 
         # Load removedxml
@@ -142,7 +139,7 @@ class DeltaRepoApplicator(LoggingInterface):
 
         metadata = Metadata(metadata_type)
 
-        metadata.checksum_type = DEFAULT_CHECKSUM_TYPE
+        metadata.checksum_type = self.checksum_type
         metadata.compression_type = DEFAULT_COMPRESSION_TYPE
 
         # Set output directory
@@ -192,16 +189,17 @@ class DeltaRepoApplicator(LoggingInterface):
 
         return metadata
 
-    def check_content_hashes(self):
+    def check_content_hashes(self, pri_md):
         self._debug("Checking expected content hashes")
 
+        if not pri_md:
+            self._warning("Content hashes cannot be checked!")
+
         c_old_contenthash = self.globalbundle.calculated_old_contenthash
         c_new_contenthash = self.globalbundle.calculated_new_contenthash
 
         if not c_old_contenthash or not c_new_contenthash:
 
-            pri_md = self._new_metadata("primary")
-
             if not c_old_contenthash:
                 if not pri_md.old_fn_exists:
                     raise DeltaRepoError("Old repository doesn't have "
@@ -217,6 +215,9 @@ class DeltaRepoApplicator(LoggingInterface):
                                                           self.contenthash_type_str,
                                                           self._get_logger())
 
+            self.globalbundle.calculated_old_contenthash = c_old_contenthash
+            self.globalbundle.calculated_new_contenthash = c_new_contenthash
+
         self._debug("Calculated content hash of the old repo: {0}".format(
                     c_old_contenthash))
         self._debug("Calculated content hash of the new repo: {0}".format(
@@ -225,7 +226,7 @@ class DeltaRepoApplicator(LoggingInterface):
         if self.old_contenthash != c_old_contenthash:
             message = "Content hash of the old repository doesn't match "\
                       "the real one ({1} != {2}).".format(self.old_contenthash,
-                      self.globalbundle.calculated_old_contenthash)
+                      c_old_contenthash)
             self._error(message)
             raise DeltaRepoError(message)
         else:
@@ -235,7 +236,7 @@ class DeltaRepoApplicator(LoggingInterface):
         if self.new_contenthash != c_new_contenthash:
             message = "Content hash of the new repository doesn't match "\
                       "the real one ({1} != {2}).".format(self.new_contenthash,
-                      self.globalbundle.calculated_new_contenthash)
+                      c_new_contenthash)
             self._error(message)
             raise DeltaRepoError(message)
         else:
@@ -249,6 +250,7 @@ class DeltaRepoApplicator(LoggingInterface):
 
         # Set of types of processed metadata records ("primary", "primary_db"...)
         processed_metadata = set()
+        primary_metadata_object = None
 
         for plugin in PLUGINS:
 
@@ -256,6 +258,8 @@ class DeltaRepoApplicator(LoggingInterface):
             metadata_objects = {}
             for metadata_name in plugin.METADATA:
                 metadata_object = self._new_metadata(metadata_name)
+                if metadata_name == "primary":
+                    primary_metadata_object = metadata_object
                 if metadata_object is not None:
                     metadata_objects[metadata_name] = metadata_object
 
@@ -338,7 +342,7 @@ class DeltaRepoApplicator(LoggingInterface):
                 self.new_repomd.set_record(rec)
 
         # Check if calculated repoids match
-        self.check_content_hashes()
+        self.check_content_hashes(primary_metadata_object)
 
         # Prepare and write out the new repomd.xml
         self._debug("Preparing repomd.xml ...")
diff --git a/deltarepo/deltarepo/delta_plugins.py b/deltarepo/deltarepo/delta_plugins.py
index 3e26977..8e9312b 100644 (file)
@@ -193,10 +193,13 @@ class DeltaRepoPlugin(LoggingInterface):
             # No metadata - Nothing to do
             return (True, None, None)
 
+        md.delta_rec = None
+        md.delta_fn_exists = False
+
         if not md.old_rec and not md.new_rec:
-            # None metadata record exists. This is weird and shouldn't happen
-            self._warning("\"{0}\": WEIRD CONDITIONS: doesn't exist "
-                          "in any repo".format())
+            # None metadata record exists.
+            self._debug("\"{0}\": Doesn't exist "
+                        "in any repo".format(md.metadata_type))
             return (True, None, None)
 
         if not md.new_rec:
@@ -247,6 +250,9 @@ class DeltaRepoPlugin(LoggingInterface):
             if compression != cr.NO_COMPRESSION:
                 notes["compressed"] = "1"
 
+            md.delta_rec = rec
+            md.delta_fn_exists = True
+
             return (True, rec, notes)
 
         # At this point we are sure that we have both metadata files
@@ -276,14 +282,20 @@ class DeltaRepoPlugin(LoggingInterface):
             # No metadata - Nothing to do
             return (True, None)
 
+        # Initialize the new-record attributes of md.
+        # These variables should be set only if a new record was generated;
+        # otherwise they should be None/False.
+        md.new_rec = None
+        md.new_fn_exists = False
+
         if not notes:
             # No notes - Nothing to do
             return (True, None)
 
         if not md.old_rec and not md.delta_rec:
-            # None metadata record exists. This is weird and shouldn't happen
-            self._warning("\"{0}\": WEIRD CONDITIONS: doesn't exist "
-                          "in any repo".format())
+            # None metadata record exists.
+            self._debug("\"{0}\": Doesn't exist "
+                        "in any repo".format(md.metadata_type))
             return (True, None)
 
         if not md.delta_rec:
@@ -320,6 +332,10 @@ class DeltaRepoPlugin(LoggingInterface):
             if self.globalbundle.unique_md_filenames:
                 rec.rename_file()
                 md.new_fn = rec.location_real
+
+            md.new_rec = rec
+            md.new_fn_exists = True
+
             return (True, rec)
 
         if not md.delta_fn_exists:
@@ -340,6 +356,10 @@ class DeltaRepoPlugin(LoggingInterface):
 
             rec = self.apply_use_original(md, decompress)
             self._debug("\"{0}\": Used delta is just a copy")
+
+            md.new_rec = rec
+            md.new_fn_exists = True
+
             return (True, rec)
 
         if not md.old_fn_exists:
@@ -422,6 +442,52 @@ class MainDeltaRepoPlugin(DeltaRepoPlugin):
                           pkg.location_base or '')
         return idstr
 
+    def _gen_db_from_xml(self, md, source="delta"):
+        """Gen sqlite db from the delta metadata.
+        """
+        mdtype = md.metadata_type
+
+        if mdtype == "primary":
+            dbclass = cr.PrimarySqlite
+            parsefunc = cr.xml_parse_primary
+        elif mdtype == "filelists":
+            dbclass = cr.FilelistsSqlite
+            parsefunc = cr.xml_parse_filelists
+        elif mdtype == "other":
+            dbclass = cr.OtherSqlite
+            parsefunc = cr.xml_parse_other
+        else:
+            raise DeltaRepoPluginError("Unsupported type of metadata {0}".format(mdtype))
+
+        src_fn = md.new_fn
+        src_rec = md.new_rec
+
+        md.db_fn = os.path.join(md.out_dir, "{0}.sqlite".format(mdtype))
+        db = dbclass(md.db_fn)
+
+        def pkgcb(pkg):
+            db.add_pkg(pkg)
+
+        parsefunc(src_fn, pkgcb=pkgcb)
+
+        db.dbinfo_update(src_rec.checksum)
+        db.close()
+
+        db_stat = cr.ContentStat(md.checksum_type)
+        db_compressed = md.db_fn+".bz2"
+        cr.compress_file(md.db_fn, None, cr.BZ2, db_stat)
+        os.remove(md.db_fn)
+
+        # Prepare repomd record of database file
+        db_rec = cr.RepomdRecord("{0}_db".format(md.metadata_type),
+                                 db_compressed)
+        db_rec.load_contentstat(db_stat)
+        db_rec.fill(md.checksum_type)
+        if self.globalbundle.unique_md_filenames:
+            db_rec.rename_file()
+
+        return db_rec
+
     def apply(self, metadata):
         # Check input arguments
         if "primary" not in metadata:
@@ -430,14 +496,16 @@ class MainDeltaRepoPlugin(DeltaRepoPlugin):
 
         gen_repomd_recs = []
 
-        gen_db_for = set([])
         removed_packages = {}
 
         pri_md = metadata.get("primary")
         fil_md = metadata.get("filelists")
         oth_md = metadata.get("other")
 
-        def try_simple_delta(md):
+        def try_simple_delta(md, dbclass):
+            if not md:
+                return
+
             notes = self._metadata_notes_from_plugin_bundle(md.metadata_type)
             if not notes:
                 self._warning("Metadata \"{0}\" doesn't have a record in "
@@ -449,16 +517,21 @@ class MainDeltaRepoPlugin(DeltaRepoPlugin):
             if rec:
                 gen_repomd_recs.append(rec)
 
+            if not md.new_fn_exists:
+                return True
+
             # Gen DB here
-            # TODO: TODO
+            if self.globalbundle.force_database or notes.get("database") == "1":
+                rec = self._gen_db_from_xml(md)
+                gen_repomd_recs.append(rec)
 
             return True
 
         # At first try to simple delta
 
-        simple_pri_delta = try_simple_delta(pri_md)
-        simple_fil_delta = try_simple_delta(fil_md)
-        simple_oth_delta = try_simple_delta(oth_md)
+        simple_pri_delta = try_simple_delta(pri_md, cr.PrimarySqlite)
+        simple_fil_delta = try_simple_delta(fil_md, cr.FilelistsSqlite)
+        simple_oth_delta = try_simple_delta(oth_md, cr.OtherSqlite)
 
         if simple_pri_delta:
             assert simple_fil_delta
@@ -471,15 +544,6 @@ class MainDeltaRepoPlugin(DeltaRepoPlugin):
         if simple_oth_delta:
             oth_md = None
 
-        # Make a set of md_types for which databases should be generated
-        # TODO: REFACTOR THIS
-        for record in self.pluginbundle.get_list("metadata", []):
-            mdtype = record.get("type")
-            if not mdtype:
-                continue
-            if self.globalbundle.force_database or record.get("database") == "1":
-                gen_db_for.add(mdtype)
-
         # Make a dict of removed packages key is location_href,
         # value is location_base
         for record in self.pluginbundle.get_list("removedpackage", []):
@@ -496,6 +560,13 @@ class MainDeltaRepoPlugin(DeltaRepoPlugin):
             if md is None:
                 return
 
+            notes = self._metadata_notes_from_plugin_bundle(md.metadata_type)
+            if not notes:
+                # TODO: Add a flag to ignore this case
+                self._warning("Metadata \"{0}\" doesn't have a record in "
+                              "deltametadata.xml - Ignoring")
+                return
+
             suffix = cr.compression_suffix(md.compression_type) or ""
             md.new_fn = os.path.join(md.out_dir,
                                      "{0}.xml{1}".format(
@@ -505,7 +576,7 @@ class MainDeltaRepoPlugin(DeltaRepoPlugin):
                                 md.compression_type,
                                 md.new_f_stat)
 
-            if md.metadata_type in gen_db_for:
+            if self.globalbundle.force_database or notes.get("database") == "1":
                 md.db_fn = os.path.join(md.out_dir, "{0}.sqlite".format(
                                         md.metadata_type))
                 md.db = dbclass(md.db_fn)
@@ -640,6 +711,9 @@ class MainDeltaRepoPlugin(DeltaRepoPlugin):
             if self.globalbundle.unique_md_filenames:
                 rec.rename_file()
 
+            md.new_rec = rec
+            md.new_fn_exists = True
+
             gen_repomd_recs.append(rec)
 
             # Prepare database
@@ -855,6 +929,9 @@ class MainDeltaRepoPlugin(DeltaRepoPlugin):
             if self.globalbundle.unique_md_filenames:
                 rec.rename_file()
 
+            md.delta_rec = rec
+            md.delta_fn_exists = True
+
             gen_repomd_recs.append(rec)
 
             # Prepare database
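The new _gen_db_from_xml() helper lets the simple-delta path regenerate the *_db sqlite files when deltametadata.xml (or force_database) asks for them, replacing the earlier TODO. A reduced sketch of that flow for the "primary" case, using the same createrepo_c calls as the patch (the function name, paths, and the default checksum type are illustrative):

    import os
    import createrepo_c as cr

    def gen_primary_db(primary_xml_fn, primary_rec_checksum, out_dir,
                       checksum_type=cr.SHA256):
        # Parse the primary XML and feed every package into a new sqlite db
        db_fn = os.path.join(out_dir, "primary.sqlite")
        db = cr.PrimarySqlite(db_fn)
        cr.xml_parse_primary(primary_xml_fn, pkgcb=db.add_pkg)
        db.dbinfo_update(primary_rec_checksum)  # checksum of the source XML record
        db.close()

        # Compress the db, drop the uncompressed file, build its repomd record
        db_stat = cr.ContentStat(checksum_type)
        cr.compress_file(db_fn, None, cr.BZ2, db_stat)
        os.remove(db_fn)

        db_rec = cr.RepomdRecord("primary_db", db_fn + ".bz2")
        db_rec.load_contentstat(db_stat)
        db_rec.fill(checksum_type)
        return db_rec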
diff --git a/deltarepo/deltarepo/generator.py b/deltarepo/deltarepo/generator.py
index 45f48e6..073c373 100644 (file)
@@ -196,6 +196,9 @@ class DeltaRepoGenerator(LoggingInterface):
                                                           self.contenthash_type_str,
                                                           self._get_logger())
 
+            self.globalbundle.calculated_old_contenthash = c_old_contenthash
+            self.globalbundle.calculated_new_contenthash = c_new_contenthash
+
         self._debug("Calculated content hash of the old repo: {0}".format(
                     c_old_contenthash))
         self._debug("Calculated content hash of the new repo: {0}".format(
diff --git a/deltarepo/deltarepo/util.py b/deltarepo/deltarepo/util.py
new file mode 100644 (file)
index 0000000..77d9800
--- /dev/null
@@ -0,0 +1,39 @@
+import hashlib
+import logging
+import createrepo_c as cr
+
+
+def log(logger, level, msg):
+    if not logger:
+        return
+    logger.log(level, msg)
+
+def pkg_id_str(pkg, logger=None):
+    """Return string identifying a package in repodata.
+    This strings are used for the RepoId calculation."""
+    if not pkg.pkgId:
+        log(logger, logging.WARNING, "Missing pkgId in a package!")
+    if not pkg.location_href:
+        log(logger, logging.WARNING, "Missing location_href at "
+                                     "package %s %s" % (pkg.name, pkg.pkgId))
+
+    idstr = "%s%s%s" % (pkg.pkgId or '',
+                        pkg.location_href or '',
+                        pkg.location_base or '')
+    return idstr
+
+def calculate_content_hash(path_to_primary_xml, type="sha256", logger=None):
+    pkg_id_strs = []
+
+    def old_pkgcb(pkg):
+        pkg_id_strs.append(pkg_id_str(pkg, logger))
+
+    cr.xml_parse_primary(path_to_primary_xml, pkgcb=old_pkgcb, do_files=False)
+
+    pkg_id_strs.sort()
+
+    packages_hash = []
+    h = hashlib.new(type)
+    for i in pkg_id_strs:
+        h.update(i)
+    return h.hexdigest()
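The new util module pulls the package-id string and content-hash calculation into one place (the same idstr construction also appears in MainDeltaRepoPlugin above). A short usage sketch, assuming the module is importable as deltarepo.util and pointing it at an illustrative primary.xml path:

    from deltarepo.util import calculate_content_hash

    # The hash is derived from the sorted pkgId/location strings of all
    # packages listed in primary.xml, so it identifies the repo content.
    contenthash = calculate_content_hash("repodata/primary.xml.gz", type="sha256")
    print(contenthash)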