From fe922897916e6b607f6b4a3096d81d8d8d0acfbd Mon Sep 17 00:00:00 2001
From: Tomas Mlcoch
Date: Mon, 16 Dec 2013 15:46:55 +0100
Subject: [PATCH] Deltarepo: Several bugfixes

---
 deltarepo/acceptance_tests/repos/repo1/foobar-1 |   1 +
 deltarepo/acceptance_tests/test.sh              |   9 +-
 deltarepo/deltarepo/applicator.py               |  30 +++---
 deltarepo/deltarepo/delta_plugins.py            | 121 +++++++++++++++++++-----
 deltarepo/deltarepo/generator.py                |   3 +
 deltarepo/deltarepo/util.py                     |  39 ++++++++
 6 files changed, 164 insertions(+), 39 deletions(-)
 create mode 100644 deltarepo/acceptance_tests/repos/repo1/foobar-1
 create mode 100644 deltarepo/deltarepo/util.py

diff --git a/deltarepo/acceptance_tests/repos/repo1/foobar-1 b/deltarepo/acceptance_tests/repos/repo1/foobar-1
new file mode 100644
index 0000000..76fc659
--- /dev/null
+++ b/deltarepo/acceptance_tests/repos/repo1/foobar-1
@@ -0,0 +1 @@
+a content
\ No newline at end of file
diff --git a/deltarepo/acceptance_tests/test.sh b/deltarepo/acceptance_tests/test.sh
index a279a41..a2180b5 100755
--- a/deltarepo/acceptance_tests/test.sh
+++ b/deltarepo/acceptance_tests/test.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-DELTAREPO="../deltarepo.py --debug"
+DELTAREPO="../deltarepo.py"
 
 MY_DIR=`dirname $0`
 
@@ -127,9 +127,6 @@ testcase01 $REPO1 $REPO2_NODATABASE
 testcase01 $REPO1 $REPO3
 testcase01 $REPO1 $REPO3_MD5
 
-testcase01 $REPO1_ONLY_PRI_FIL $REPO2_INCOMPLETE
-testcase01 $REPO1_ONLY_PRI_FIL $REPO2_INCOMPLETE_2
-
 testcase01 $REPO2 $REPO1
 testcase01 $REPO2 $REPO2_INCOMPLETE
 testcase01 $REPO2 $REPO2_INCOMPLETE_2
@@ -210,6 +207,8 @@ function testcase01_that_should_fail {
 
 testcase01_that_should_fail $REPO1_ONLY_PRI_FIL $REPO2
 testcase01_that_should_fail $REPO1_ONLY_PRI_FIL $REPO2_NODATABASE
+testcase01_that_should_fail $REPO1_ONLY_PRI_FIL $REPO2_INCOMPLETE
+testcase01_that_should_fail $REPO1_ONLY_PRI_FIL $REPO2_INCOMPLETE_2
 testcase01_that_should_fail $REPO1_ONLY_PRI_FIL $REPO3
 testcase01_that_should_fail $REPO1_ONLY_PRI_FIL $REPO3_MD5
 testcase01_that_should_fail $REPO1_ONLY_PRI_FIL $REPO1_ONLY_PRI_FIL
@@ -267,6 +266,8 @@ testcase01_that_should_succeed $REPO1_ONLY_PRI_FIL $REPO2_NODATABASE
 testcase01_that_should_succeed $REPO1_ONLY_PRI_FIL $REPO3
 testcase01_that_should_succeed $REPO1_ONLY_PRI_FIL $REPO3_MD5
 testcase01_that_should_succeed $REPO1_ONLY_PRI_FIL $REPO1_ONLY_PRI_FIL
+testcase01_that_should_succeed $REPO1_ONLY_PRI_FIL $REPO2_INCOMPLETE
+testcase01_that_should_succeed $REPO1_ONLY_PRI_FIL $REPO2_INCOMPLETE_2
 testcase01_that_should_succeed $REPO1 $REPO1_ONLY_PRI_FIL
 testcase01_that_should_succeed $REPO2 $REPO1_ONLY_PRI_FIL
 testcase01_that_should_succeed $REPO2_INCOMPLETE $REPO1_ONLY_PRI_FIL
diff --git a/deltarepo/deltarepo/applicator.py b/deltarepo/deltarepo/applicator.py
index 8c00a11..4688a58 100644
--- a/deltarepo/deltarepo/applicator.py
+++ b/deltarepo/deltarepo/applicator.py
@@ -107,17 +107,14 @@ class DeltaRepoApplicator(LoggingInterface):
         if not "primary" in self.old_records:
             raise DeltaRepoError("Missing \"primary\" metadata in old repo")
 
-        if not "primary" in self.delta_records:
-            raise DeltaRepoError("Missing \"primary\" metadata in delta repo")
-
         # Detect type of checksum in the delta repomd.xml
-        self.checksum_type = cr.checksum_type(self.delta_records["primary"].checksum_type)
+        self.checksum_type = cr.checksum_type(self.delta_records["deltametadata"].checksum_type)
         if self.checksum_type == cr.UNKNOWN_CHECKSUM:
             raise DeltaRepoError("Unknown checksum type used in delta repo: %s" % \
-                    self.delta_records["primary"].checksum_type)
+                    self.delta_records["deltametadata"].checksum_type)
 
         # Detection if use unique md filenames
-        if self.delta_records["primary"].location_href.split("primary")[0] != "":
+        if self.delta_records["deltametadata"].location_href.split("deltametadata")[0] != "":
             self.unique_md_filenames = True
 
         # Load removedxml
@@ -142,7 +139,7 @@ class DeltaRepoApplicator(LoggingInterface):
 
         metadata = Metadata(metadata_type)
 
-        metadata.checksum_type = DEFAULT_CHECKSUM_TYPE
+        metadata.checksum_type = self.checksum_type
         metadata.compression_type = DEFAULT_COMPRESSION_TYPE
 
         # Set output directory
@@ -192,16 +189,17 @@ class DeltaRepoApplicator(LoggingInterface):
 
         return metadata
 
-    def check_content_hashes(self):
+    def check_content_hashes(self, pri_md):
         self._debug("Checking expected content hashes")
 
+        if not pri_md:
+            self._warning("Content hashes cannot be checked!")
+
         c_old_contenthash = self.globalbundle.calculated_old_contenthash
         c_new_contenthash = self.globalbundle.calculated_new_contenthash
 
         if not c_old_contenthash or not c_new_contenthash:
-            pri_md = self._new_metadata("primary")
-
             if not c_old_contenthash:
                 if not pri_md.old_fn_exists:
                     raise DeltaRepoError("Old repository doesn't have "
@@ -217,6 +215,9 @@ class DeltaRepoApplicator(LoggingInterface):
                                               self.contenthash_type_str,
                                               self._get_logger())
 
+            self.globalbundle.calculated_old_contenthash = c_old_contenthash
+            self.globalbundle.calculated_new_contenthash = c_new_contenthash
+
         self._debug("Calculated content hash of the old repo: {0}".format(
             c_old_contenthash))
         self._debug("Calculated content hash of the new repo: {0}".format(
@@ -225,7 +226,7 @@ class DeltaRepoApplicator(LoggingInterface):
         if self.old_contenthash != c_old_contenthash:
             message = "Content hash of the old repository doesn't match "\
                       "the real one ({1} != {2}).".format(self.old_contenthash,
-                      self.globalbundle.calculated_old_contenthash)
+                      c_old_contenthash)
             self._error(message)
             raise DeltaRepoError(message)
         else:
@@ -235,7 +236,7 @@ class DeltaRepoApplicator(LoggingInterface):
         if self.new_contenthash != c_new_contenthash:
             message = "Content hash of the new repository doesn't match "\
                       "the real one ({1} != {2}).".format(self.new_contenthash,
-                      self.globalbundle.calculated_new_contenthash)
+                      c_new_contenthash)
             self._error(message)
             raise DeltaRepoError(message)
         else:
@@ -249,6 +250,7 @@ class DeltaRepoApplicator(LoggingInterface):
 
         # Set of types of processed metadata records ("primary", "primary_db"...)
         processed_metadata = set()
+        primary_metadata_object = None
 
         for plugin in PLUGINS:
 
@@ -256,6 +258,8 @@ class DeltaRepoApplicator(LoggingInterface):
             metadata_objects = {}
             for metadata_name in plugin.METADATA:
                 metadata_object = self._new_metadata(metadata_name)
+                if metadata_name == "primary":
+                    primary_metadata_object = metadata_object
                 if metadata_object is not None:
                     metadata_objects[metadata_name] = metadata_object
 
@@ -338,7 +342,7 @@ class DeltaRepoApplicator(LoggingInterface):
             self.new_repomd.set_record(rec)
 
         # Check if calculated repoids match
-        self.check_content_hashes()
+        self.check_content_hashes(primary_metadata_object)
 
         # Prepare and write out the new repomd.xml
         self._debug("Preparing repomd.xml ...")
diff --git a/deltarepo/deltarepo/delta_plugins.py b/deltarepo/deltarepo/delta_plugins.py
index 3e26977..8e9312b 100644
--- a/deltarepo/deltarepo/delta_plugins.py
+++ b/deltarepo/deltarepo/delta_plugins.py
@@ -193,10 +193,13 @@ class DeltaRepoPlugin(LoggingInterface):
             # No metadata - Nothing to do
             return (True, None, None)
 
+        md.delta_rec = None
+        md.delta_fn_exists = False
+
         if not md.old_rec and not md.new_rec:
-            # None metadata record exists. This is weird and shouldn't happen
-            self._warning("\"{0}\": WEIRD CONDITIONS: doesn't exist "
-                          "in any repo".format())
+            # No metadata record exists.
+            self._debug("\"{0}\": Doesn't exist "
+                        "in any repo".format(md.metadata_type))
             return (True, None, None)
 
         if not md.new_rec:
@@ -247,6 +250,9 @@ class DeltaRepoPlugin(LoggingInterface):
             if compression != cr.NO_COMPRESSION:
                 notes["compressed"] = "1"
 
+            md.delta_rec = rec
+            md.delta_fn_exists = True
+
             return (True, rec, notes)
 
         # At this point we are sure that we have both metadata files
@@ -276,14 +282,20 @@ class DeltaRepoPlugin(LoggingInterface):
             # No metadata - Nothing to do
             return (True, None)
 
+        # Init some stuff in md
+        # These variables should be set only if a new record was generated
+        # Otherwise they should be None/False
+        md.new_rec = None
+        md.new_fn_exists = False
+
         if not notes:
             # No notes - Nothing to do
             return (True, None)
 
         if not md.old_rec and not md.delta_rec:
-            # None metadata record exists. This is weird and shouldn't happen
-            self._warning("\"{0}\": WEIRD CONDITIONS: doesn't exist "
-                          "in any repo".format())
+            # No metadata record exists.
+            self._debug("\"{0}\": Doesn't exist "
+                        "in any repo".format(md.metadata_type))
             return (True, None)
 
         if not md.delta_rec:
@@ -320,6 +332,10 @@ class DeltaRepoPlugin(LoggingInterface):
             if self.globalbundle.unique_md_filenames:
                 rec.rename_file()
                 md.new_fn = rec.location_real
+
+            md.new_rec = rec
+            md.new_fn_exists = True
+
             return (True, rec)
 
         if not md.delta_fn_exists:
@@ -340,6 +356,10 @@ class DeltaRepoPlugin(LoggingInterface):
 
             rec = self.apply_use_original(md, decompress)
             self._debug("\"{0}\": Used delta is just a copy")
+
+            md.new_rec = rec
+            md.new_fn_exists = True
+
             return (True, rec)
 
         if not md.old_fn_exists:
@@ -422,6 +442,52 @@ class MainDeltaRepoPlugin(DeltaRepoPlugin):
                                pkg.location_base or '')
         return idstr
 
+    def _gen_db_from_xml(self, md, source="delta"):
+        """Generate a sqlite db from the delta metadata.
+        """
+        mdtype = md.metadata_type
+
+        if mdtype == "primary":
+            dbclass = cr.PrimarySqlite
+            parsefunc = cr.xml_parse_primary
+        elif mdtype == "filelists":
+            dbclass = cr.FilelistsSqlite
+            parsefunc = cr.xml_parse_filelists
+        elif mdtype == "other":
+            dbclass = cr.OtherSqlite
+            parsefunc = cr.xml_parse_other
+        else:
+            raise DeltaRepoPluginError("Unsupported type of metadata {0}".format(mdtype))
+
+        src_fn = md.new_fn
+        src_rec = md.new_rec
+
+        md.db_fn = os.path.join(md.out_dir, "{0}.sqlite".format(mdtype))
+        db = dbclass(md.db_fn)
+
+        def pkgcb(pkg):
+            db.add_pkg(pkg)
+
+        parsefunc(src_fn, pkgcb=pkgcb)
+
+        db.dbinfo_update(src_rec.checksum)
+        db.close()
+
+        db_stat = cr.ContentStat(md.checksum_type)
+        db_compressed = md.db_fn+".bz2"
+        cr.compress_file(md.db_fn, None, cr.BZ2, db_stat)
+        os.remove(md.db_fn)
+
+        # Prepare repomd record of database file
+        db_rec = cr.RepomdRecord("{0}_db".format(md.metadata_type),
+                                 db_compressed)
+        db_rec.load_contentstat(db_stat)
+        db_rec.fill(md.checksum_type)
+        if self.globalbundle.unique_md_filenames:
+            db_rec.rename_file()
+
+        return db_rec
+
     def apply(self, metadata):
         # Check input arguments
         if "primary" not in metadata:
@@ -430,14 +496,16 @@ class MainDeltaRepoPlugin(DeltaRepoPlugin):
 
         gen_repomd_recs = []
 
-        gen_db_for = set([])
         removed_packages = {}
 
         pri_md = metadata.get("primary")
        fil_md = metadata.get("filelists")
         oth_md = metadata.get("other")
 
-        def try_simple_delta(md):
+        def try_simple_delta(md, dbclass):
+            if not md:
+                return
+
             notes = self._metadata_notes_from_plugin_bundle(md.metadata_type)
             if not notes:
                 self._warning("Metadata \"{0}\" doesn't have a record in "
@@ -449,16 +517,21 @@ class MainDeltaRepoPlugin(DeltaRepoPlugin):
 
             if rec:
                 gen_repomd_recs.append(rec)
 
+            if not md.new_fn_exists:
+                return True
+
             # Gen DB here
-            # TODO: TODO
+            if self.globalbundle.force_database or notes.get("database") == "1":
+                rec = self._gen_db_from_xml(md)
+                gen_repomd_recs.append(rec)
 
             return True
 
         # At first try to simple delta
-        simple_pri_delta = try_simple_delta(pri_md)
-        simple_fil_delta = try_simple_delta(fil_md)
-        simple_oth_delta = try_simple_delta(oth_md)
+        simple_pri_delta = try_simple_delta(pri_md, cr.PrimarySqlite)
+        simple_fil_delta = try_simple_delta(fil_md, cr.FilelistsSqlite)
+        simple_oth_delta = try_simple_delta(oth_md, cr.OtherSqlite)
 
         if simple_pri_delta:
             assert simple_fil_delta
@@ -471,15 +544,6 @@ class MainDeltaRepoPlugin(DeltaRepoPlugin):
         if simple_oth_delta:
             oth_md = None
 
-        # Make a set of md_types for which databases should be generated
-        # TODO: REFACTOR THIS
-        for record in self.pluginbundle.get_list("metadata", []):
-            mdtype = record.get("type")
-            if not mdtype:
-                continue
-            if self.globalbundle.force_database or record.get("database") == "1":
-                gen_db_for.add(mdtype)
-
         # Make a dict of removed packages key is location_href,
         # value is location_base
         for record in self.pluginbundle.get_list("removedpackage", []):
@@ -496,6 +560,13 @@ class MainDeltaRepoPlugin(DeltaRepoPlugin):
             if md is None:
                 return
 
+            notes = self._metadata_notes_from_plugin_bundle(md.metadata_type)
+            if not notes:
+                # TODO: Add some flag to ignore this
+                self._warning("Metadata \"{0}\" doesn't have a record in "
+                              "deltametadata.xml - Ignoring".format(md.metadata_type))
+                return
+
             suffix = cr.compression_suffix(md.compression_type) or ""
             md.new_fn = os.path.join(md.out_dir,
                                      "{0}.xml{1}".format(
@@ -505,7 +576,7 @@ class MainDeltaRepoPlugin(DeltaRepoPlugin):
                 md.compression_type,
                 md.new_f_stat)
 
-            if md.metadata_type in gen_db_for:
+            if self.globalbundle.force_database or notes.get("database") == "1":
                 md.db_fn = os.path.join(md.out_dir, "{0}.sqlite".format(
                     md.metadata_type))
                 md.db = dbclass(md.db_fn)
@@ -640,6 +711,9 @@ class MainDeltaRepoPlugin(DeltaRepoPlugin):
         if self.globalbundle.unique_md_filenames:
             rec.rename_file()
 
+        md.new_rec = rec
+        md.new_fn_exists = True
+
         gen_repomd_recs.append(rec)
 
         # Prepare database
@@ -855,6 +929,9 @@ class MainDeltaRepoPlugin(DeltaRepoPlugin):
         if self.globalbundle.unique_md_filenames:
             rec.rename_file()
 
+        md.delta_rec = rec
+        md.delta_fn_exists = True
+
         gen_repomd_recs.append(rec)
 
         # Prepare database
diff --git a/deltarepo/deltarepo/generator.py b/deltarepo/deltarepo/generator.py
index 45f48e6..073c373 100644
--- a/deltarepo/deltarepo/generator.py
+++ b/deltarepo/deltarepo/generator.py
@@ -196,6 +196,9 @@ class DeltaRepoGenerator(LoggingInterface):
                                               self.contenthash_type_str,
                                               self._get_logger())
 
+            self.globalbundle.calculated_old_contenthash = c_old_contenthash
+            self.globalbundle.calculated_new_contenthash = c_new_contenthash
+
         self._debug("Calculated content hash of the old repo: {0}".format(
             c_old_contenthash))
         self._debug("Calculated content hash of the new repo: {0}".format(
diff --git a/deltarepo/deltarepo/util.py b/deltarepo/deltarepo/util.py
new file mode 100644
index 0000000..77d9800
--- /dev/null
+++ b/deltarepo/deltarepo/util.py
@@ -0,0 +1,39 @@
+import hashlib
+import logging
+import createrepo_c as cr
+
+
+def log(logger, level, msg):
+    if not logger:
+        return
+    logger.log(level, msg)
+
+def pkg_id_str(pkg, logger=None):
+    """Return a string identifying a package in repodata.
+    These strings are used for the RepoId calculation."""
+    if not pkg.pkgId:
+        log(logger, logging.WARNING, "Missing pkgId in a package!")
+    if not pkg.location_href:
+        log(logger, logging.WARNING, "Missing location_href at "
+                                     "package %s %s" % (pkg.name, pkg.pkgId))
+
+    idstr = "%s%s%s" % (pkg.pkgId or '',
+                        pkg.location_href or '',
+                        pkg.location_base or '')
+    return idstr
+
+def calculate_content_hash(path_to_primary_xml, type="sha256", logger=None):
+    pkg_id_strs = []
+
+    def old_pkgcb(pkg):
+        pkg_id_strs.append(pkg_id_str(pkg, logger))
+
+    cr.xml_parse_primary(path_to_primary_xml, pkgcb=old_pkgcb, do_files=False)
+
+    pkg_id_strs.sort()
+
+    packages_hash = []
+    h = hashlib.new(type)
+    for i in pkg_id_strs:
+        h.update(i)
+    return h.hexdigest()
-- 
2.7.4