deltarepo: ...
author Tomas Mlcoch <tmlcoch@redhat.com>
Wed, 12 Jun 2013 13:57:55 +0000 (15:57 +0200)
committer Tomas Mlcoch <tmlcoch@redhat.com>
Wed, 12 Jun 2013 13:57:55 +0000 (15:57 +0200)
deltarepo/deltarepo.py
deltarepo/deltarepo/__init__.py

index 9e14ee8fcddf1885a3f2c165c681eaefe491023a..6ddffa42a39fd7f6b532d3b6ba288df020d3453c 100755 (executable)
@@ -18,16 +18,16 @@ def parse_options():
                       help="Run in quiet mode.")
     parser.add_option("-v", "--verbose", action="store_true",
                       help="Run in verbose mode.")
-    parser.add_option("-l", "--list-datatypes", action="store_true",
-                      help="List datatypes for which delta is supported.")
+    #parser.add_option("-l", "--list-datatypes", action="store_true",
+    #                  help="List datatypes for which delta is supported.")
     parser.add_option("-o", "--outputdir", action="store", metavar="DIR",
                       help="Output directory.", default="./")
 
     group = OptionGroup(parser, "Delta generation")
-    group.add_option("-s", "--skip", action="append", metavar="DATATYPE",
+    group.add_option("--skip", action="append", metavar="DATATYPE",
                      help="Skip delta on the DATATYPE. Could be specified "\
                      "multiple times. (E.g., --skip=comps)")
-    group.add_option("-d", "--do-only", action="append", metavar="DATATYPE",
+    group.add_option("--do-only", action="append", metavar="DATATYPE",
                      help="Do delta only for the DATATYPE. Could be specified "\
                      "multiple times. (E.g., --do-only=primary)")
     group.add_option("-t", "--id-type", action="store", metavar="HASHTYPE",
@@ -38,6 +38,8 @@ def parse_options():
     group = OptionGroup(parser, "Delta application")
     group.add_option("-a", "--apply", action="store_true",
                      help="Enable delta application mode.")
+    group.add_option("-d", "--database", action="store_true",
+                     help="Gen database.")
     parser.add_option_group(group)
 
     options, args = parser.parse_args()
@@ -102,7 +104,8 @@ if __name__ == "__main__":
 
     if options.apply:
         # Applying delta
-        generator.applydelta(args[0], args[1], out_path=options.outputdir)
+        generator.applydelta(args[0], args[1], out_path=options.outputdir,
+                             database=options.database)
     else:
         # Do delta
         generator.gendelta(args[0], args[1], out_path=options.outputdir,
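
Note: the short aliases -s and -d are dropped from --skip/--do-only above so
that -d can be reused for the new --database switch (--list-datatypes is
commented out for now). The flag is threaded straight through to applydelta().
A minimal usage sketch (repo paths are hypothetical):

    # apply a delta and also generate the sqlite databases
    generator = DeltaRepoGenerator(logger=logger)
    generator.applydelta("old_repo/", "delta_repo/", out_path="./",
                         database=True)
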
index 0f40feb5989593958a67b028b0c8ea98b19961e8..5b3d7632032a634d62e6c300c05af837d3a46954 100644 (file)
@@ -57,24 +57,49 @@ class DeltaModule(LoggingInterface):
         self.id_type = id_type
 
     def _path(self, path, record):
+        """Return path to the repodata file."""
         return os.path.join(path, record.location_href)
 
+    def _pkg_id_tuple(self, pkg):
+        """Return tuple identifying a package in repodata.
+        (pkgId, location_href, location_base)"""
+        return (pkg.pkgId, pkg.location_href, pkg.location_base)
+
+    def _pkg_id_str(self, pkg):
+        """Return string identifying a package in repodata.
+        This strings are used for the RepoId calculation."""
+        if not pkg.pkgId:
+            self._warning("Missing pkgId in a package!")
+        if not pkg.location_href:
+            self._warning("Missing location_href at package %s %s" % \
+                          pkg.name, pkg.pkgId)
+        return "%s%s%s" % (pkg.pkgId or '',
+                           pkg.location_href or '',
+                           pkg.location_base or '')
+
 class MainDeltaModule(DeltaModule):
 
-    def apply(self, pri_old_fn, pri_delta_fn, pri_f, fil_old_fn,
-              fil_delta_fn, fil_f, oth_old_fn, oth_delta_fn, oth_f, removed):
+    def apply(self, pri_old_fn, pri_delta_fn, pri_f, pri_db, fil_old_fn,
+              fil_delta_fn, fil_f, fil_db, oth_old_fn, oth_delta_fn, oth_f,
+              oth_db, removed):
 
         removed_packages = set() # set of pkgIds (hashes)
-        all_packages = {}        # dicst { 'pkgId': pkg }
+        all_packages = {}        # dict { 'pkgId': pkg }
+
+        old_repoid_strings = []
+        new_repoid_strings = []
 
         def old_pkgcb(pkg):
+            old_repoid_strings.append(self._pkg_id_str(pkg))
             if pkg.location_href in removed.packages:
                 if removed.packages[pkg.location_href] == pkg.location_base:
                     # This package won't be in new metadata
                     return
+            new_repoid_strings.append(self._pkg_id_str(pkg))
             all_packages[pkg.pkgId] = pkg
 
         def delta_pkgcb(pkg):
+            new_repoid_strings.append(self._pkg_id_str(pkg))
             all_packages[pkg.pkgId] = pkg
 
         do_primary_files = 1
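
Note: _pkg_id_str() above concatenates pkgId, location_href and location_base,
substituting '' for missing fields, and warns when pkgId or location_href is
absent. A sketch of the value it yields, with made-up field contents:

    # hypothetical package fields
    pkgId, href, base = "abc123", "Packages/foo-1.0.rpm", None
    id_str = "%s%s%s" % (pkgId or '', href or '', base or '')
    # id_str == "abc123Packages/foo-1.0.rpm"
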
@@ -86,6 +111,22 @@ class MainDeltaModule(DeltaModule):
         cr.xml_parse_primary(pri_delta_fn, pkgcb=delta_pkgcb,
                              do_files=do_primary_files)
 
+        # Calculate RepoIds
+        old_repo_id = ""
+        new_repo_id = ""
+
+        h = hashlib.new(self.id_type)
+        old_repoid_strings.sort()
+        for i in old_repoid_strings:
+            h.update(i)
+        old_repo_id = h.hexdigest()
+
+        h = hashlib.new(self.id_type)
+        new_repoid_strings.sort()
+        for i in new_repoid_strings:
+            h.update(i)
+        new_repo_id = h.hexdigest()
+
         # Sort packages
         def cmp_pkgs(x, y):
             # Compare only by filename
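
Note: a RepoId is the hex digest (using self.id_type) of the per-package id
strings hashed in sorted order, so the result is independent of parsing order.
The same calculation as a standalone sketch (Python 2, matching the
surrounding code; id_strings would hold the _pkg_id_str() values):

    import hashlib

    def calculate_repoid(id_strings, id_type="sha256"):
        h = hashlib.new(id_type)
        for s in sorted(id_strings):  # sort first -> order-independent digest
            h.update(s)
        return h.hexdigest()
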
@@ -118,21 +159,26 @@ class MainDeltaModule(DeltaModule):
         pri_f.set_num_of_pkgs(num_of_packages)
         for pkg in all_packages_sorted:
             pri_f.add_pkg(pkg)
-        pri_f.close()
+            if pri_db:
+                pri_db.add_pkg(pkg)
 
         # Write out filelists
         if fil_f:
             fil_f.set_num_of_pkgs(num_of_packages)
             for pkg in all_packages_sorted:
                 fil_f.add_pkg(pkg)
-            fil_f.close()
+                if fil_db:
+                    fil_db.add_pkg(pkg)
 
         # Write out other
         if oth_f:
             oth_f.set_num_of_pkgs(num_of_packages)
             for pkg in all_packages_sorted:
                 oth_f.add_pkg(pkg)
-            oth_f.close()
+                if oth_db:
+                    oth_db.add_pkg(pkg)
+
+        return (old_repo_id, new_repo_id)
 
     def do(self, pri_old_fn, pri_new_fn, pri_f,
            fil_new_fn, fil_f, oth_new_fn, oth_f, removed):
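
Note: apply() no longer closes the XML output files itself; the caller in
applydelta() closes them after apply() returns (see the close() calls added
below) and receives the calculated (old_repo_id, new_repo_id) pair for
verification. Caller-side sketch:

    ids = deltamodule.apply(...)   # returns (old_repo_id, new_repo_id)
    pri_out_f.close()
    cold_id, cnew_id = ids
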
@@ -145,19 +191,12 @@ class MainDeltaModule(DeltaModule):
         new_repoid_strings = []
 
         def old_pkgcb(pkg):
-            pkg_id_tuple = (pkg.pkgId, pkg.location_href, pkg.location_base)
-            old_packages.add(pkg_id_tuple)
-            pkg_id_string = "%s%s%s" % (pkg.pkgId,
-                                        pkg.location_href,
-                                        pkg.location_base or '')
-            old_repoid_strings.append(pkg_id_string)
+            old_packages.add(self._pkg_id_tuple(pkg))
+            old_repoid_strings.append(self._pkg_id_str(pkg))
 
         def new_pkgcb(pkg):
-            pkg_id_tuple = (pkg.pkgId, pkg.location_href, pkg.location_base)
-            pkg_id_string = "%s%s%s" % (pkg.pkgId,
-                                        pkg.location_href,
-                                        pkg.location_base or '')
-            new_repoid_strings.append(pkg_id_string)
+            new_repoid_strings.append(self._pkg_id_str(pkg))
+            pkg_id_tuple = self._pkg_id_tuple(pkg)
             if not pkg_id_tuple in old_packages:
                 # This package is only in new repodata
                 added_packages[pkg.pkgId] = pkg
@@ -297,16 +336,21 @@ class DeltaRepoGenerator(LoggingInterface):
     def __init__(self, id_type=None, logger=None):
         LoggingInterface.__init__(self, logger)
 
+        # id_type is the type of checksum used for RepoId and
+        # DeltaRepoId calculation
         if id_type is None:
             id_type = "sha256"
         self.id_type = id_type
 
+        # checksum_type is the checksum type used for the repomd records.
+        self.checksum_type = cr.SHA256
+
     def _fn_without_checksum(self, path):
         """Strip checksum from a record filename"""
         path = os.path.basename(path)
         return path.rsplit('-')[-1]
 
-    def applydelta(self, old_path, delta_path, out_path=None):
+    def applydelta(self, old_path, delta_path, out_path=None, database=False):
         removedxml = RemovedXml()
         hash_in_the_name = False
 
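
Note: the generator now distinguishes two hash settings: id_type is a hashlib
algorithm name used for RepoId/DeltaRepoId digests, while checksum_type is a
createrepo_c constant used for repomd record checksums. A sketch of the
distinction (defaults as set above):

    gen = DeltaRepoGenerator()                 # id_type defaults to "sha256"
    h = hashlib.new(gen.id_type)               # RepoId hashing
    stat = cr.ContentStat(gen.checksum_type)   # repomd records (cr.SHA256)
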
@@ -322,6 +366,24 @@ class DeltaRepoGenerator(LoggingInterface):
         delta_repomd = cr.Repomd(delta_repomd_path)
         new_repomd = cr.Repomd()
 
+        # Check whether the DeltaRepoId corresponds to the repo in use
+        if not delta_repomd.repoid or len(delta_repomd.repoid.split('-')) != 2:
+            raise DeltaRepoError("Bad DeltaRepoId")
+
+        self.id_type = delta_repomd.repoid_type
+
+        old_id, new_id = delta_repomd.repoid.split('-')
+
+        self._debug("Delta %s -> %s" % (old_id, new_id))
+
+        if old_repomd.repoid_type == delta_repomd.repoid_type:
+            if old_repomd.repoid and old_repomd.repoid != old_id:
+                raise DeltaRepoError("Not suitable delta for current repo " \
+                        "(Expected: %s Real: %s)" % (old_id, old_repomd.repoid))
+        else:
+            self._debug("Different repoid types repo: %s vs delta: %s" % \
+                    (old_repomd.repoid_type, delta_repomd.repoid_type))
+
         # Prepare output path
         new_path = os.path.join(out_path, ".repodata/")
         new_repodata_path = os.path.join(new_path, "repodata/")
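
Note: a DeltaRepoId is expected to have the form "<old RepoId>-<new RepoId>".
Since hex digests contain no '-', splitting on it is unambiguous. A minimal
sketch of the check performed above (id values abridged/hypothetical):

    deltarepoid = "1afc9e...b1-77d3f2...9c"   # "<old>-<new>"
    if len(deltarepoid.split('-')) != 2:
        raise DeltaRepoError("Bad DeltaRepoId")
    old_id, new_id = deltarepoid.split('-')
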
@@ -343,7 +405,6 @@ class DeltaRepoGenerator(LoggingInterface):
         delta_record_types = set(delta_records.keys())
         deleted_repomd_record_types = old_record_types - delta_record_types
         added_repomd_record_types = delta_record_types - old_record_types
-        # TODO: Skip removed record
 
         # Prepare removedxml
         if "removed" in delta_records:
@@ -354,75 +415,158 @@ class DeltaRepoGenerator(LoggingInterface):
             self._warning("\"removed\" record is missing in repomd.xml "\
                           "of delta repo")
 
-        # Apply delta on primary, filelists and other
+        # Important sanity check (repo without primary is definitely bad)
         if not "primary" in old_records or not "primary" in delta_records:
             raise DeltaRepoError("Missing primary metadata")
 
+        # Detect type of checksum in the delta repomd.xml
+        self.checksum_type = cr.checksum_type(delta_records["primary"].checksum_type)
+        if self.checksum_type == cr.UNKNOWN_CHECKSUM:
+            raise DeltaRepoError("Unknown checksum type detected: %s" % \
+                    new_records["primary"].checksum_type)
+
+        # Detect whether unique (hash-prefixed) md filenames are used
         if delta_records["primary"].location_href.split("primary")[0] != "":
             hash_in_the_name = True
 
+        # Apply delta on primary, filelists and other
         pri_old_fn = os.path.join(old_path, old_records["primary"].location_href)
         pri_delta_fn = os.path.join(delta_path, delta_records["primary"].location_href)
         pri_out_fn = os.path.join(new_repodata_path, "primary.xml.gz")
-        pri_out_f_stat = cr.ContentStat(cr.SHA256)
+        pri_out_f_stat = cr.ContentStat(self.checksum_type)
         pri_out_f = cr.PrimaryXmlFile(pri_out_fn, cr.GZ_COMPRESSION)
+        pri_db_fn = None
+        pri_db = None
+        if database:
+            pri_db_fn = os.path.join(new_repodata_path, "primary.sqlite")
+            pri_db = cr.PrimarySqlite(pri_db_fn)
 
         fil_old_fn = None
         fil_delta_fn = None
         fil_out_fn = None
         fil_out_f_stat = None
         fil_out_f = None
+        fil_db_fn = None
+        fil_db = None
         if ("filelists" in delta_records):
             fil_old_fn = os.path.join(old_path, old_records["filelists"].location_href)
             fil_delta_fn = os.path.join(delta_path, delta_records["filelists"].location_href)
             fil_out_fn = os.path.join(new_repodata_path, "filelists.xml.gz")
-            fil_out_f_stat = cr.ContentStat(cr.SHA256)
+            fil_out_f_stat = cr.ContentStat(self.checksum_type)
             fil_out_f = cr.FilelistsXmlFile(fil_out_fn, cr.GZ_COMPRESSION)
+            if database:
+                fil_db_fn = os.path.join(new_repodata_path, "filelists.sqlite")
+                fil_db = cr.FilelistsSqlite(fil_db_fn)
 
         oth_old_fn = None
         oth_delta_fn = None
-        out_out_fn = None
+        oth_out_fn = None
         oth_out_f_stat = None
         oth_out_f = None
+        oth_db_fn = None
+        oth_db = None
         if ("other" in delta_records):
             oth_old_fn = os.path.join(old_path, old_records["other"].location_href)
             oth_delta_fn = os.path.join(delta_path, delta_records["other"].location_href)
             oth_out_fn = os.path.join(new_repodata_path, "other.xml.gz")
-            oth_out_f_stat = cr.ContentStat(cr.SHA256)
+            oth_out_f_stat = cr.ContentStat(self.checksum_type)
             oth_out_f = cr.OtherXmlFile(oth_out_fn, cr.GZ_COMPRESSION)
+            if database:
+                oth_db_fn = os.path.join(new_repodata_path, "other.sqlite")
+                oth_db = cr.OtherSqlite(oth_db_fn)
+
+        deltamodule = MainDeltaModule(id_type=self.id_type,
+                                      logger=self.logger)
+        ids = deltamodule.apply(pri_old_fn, pri_delta_fn, pri_out_f, pri_db,
+                                fil_old_fn, fil_delta_fn, fil_out_f, fil_db,
+                                oth_old_fn, oth_delta_fn, oth_out_f, oth_db,
+                                removedxml)
+
+        pri_out_f.close()
+        if fil_out_f:
+            fil_out_f.close()
+        if oth_out_f:
+            oth_out_f.close()
+
+        # Check returned IDs
+        cold_id, cnew_id = ids  # Calculated ids
 
-        deltamodule = MainDeltaModule(id_type=self.id_type, logger=self.logger)
-        deltamodule.apply(pri_old_fn, pri_delta_fn, pri_out_f, fil_old_fn,
-                          fil_delta_fn, fil_out_f, oth_old_fn, oth_delta_fn,
-                          oth_out_f, removedxml)
+        if cold_id != old_id:
+            raise DeltaRepoError("Calculated old RepoId doesn't match!")
+
+        if cnew_id != new_id:
+            raise DeltaRepoError("Calculated new RepoId doesn't match!")
+
+        self._debug("RepoIds match")
 
         # Prepare repomd.xml records
         pri_rec = cr.RepomdRecord("primary", pri_out_fn)
         pri_rec.load_contentstat(pri_out_f_stat)
-        pri_rec.fill(cr.SHA256)
+        pri_rec.fill(self.checksum_type)
         if hash_in_the_name:
             pri_rec.rename_file()
         new_repomd.set_record(pri_rec)
 
+        if database:
+            pri_db.dbinfo_update(pri_rec.checksum)
+            pri_db.close()
+            pri_db_stat = cr.ContentStat(self.checksum_type)
+            pri_db_compressed = pri_db_fn + ".bz2"
+            cr.compress_file(pri_db_fn, None, cr.BZ2, pri_db_stat)
+            os.remove(pri_db_fn)
+            pri_db_rec = cr.RepomdRecord("primary_db", pri_db_compressed)
+            pri_db_rec.load_contentstat(pri_db_stat)
+            pri_db_rec.fill(self.checksum_type)
+            if hash_in_the_name:
+                pri_db_rec.rename_file()
+            new_repomd.set_record(pri_db_rec)
+
         if fil_out_fn:
             fil_rec = cr.RepomdRecord("filelists", fil_out_fn)
             fil_rec.load_contentstat(fil_out_f_stat)
-            fil_rec.fill(cr.SHA256)
+            fil_rec.fill(self.checksum_type)
             if hash_in_the_name:
                 fil_rec.rename_file()
             new_repomd.set_record(fil_rec)
 
+        if database and fil_db:
+            fil_db.dbinfo_update(fil_rec.checksum)
+            fil_db.close()
+            fil_db_stat = cr.ContentStat(self.checksum_type)
+            fil_db_compressed = fil_db_fn + ".bz2"
+            cr.compress_file(fil_db_fn, None, cr.BZ2, fil_db_stat)
+            os.remove(fil_db_fn)
+            fil_db_rec = cr.RepomdRecord("primary_db", fil_db_compressed)
+            fil_db_rec.load_contentstat(fil_db_stat)
+            fil_db_rec.fill(self.checksum_type)
+            if hash_in_the_name:
+                fil_db_rec.rename_file()
+            new_repomd.set_record(fil_db_rec)
+
         if oth_out_fn:
             oth_rec = cr.RepomdRecord("other", oth_out_fn)
             oth_rec.load_contentstat(oth_out_f_stat)
-            oth_rec.fill(cr.SHA256)
+            oth_rec.fill(self.checksum_type)
             if hash_in_the_name:
                 oth_rec.rename_file()
             new_repomd.set_record(oth_rec)
 
+        if database and oth_db:
+            oth_db.dbinfo_update(oth_rec.checksum)
+            oth_db.close()
+            oth_db_stat = cr.ContentStat(self.checksum_type)
+            oth_db_compressed = oth_db_fn + ".bz2"
+            cr.compress_file(oth_db_fn, None, cr.BZ2, oth_db_stat)
+            os.remove(oth_db_fn)
+            oth_db_rec = cr.RepomdRecord("primary_db", oth_db_compressed)
+            oth_db_rec.load_contentstat(oth_db_stat)
+            oth_db_rec.fill(self.checksum_type)
+            if hash_in_the_name:
+                oth_db_rec.rename_file()
+            new_repomd.set_record(oth_db_rec)
+
         # Write out repomd.xml
-        deltarepoid = "TODO"  # TODO
-        new_repomd.set_repoid(deltarepoid, self.id_type)
+        new_repomd.set_repoid(ids[1], self.id_type)
         new_repomd_path = os.path.join(new_repodata_path, "repomd.xml")
         new_repomd_xml = new_repomd.xml_dump()
         self._debug("Writing repomd.xml")
@@ -476,17 +620,25 @@ class DeltaRepoGenerator(LoggingInterface):
         deleted_repomd_record_types = old_record_types - new_record_types
         added_repomd_record_types = new_record_types - old_record_types
 
-        # Do deltas for the "primary", "filelists" and "other"
+        # Important sanity check (repo without primary is definitely bad)
         if not "primary" in old_records or not "primary" in new_records:
             raise DeltaRepoError("Missing primary metadata")
 
+        # Detect type of checksum in the new repomd.xml
+        self.checksum_type = cr.checksum_type(new_records["primary"].checksum_type)
+        if self.checksum_type == cr.UNKNOWN_CHECKSUM:
+            raise DeltaRepoError("Unknown checksum type detected: %s" % \
+                    new_records["primary"].checksum_type)
+
+        # Detect whether unique (hash-prefixed) md filenames are used
         if new_records["primary"].location_href.split("primary")[0] != "":
             hash_in_the_name = True
 
+        # Do deltas for the "primary", "filelists" and "other"
         pri_old_fn = os.path.join(old_path, old_records["primary"].location_href)
         pri_new_fn = os.path.join(new_path, new_records["primary"].location_href)
         pri_out_fn = os.path.join(delta_repodata_path, "primary.xml.gz")
-        pri_out_f_stat = cr.ContentStat(cr.SHA256)
+        pri_out_f_stat = cr.ContentStat(self.checksum_type)
         pri_out_f = cr.PrimaryXmlFile(pri_out_fn, cr.GZ_COMPRESSION)
 
         fil_new_fn = None
@@ -496,7 +648,7 @@ class DeltaRepoGenerator(LoggingInterface):
         if ("filelists" in new_records):
             fil_new_fn = os.path.join(new_path, new_records["filelists"].location_href)
             fil_out_fn = os.path.join(delta_repodata_path, "filelists.xml.gz")
-            fil_out_f_stat = cr.ContentStat(cr.SHA256)
+            fil_out_f_stat = cr.ContentStat(self.checksum_type)
             fil_out_f = cr.FilelistsXmlFile(fil_out_fn, cr.GZ_COMPRESSION)
 
         oth_new_fn = None
@@ -506,17 +658,18 @@ class DeltaRepoGenerator(LoggingInterface):
         if ("other" in new_records):
             oth_new_fn = os.path.join(new_path, new_records["other"].location_href)
             oth_out_fn = os.path.join(delta_repodata_path, "other.xml.gz")
-            oth_out_f_stat = cr.ContentStat(cr.SHA256)
+            oth_out_f_stat = cr.ContentStat(self.checksum_type)
             oth_out_f = cr.OtherXmlFile(oth_out_fn, cr.GZ_COMPRESSION)
 
-        deltamodule = MainDeltaModule()
+        deltamodule = MainDeltaModule(id_type=self.id_type,
+                                      logger=self.logger)
         ids = deltamodule.do(pri_old_fn, pri_new_fn, pri_out_f, fil_new_fn,
                              fil_out_f, oth_new_fn, oth_out_f, removedxml)
 
         # Prepare repomd.xml records
         pri_rec = cr.RepomdRecord("primary", pri_out_fn)
         pri_rec.load_contentstat(pri_out_f_stat)
-        pri_rec.fill(cr.SHA256)
+        pri_rec.fill(self.checksum_type)
         if hash_in_the_name:
             pri_rec.rename_file()
         delta_repomd.set_record(pri_rec)
@@ -524,7 +677,7 @@ class DeltaRepoGenerator(LoggingInterface):
         if fil_out_fn:
             fil_rec = cr.RepomdRecord("filelists", fil_out_fn)
             fil_rec.load_contentstat(fil_out_f_stat)
-            fil_rec.fill(cr.SHA256)
+            fil_rec.fill(self.checksum_type)
             if hash_in_the_name:
                 fil_rec.rename_file()
             delta_repomd.set_record(fil_rec)
@@ -532,7 +685,7 @@ class DeltaRepoGenerator(LoggingInterface):
         if oth_out_fn:
             oth_rec = cr.RepomdRecord("other", oth_out_fn)
             oth_rec.load_contentstat(oth_out_f_stat)
-            oth_rec.fill(cr.SHA256)
+            oth_rec.fill(self.checksum_type)
             if hash_in_the_name:
                 oth_rec.rename_file()
             delta_repomd.set_record(oth_rec)
@@ -544,13 +697,13 @@ class DeltaRepoGenerator(LoggingInterface):
         removedxml_xml = removedxml.xml_dump()
         self._debug("Writing removed.xml")
         open(removedxml_path, "w").write(removedxml_xml)
-        stat = cr.ContentStat(cr.SHA256)
+        stat = cr.ContentStat(self.checksum_type)
         #cr.compress_file(removedxml_path, removedxml_path_gz, cr.GZ, stat)
         #os.remove(removedxml_path)
         #removedxml_rec = cr.RepomdRecord("removed", removedxml_path_gz)
         removedxml_rec = cr.RepomdRecord("removed", removedxml_path)
         removedxml_rec.load_contentstat(stat)
-        removedxml_rec.fill(cr.SHA256)
+        removedxml_rec.fill(self.checksum_type)
         if hash_in_the_name:
             removedxml_rec.rename_file()
         delta_repomd.set_record(removedxml_rec)