--- /dev/null
+#!/usr/bin/env python
+
+import sys
+import os.path
+import hashlib
+import logging
+from optparse import OptionParser, OptionGroup
+import deltarepo
+
+# Format string for log output: only the message text is emitted.
+LOG_FORMAT = "%(message)s"
+
+def parse_options():
+    """Parse and validate the command line.
+
+    Returns:
+        The ``(options, args)`` tuple produced by ``OptionParser``. When
+        ``--version`` is given the tuple is returned right away, without
+        any of the checks below, so ``args`` may hold any number of items.
+
+    Invalid input is reported through ``parser.error``. Checked are: the
+    number of positional arguments (exactly two), the hash type, the
+    quiet/verbose combination, that both positional arguments are
+    repositories (directory containing ``repodata/repomd.xml``) and that
+    the output directory exists.
+    """
+    parser = OptionParser("usage: %prog [options] <first_repo> <second_repo>\n"
+                          " %prog --apply <repo> <delta_repo>")
+    parser.add_option("--version", action="store_true",
+                      help="Show version number and quit.")
+    parser.add_option("-q", "--quiet", action="store_true",
+                      help="Run in quiet mode.")
+    parser.add_option("-v", "--verbose", action="store_true",
+                      help="Run in verbose mode.")
+    parser.add_option("-l", "--list-datatypes", action="store_true",
+                      help="List datatypes for which delta is supported.")
+    parser.add_option("-o", "--outputdir", action="store", metavar="DIR",
+                      help="Output directory.", default="./")
+
+    group = OptionGroup(parser, "Delta generation")
+    group.add_option("-s", "--skip", action="append", metavar="DATATYPE",
+                     help="Skip delta on the DATATYPE. Could be specified "
+                          "multiple times. (E.g., --skip=comps)")
+    group.add_option("-d", "--do-only", action="append", metavar="DATATYPE",
+                     help="Do delta only for the DATATYPE. Could be specified "
+                          "multiple times. (E.g., --do-only=primary)")
+    group.add_option("-t", "--id-type", action="store", metavar="HASHTYPE",
+                     help="Hash function for the ids (RepoId and DeltaRepoId). "
+                          "Default is sha256.", default="sha256")
+    parser.add_option_group(group)
+
+    group = OptionGroup(parser, "Delta application")
+    group.add_option("-a", "--apply", action="store_true",
+                     help="Enable delta application mode.")
+    parser.add_option_group(group)
+
+    options, args = parser.parse_args()
+
+    # Error checks
+
+    # --version needs no positional arguments, so skip all other checks.
+    if options.version:
+        return (options, args)
+
+    if len(args) != 2:
+        parser.error("Two repository paths have to be specified!")
+
+    if options.id_type not in hashlib.algorithms:
+        parser.error("Unsupported hash algorithm %s" % options.id_type)
+
+    if options.quiet and options.verbose:
+        parser.error("Cannot use quiet and verbose simultaneously!")
+
+    # Both positional arguments must look like repositories; they are
+    # checked in the order given so the first bad path is the one reported.
+    for repo_path in args:
+        repodata_path = os.path.join(repo_path, "repodata")
+        repomd_path = os.path.join(repodata_path, "repomd.xml")
+        if not (os.path.isdir(repo_path)
+                and os.path.isdir(repodata_path)
+                and os.path.isfile(repomd_path)):
+            parser.error("Not a repository: %s" % repo_path)
+
+    if not os.path.isdir(options.outputdir):
+        parser.error("Not a directory: %s" % options.outputdir)
+
+    return (options, args)
+
+def print_version():
+    """Print the verbose DeltaRepo version string to standard output."""
+    version_line = "DeltaRepo: %s" % deltarepo.VERBOSE_VERSION
+    print(version_line)
+
+def setup_logging(quiet, verbose):
+    """Configure logging and return the "deltarepo_logger" logger.
+
+    Args:
+        quiet: When true, the logger level is set to ERROR.
+        verbose: When true (and ``quiet`` is not), the level is DEBUG.
+
+    With neither flag set the level is INFO. Output formatting is set up
+    via ``logging.basicConfig`` using ``LOG_FORMAT``.
+    """
+    logging.basicConfig(format=LOG_FORMAT)
+
+    if quiet:
+        level = logging.ERROR
+    elif verbose:
+        level = logging.DEBUG
+    else:
+        level = logging.INFO
+
+    logger = logging.getLogger("deltarepo_logger")
+    logger.setLevel(level)
+    return logger
+
+if __name__ == "__main__":
+    # Script entry point: parse the command line, then either report the
+    # version or run the requested mode.
+    options, args = parse_options()
+
+    if options.version:
+        print_version()
+        sys.exit(0)
+
+    logger = setup_logging(options.quiet, options.verbose)
+
+    # Delta application (--apply) does nothing yet, so only the generation
+    # mode performs any work.
+    if not options.apply:
+        # Do delta
+        generator = deltarepo.DeltaRepoGenerator(logger=logger,
+                                                 id_type=options.id_type)
+        generator.gendelta(args[0], args[1],
+                           skip=options.skip,
+                           do_only=options.do_only,
+                           out_path=options.outputdir)
--- /dev/null
+"""
+DeltaRepo package for Python.
+This is the library for generation, application and handling of
+DeltaRepositories.
+The library is built on the Createrepo_c library and it is a part of it.
+
+Copyright (C) 2013 Tomas Mlcoch
+
+"""
+
+import os
+import shutil
+import hashlib
+import logging
+from lxml import etree
+import createrepo_c as cr
+
+# Names exported by "from deltarepo import *".
+__all__ = ['VERSION', 'VERBOSE_VERSION', 'DeltaRepoGenerator']
+
+# Version of this library.
+VERSION = "0.0.1"
+# Library version combined with the version reported by createrepo_c.
+VERBOSE_VERSION = "%s (createrepo_c: %s)" % (VERSION, cr.VERSION)
+
+class DeltaRepoError(Exception):
+    """Exception type raised by the DeltaRepo library."""
+
+class DeltaModule(object):
+    """Common base of the delta modules."""
+
+    def _path(self, path, record):
+        """Return ``record.location_href`` joined onto ``path``."""
+        href = record.location_href
+        return os.path.join(path, href)
+
+class PrimaryDeltaModule(DeltaModule):
+    """Delta module for the "primary" metadata (work in progress)."""
+
+    def do(self, old_path, old_rec, new_path, new_rec, delta_path, data):
+        """Collect the packages listed in the old primary metadata.
+
+        The old primary file (``old_rec.location_href`` below ``old_path``)
+        is parsed and one ``(pkgId, location_href, location_base)`` tuple is
+        gathered per package. For now the result is only printed;
+        ``new_path``, ``new_rec``, ``delta_path`` and ``data`` are accepted
+        but not used yet.
+        """
+        old_fn = self._path(old_path, old_rec)
+
+        old_packages = set()
+
+        def pkgcb(pkg):
+            # set.add() takes exactly one element, so the three values are
+            # stored together as a single tuple.
+            old_packages.add((pkg.pkgId, pkg.location_href,
+                              pkg.location_base))
+
+        cr.xml_parse_primary(old_fn, pkgcb=pkgcb)
+
+        # TODO: placeholder output until the real delta is generated.
+        print(old_packages)
+        print("DONE")
+
+
+# Registry mapping a repomd record type to the class that handles its delta.
+# DeltaRepoGenerator.gendelta copies records whose type is not listed here
+# instead of computing a delta for them.
+_DELTA_MODULES = {
+ "primary": PrimaryDeltaModule,
+# "filelists": FilelistsDeltaModule,
+# "other": OtherDeltaModule,
+ }
+
+class RemovedXml(object):
+    """Collects locations of removed items and serializes them as XML.
+
+    Two mappings are kept, ``packages`` and ``files``; both map a
+    ``location_href`` to the matching ``location_base``.
+    """
+
+    def __init__(self):
+        self.packages = {}  # { location_href: location_base }
+        self.files = {}     # { location_href: location_base or None }
+
+    def __str__(self):
+        """Return both mappings as text, packages first, one per line."""
+        # __str__ has to return a string; printing here and returning None
+        # makes str(obj) raise TypeError.
+        return "%s\n%s" % (self.packages, self.files)
+
+    def add_pkg(self, pkg):
+        """Remember the location (href and base) of the package ``pkg``."""
+        self.packages[pkg.location_href] = pkg.location_base
+
+    def add_record(self, rec):
+        """Remember the location (href and base) of the record ``rec``."""
+        self.files[rec.location_href] = rec.location_base
+
+    @staticmethod
+    def _append_locations(parent, locations):
+        """Add one <location> child to ``parent`` per entry of ``locations``.
+
+        Empty href/base values are left out of the attributes, and entries
+        with no attribute at all are skipped.
+        """
+        for href, base in locations.items():
+            attrs = {}
+            if href:
+                attrs['href'] = href
+            if base:
+                attrs['base'] = base
+            if attrs:
+                etree.SubElement(parent, "location", attrs)
+
+    def xml_dump(self):
+        """Return the collected data as a UTF-8 encoded XML document.
+
+        The root element is <removed>; it contains <packages> and <files>,
+        each holding the <location> elements of the respective mapping.
+        """
+        xmltree = etree.Element("removed")
+        self._append_locations(etree.SubElement(xmltree, "packages"),
+                               self.packages)
+        self._append_locations(etree.SubElement(xmltree, "files"),
+                               self.files)
+        return etree.tostring(xmltree,
+                              pretty_print=True,
+                              encoding="UTF-8",
+                              xml_declaration=True)
+
+    def xml_parse(self, path):
+        """Not implemented yet; currently does nothing."""
+        # TODO: parsing for RemovedXml
+        pass
+
+class LoggingInterface(object):
+    """Mixin providing ``_debug`` .. ``_critical`` logging shortcuts.
+
+    Args:
+        logger: Logger that receives the messages. When None, messages are
+            discarded.
+    """
+
+    def __init__(self, logger=None):
+        if logger is None:
+            # Use a private logger that swallows everything. Disabling the
+            # logger returned by logging.getLogger() would switch off the
+            # root logger for the whole process, not just for this object.
+            logger = logging.getLogger("deltarepo.null")
+            logger.propagate = False
+            if not logger.handlers:
+                logger.addHandler(logging.NullHandler())
+        self.logger = logger
+
+    def _debug(self, msg):
+        """Log ``msg`` with level DEBUG."""
+        self.logger.debug(msg)
+
+    def _info(self, msg):
+        """Log ``msg`` with level INFO."""
+        self.logger.info(msg)
+
+    def _warning(self, msg):
+        """Log ``msg`` with level WARNING."""
+        self.logger.warning(msg)
+
+    def _error(self, msg):
+        """Log ``msg`` with level ERROR."""
+        self.logger.error(msg)
+
+    def _critical(self, msg):
+        """Log ``msg`` with level CRITICAL."""
+        self.logger.critical(msg)
+
+class DeltaRepoGenerator(LoggingInterface):
+    """Object for generating of DeltaRepositories."""
+
+    def __init__(self, id_type=None, logger=None):
+        """Set up the generator.
+
+        Args:
+            id_type: Name of the hash type passed to ``set_repoid`` of the
+                delta repomd. Defaults to "sha256".
+            logger: Logger handed over to ``LoggingInterface``.
+        """
+        LoggingInterface.__init__(self, logger)
+
+        if id_type is None:
+            id_type = "sha256"
+        self.id_type = id_type
+
+    def _fn_without_checksum(self, path):
+        """Strip checksum from a record filename.
+
+        Only the part of the base name up to the first '-' is dropped, so
+        hyphens inside the remaining name are preserved
+        ("<sum>-comps-f18.xml" gives "comps-f18.xml").
+        """
+        path = os.path.basename(path)
+        return path.split('-', 1)[-1]
+
+    def _copy_record(self, rec, new_path, delta_repodata_path, delta_repomd):
+        """Copy the file of ``rec`` from the new repo and register ``rec``."""
+        rec_path = os.path.join(new_path, rec.location_href)
+        shutil.copy2(rec_path, delta_repodata_path)
+        delta_repomd.set_record(rec)
+
+    def gendelta(self, old_path, new_path, out_path=None,
+                 do_only=None, skip=None):
+        """Generate a delta repository between two repositories.
+
+        The result is written to ``<out_path>/.deltarepo/repodata/``.
+
+        Args:
+            old_path: Path of the old repository.
+            new_path: Path of the new repository.
+            out_path: Directory in which ".deltarepo/" is created. Defaults
+                to "./".
+            do_only: Optional list of record types; changed records of
+                other types are copied instead of being processed by a
+                delta module.
+            skip: Optional list of record types that are copied instead of
+                being processed by a delta module.
+
+        Raises:
+            IOError: A "repodata/" directory is missing in one of the repos.
+            DeltaRepoError: One of the repos has no "primary" record.
+            OSError: From ``os.mkdir``, e.g. when ".deltarepo/" already
+                exists.
+        """
+        if out_path is None:
+            out_path = "./"
+
+        removedxml = RemovedXml()
+
+        # Prepare variables with paths
+        new_repodata_path = os.path.join(new_path, "repodata/")
+        old_repodata_path = os.path.join(old_path, "repodata/")
+
+        if not os.path.isdir(new_repodata_path):
+            raise IOError("Directory %s doesn't exists" % new_repodata_path)
+
+        if not os.path.isdir(old_repodata_path):
+            raise IOError("Directory %s doesn't exists" % old_repodata_path)
+
+        old_repomd_path = os.path.join(old_repodata_path, "repomd.xml")
+        new_repomd_path = os.path.join(new_repodata_path, "repomd.xml")
+
+        # Prepare Repomd objects
+        old_repomd = cr.Repomd(old_repomd_path)
+        new_repomd = cr.Repomd(new_repomd_path)
+        delta_repomd = cr.Repomd()
+
+        # Prepare output path
+        delta_path = os.path.join(out_path, ".deltarepo/")
+        delta_repodata_path = os.path.join(delta_path, "repodata/")
+        os.mkdir(delta_path)
+        os.mkdir(delta_repodata_path)
+
+        # Do repomd delta
+        delta_repomd.set_revision(new_repomd.revision)
+        for tag in new_repomd.distro_tags:
+            delta_repomd.add_distro_tag(tag[1], tag[0])
+        for tag in new_repomd.repo_tags:
+            delta_repomd.add_repo_tag(tag)
+        for tag in new_repomd.content_tags:
+            delta_repomd.add_content_tag(tag)
+
+        old_records = dict((rec.type, rec) for rec in old_repomd.records)
+        new_records = dict((rec.type, rec) for rec in new_repomd.records)
+        old_record_types = set(old_records)
+        new_record_types = set(new_records)
+        deleted_repomd_record_types = old_record_types - new_record_types
+        added_repomd_record_types = new_record_types - old_record_types
+
+        delta_data = {  # Data shared between delta modules
+            "removedxml": removedxml,
+        }
+
+        # Do delta for the "primary" metadata
+        if "primary" not in old_records or "primary" not in new_records:
+            raise DeltaRepoError("Missing primary metadata")
+
+        delta_fn = os.path.join(delta_repodata_path, "primary.xml")
+        deltamodule = _DELTA_MODULES["primary"]()
+        deltamodule.do(old_path, old_records["primary"],
+                       new_path, new_records["primary"],
+                       delta_fn, delta_data)
+
+        # Records present only in the new repo are copied as they are
+        for record_type in added_repomd_record_types:
+            self._debug("Added: %s" % record_type)
+            self._copy_record(new_records[record_type], new_path,
+                              delta_repodata_path, delta_repomd)
+
+        # Do deltas for individual records
+        for record in old_repomd.records:
+            if record.type == "primary":
+                # primary record is already done
+                continue
+
+            if record.type in deleted_repomd_record_types:
+                # Removed record
+                removedxml.add_record(record)
+                self._debug("Removed: %s" % record.type)
+                continue
+
+            old_rec = old_records[record.type]
+            new_rec = new_records[record.type]
+            if old_rec.checksum == new_rec.checksum and \
+               old_rec.checksum_open == new_rec.checksum_open:
+                # File unchanged
+                self._debug("Unchanged: %s" % record.type)
+                continue
+
+            if (skip and record.type in skip) or \
+               (do_only and record.type not in do_only) or \
+               (record.type not in _DELTA_MODULES):
+                # Do not do delta of this file, just copy it. The file comes
+                # from the new repo, so the new record is the one to
+                # register in the delta repomd.
+                self._debug("No delta for: %s" % record.type)
+                self._copy_record(new_rec, new_path,
+                                  delta_repodata_path, delta_repomd)
+                continue
+
+            # TODO: Do delta
+            delta_fn = os.path.join(
+                delta_repodata_path,
+                self._fn_without_checksum(record.location_href))
+            self._debug("Delta file: %s" % delta_fn)
+            deltamodule = _DELTA_MODULES[record.type]()
+            # Same argument list as for the "primary" call above
+            deltamodule.do(old_path, old_rec, new_path, new_rec,
+                           delta_fn, delta_data)
+            # TODO
+
+        # Write out removed.xml
+        # TODO: Compressed!!
+        removedxml_path = os.path.join(delta_repodata_path, "removed.xml")
+        removedxml_xml = removedxml.xml_dump()
+        with open(removedxml_path, "w") as removedxml_file:
+            removedxml_file.write(removedxml_xml)
+        removedxml_rec = cr.RepomdRecord("removed", removedxml_path)
+        removedxml_rec.fill(cr.SHA256)
+        delta_repomd.set_record(removedxml_rec)
+
+        # Write out repomd.xml
+        #deltarepoid = "%s-%s" % (old_repomd.repoid, new_repomd.repoid)
+        # RepoId must be calculated during primary delta calculation
+        deltarepoid = "xxx"
+        delta_repomd.set_repoid(deltarepoid, self.id_type)
+        delta_repomd_path = os.path.join(delta_repodata_path, "repomd.xml")
+        delta_repomd_xml = delta_repomd.xml_dump()
+        with open(delta_repomd_path, "w") as delta_repomd_file:
+            delta_repomd_file.write(delta_repomd_xml)
+