From: Tomas Mlcoch Date: Mon, 10 Jun 2013 09:18:47 +0000 (+0200) Subject: deltarepo: Add draft of deltarepo. X-Git-Tag: upstream/0.2.1~97 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=92f2a9c8f0068e3b90cdd8fd061749fad0014a17;p=services%2Fcreaterepo_c.git deltarepo: Add draft of deltarepo. --- diff --git a/deltarepo/deltarepo.py b/deltarepo/deltarepo.py new file mode 100755 index 0000000..6cf480c --- /dev/null +++ b/deltarepo/deltarepo.py @@ -0,0 +1,106 @@ +#!/usr/bin/env python + +import sys +import os.path +import hashlib +import logging +from optparse import OptionParser, OptionGroup +import deltarepo + +LOG_FORMAT = "%(message)s" + +def parse_options(): + parser = OptionParser("usage: %prog [options] \n" \ + " %prog --apply ") + parser.add_option("--version", action="store_true", + help="Show version number and quit.") + parser.add_option("-q", "--quiet", action="store_true", + help="Run in quiet mode.") + parser.add_option("-v", "--verbose", action="store_true", + help="Run in verbose mode.") + parser.add_option("-l", "--list-datatypes", action="store_true", + help="List datatypes for which delta is supported.") + parser.add_option("-o", "--outputdir", action="store", metavar="DIR", + help="Output directory.", default="./") + + group = OptionGroup(parser, "Delta generation") + group.add_option("-s", "--skip", action="append", metavar="DATATYPE", + help="Skip delta on the DATATYPE. Could be specified "\ + "multiple times. (E.g., --skip=comps)") + group.add_option("-d", "--do-only", action="append", metavar="DATATYPE", + help="Do delta only for the DATATYPE. Could be specified "\ + "multiple times. (E.g., --do-only=primary)") + group.add_option("-t", "--id-type", action="store", metavar="HASHTYPE", + help="Hash function for the ids (RepoId and DeltaRepoId). " \ + "Default is sha256.", default="sha256") + parser.add_option_group(group) + + group = OptionGroup(parser, "Delta application") + group.add_option("-a", "--apply", action="store_true", + help="Enable delta application mode.") + parser.add_option_group(group) + + options, args = parser.parse_args() + + # Error checks + + if options.version: + return (options.args) + + if len(args) != 2: + parser.error("Two repository paths have to be specified!") + + if options.id_type not in hashlib.algorithms: + parser.error("Unsupported hash algorithm %s" % options.id_type) + + if options.quiet and options.verbose: + parser.error("Cannot use quiet and verbose simultaneously!") + + if not os.path.isdir(args[0]) or \ + not os.path.isdir(os.path.join(args[0], "repodata")) or \ + not os.path.isfile(os.path.join(args[0], "repodata", "repomd.xml")): + parser.error("Not a repository: %s" % args[0]) + + if not os.path.isdir(args[1]) or \ + not os.path.isdir(os.path.join(args[1], "repodata")) or \ + not os.path.isfile(os.path.join(args[1], "repodata", "repomd.xml")): + parser.error("Not a repository: %s" % args[1]) + + if not os.path.isdir(options.outputdir): + parser.error("Not a directory: %s" % options.outputdir) + + return (options, args) + +def print_version(): + print "DeltaRepo: %s" % deltarepo.VERBOSE_VERSION + +def setup_logging(quiet, verbose): + logger = logging.getLogger("deltarepo_logger") + formatter = logging.Formatter(LOG_FORMAT) + logging.basicConfig(format=LOG_FORMAT) + if quiet: + logger.setLevel(logging.ERROR) + elif verbose: + logger.setLevel(logging.DEBUG) + else: + logger.setLevel(logging.INFO) + return logger + +if __name__ == "__main__": + options, args = parse_options() + + if options.version: + print_version() + sys.exit(0) + + logger = setup_logging(options.quiet, options.verbose) + + if options.apply: + # Applying delta + pass + else: + # Do delta + generator = deltarepo.DeltaRepoGenerator(id_type=options.id_type, + logger=logger) + generator.gendelta(args[0], args[1], out_path=options.outputdir, + do_only=options.do_only, skip=options.skip) diff --git a/deltarepo/deltarepo/__init__.py b/deltarepo/deltarepo/__init__.py new file mode 100644 index 0000000..e6f5d7b --- /dev/null +++ b/deltarepo/deltarepo/__init__.py @@ -0,0 +1,253 @@ +""" +DeltaRepo package for Python. +This is the library for generation, application and handling of +DeltaRepositories. +The library is builded on the Createrepo_c library and its a part of it. + +Copyright (C) 2013 Tomas Mlcoch + +""" + +import os +import shutil +import hashlib +import logging +from lxml import etree +import createrepo_c as cr + +__all__ = ['VERSION', 'VERBOSE_VERSION', 'DeltaRepoGenerator'] + +VERSION = "0.0.1" +VERBOSE_VERSION = "%s (createrepo_c: %s)" % (VERSION, cr.VERSION) + +class DeltaRepoError(Exception): + pass + +class DeltaModule(object): + + def _path(self, path, record): + return os.path.join(path, record.location_href) + +class PrimaryDeltaModule(DeltaModule): + def do(old_path, old_rec, new_path, new_rec, delta_path, data): + + old_fn = self._path(old_path, old_rec) + + old_packages = set() + + def pkgcb(pkg): + old_packages.add(pkg.pkgId, pkg.location_href, location_base) + + cr.xml_parse_primary(old_fn, pkgcb=pkgcb) + + print old_packages + print "DONE" + + +_DELTA_MODULES = { + "primary": PrimaryDeltaModule, +# "filelists": FilelistsDeltaModule, +# "other": OtherDeltaModule, + } + +class RemovedXml(object): + def __init__(self): + self.packages = {} # { location_href: location_base } + self.files = {} # { location_href: location_base or Null } + + def __str__(self): + print self.packages + print self.files + + def add_pkg(self, pkg): + self.packages[pkg.location_href] = pkg.location_base + + def add_record(self, rec): + self.files[rec.location_href] = rec.location_base + + def xml_dump(self): + xmltree = etree.Element("removed") + packages = etree.SubElement(xmltree, "packages") + for href, base in self.packages.iteritems(): + attrs = {} + if href: attrs['href'] = href + if base: attrs['base'] = base + if not attrs: continue + etree.SubElement(packages, "location", attrs) + files = etree.SubElement(xmltree, "files") + for href, base in self.files.iteritems(): + attrs = {} + if href: attrs['href'] = href + if base: attrs['base'] = base + if not attrs: continue + etree.SubElement(files, "location", attrs) + return etree.tostring(xmltree, + pretty_print=True, + encoding="UTF-8", + xml_declaration=True) + + def xml_parse(self, path): + # TODO: parsing for RemovedXml + pass + +class LoggingInterface(object): + def __init__(self, logger=None): + if logger is None: + logger = logging.getLogger() + logger.disabled = True + self.logger = logger + + def _debug(self, msg): + self.logger.debug(msg) + + def _info(self, msg): + self.logger.info(msg) + + def _warning(self, msg): + self.logger.warning(msg) + + def _error(self, msg): + self.logger.error(msg) + + def _critical(self, msg): + self.logger.critical(msg) + +class DeltaRepoGenerator(LoggingInterface): + """Object for generating of DeltaRepositories.""" + + def __init__(self, id_type=None, logger=None): + LoggingInterface.__init__(self, logger) + + if id_type is None: + id_type = "sha256" + self.id_type = id_type + + def _fn_without_checksum(self, path): + """Strip checksum from a record filename""" + path = os.path.basename(path) + return path.rsplit('-')[-1] + + def gendelta(self, old_path, new_path, out_path=None, + do_only=None, skip=None): + removedxml = RemovedXml() + + # Prepare variables with paths + new_repodata_path = os.path.join(new_path, "repodata/") + old_repodata_path = os.path.join(old_path, "repodata/") + + if not os.path.isdir(new_repodata_path): + raise IOError("Directory %s doesn't exists" % new_repodata_path) + + if not os.path.isdir(old_repodata_path): + raise IOError("Directory %s doesn't exists" % old_repodata_path) + + old_repomd_path = os.path.join(old_repodata_path, "repomd.xml") + new_repomd_path = os.path.join(new_repodata_path, "repomd.xml") + + # Prepare Repomd objects + old_repomd = cr.Repomd(old_repomd_path) + new_repomd = cr.Repomd(new_repomd_path) + delta_repomd = cr.Repomd() + + # Prepare output path + delta_path = os.path.join(out_path, ".deltarepo/") + delta_repodata_path = os.path.join(delta_path, "repodata/") + os.mkdir(delta_path) + os.mkdir(delta_repodata_path) + + # Do repomd delta + delta_repomd.set_revision(new_repomd.revision) + for tag in new_repomd.distro_tags: + delta_repomd.add_distro_tag(tag[1], tag[0]) + for tag in new_repomd.repo_tags: + delta_repomd.add_repo_tag(tag) + for tag in new_repomd.content_tags: + delta_repomd.add_content_tag(tag) + + old_records = dict([(record.type, record) for record in old_repomd.records ]) + new_records = dict([(record.type, record) for record in new_repomd.records ]) + old_record_types = set(old_records.keys()) + new_record_types = set(new_records.keys()) + deleted_repomd_record_types = old_record_types - new_record_types + added_repomd_record_types = new_record_types - old_record_types + + delta_data = { # Data shared between delta modules + "removedxml": removedxml, + } + + # Do deltas for the "primary + if not "primary" in old_records or not "primary" in new_records: + raise DeltaRepoError("Missing primary metadata") + + delta_fn = os.path.join(delta_repodata_path, "primary.xml") + deltamodule = _DELTA_MODULES["primary"]() + deltamodule.do(old_path, old_records["primary"], + new_path, new_records["primary"], + delta_fn, delta_data) + + # Do deltas for the rest of the metadata + for record_type in added_repomd_record_types: + # Added records + self._debug("Added: %s" % record_type) + rec = new_records[record_type] + rec_path = os.path.join(new_path, rec.location_href) + shutil.copy2(rec_path, delta_repodata_path) + delta_repomd.set_record(rec) + + # Do deltas for individual records + for record in old_repomd.records: + if record.type == "primary": + # primary record is already done + continue + + if record.type in deleted_repomd_record_types: + # Removed record + removedxml.add_record(record) + self._debug("Removed: %s" % record.type) + continue + + old_rec = old_records[record.type] + new_rec = new_records[record.type] + if old_rec.checksum == new_rec.checksum and \ + old_rec.checksum_open == new_rec.checksum_open: + # File unchanged + self._debug("Unchanged: %s" % record.type) + continue + + if (skip and record.type in skip) or \ + (do_only and record.type not in do_only) or \ + (record.type not in _DELTA_MODULES): + # Do not do delta of this file, just copy it + self._debug("No delta for: %s" % record.type) + rec = new_records[record.type] + rec_path = os.path.join(new_path, rec.location_href) + shutil.copy2(rec_path, delta_repodata_path) + delta_repomd.set_record(record) + continue + + # TODO: Do delta + delta_fn = os.path.join(delta_repodata_path, + self._fn_without_checksum(record.location_href)) + print delta_fn + deltamodule = _DELTA_MODULES[record.type]() + deltamodule.do(old_rec, new_rec, delta_fn, delta_data) + # TODO + + # Write out removed.xml + # TODO: Compressed!! + removedxml_path = os.path.join(delta_repodata_path, "removed.xml") + removedxml_xml = removedxml.xml_dump() + open(removedxml_path, "w").write(removedxml_xml) + removedxml_rec = cr.RepomdRecord("removed", removedxml_path) + removedxml_rec.fill(cr.SHA256) + delta_repomd.set_record(removedxml_rec) + + # Write out repomd.xml + #deltarepoid = "%s-%s" % (old_repomd.repoid, new_repomd.repoid) + # RepoId must be calculated during primary delta calculation + deltarepoid = "xxx" + delta_repomd.set_repoid(deltarepoid, self.id_type) + delta_repomd_path = os.path.join(delta_repodata_path, "repomd.xml") + delta_repomd_xml = delta_repomd.xml_dump() + open(delta_repomd_path, "w").write(delta_repomd_xml) +