From: Seth Vidal Date: Fri, 9 Jan 2004 07:48:03 +0000 (+0000) Subject: Initial revision X-Git-Tag: upstream/0.9.9~327 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=6d1be85d4cd5eef876499e38adbda145d9f16b70;p=tools%2Fcreaterepo.git Initial revision --- 6d1be85d4cd5eef876499e38adbda145d9f16b70 diff --git a/dumpMetadata.py b/dumpMetadata.py new file mode 100644 index 0000000..ce52d6c --- /dev/null +++ b/dumpMetadata.py @@ -0,0 +1,583 @@ +#!/usr/bin/python -tt +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# Copyright 2003 Duke University + +# take an list of rpms +# pull a bunch of pertinent info out of them and write it out to a file +# just a 'proof of concept' + +import os +import rpm +import exceptions +import md5 +import sha +import types +import struct +import re + + + +def returnHdr(ts, package): + """hand back the rpm header or raise an Error if the pkg is fubar""" + try: + if type(package) is types.StringType: + fdno = os.open(package, os.O_RDONLY) + else: + fdno = package # let's assume this is an fdno and go with it :) + except OSError: + raise MDError, "Error opening file" + ts.setVSFlags(~(rpm.RPMVSF_NOMD5|rpm.RPMVSF_NEEDPAYLOAD)) + try: + hdr = ts.hdrFromFdno(fdno) + except rpm.error: + raise MDError, "Error opening package" + if type(hdr) != rpm.hdr: + raise MDError, "Error opening package" + ts.setVSFlags(0) + if type(package) is types.StringType: + os.close(fdno) + del fdno + return hdr + +def getChecksum(sumtype, file, CHUNK=2**16): + """takes filename, hand back Checksum of it + sumtype = md5 or sha + filename = /path/to/file + CHUNK=65536 by default""" + + # chunking brazenly lifted from Ryan Tomayko + try: + if type(file) is not types.StringType: + fo = file # assume it's a file-like-object + else: + fo = open(file, 'r', CHUNK) + + if sumtype == 'md5': + sum = md5.new() + elif sumtype == 'sha': + sum = sha.new() + else: + raise MDError, 'Error Checksumming file, wrong checksum type %s' % sumtype + chunk = fo.read + while chunk: + chunk = fo.read(CHUNK) + sum.update(chunk) + + if type(file) is types.StringType: + fo.close() + del fo + + return sum.hexdigest() + except: + raise MDError, 'Error opening file for checksum' + + +def utf8String(string): + """hands back a unicoded string""" + try: + string = unicode(string) + except UnicodeError: + newstring = '' + for char in string: + if ord(char) > 127: + newstring = newstring + '?' + else: + newstring = newstring + char + return unicode(newstring) + else: + return string + +def xmlCleanString(doc, string): + """hands back a special-char encoded and utf8 cleaned string + Takes a libxml2 document object and the string to clean + document object is needed to not require expensive get_doc function + """ + string = utf8String(string) + string = doc.encodeSpecialChars(string) + return string + + +def byteranges(file): + """takes an rpm file or fileobject and returns byteranges for location of the header""" + if type(file) is not types.StringType: + fo = file + else: + fo = open(file, 'r') + #read in past lead and first 8 bytes of sig header + fo.seek(104) + # 104 bytes in + binindex = fo.read(4) + # 108 bytes in + (sigindex, ) = struct.unpack('>I', binindex) + bindata = fo.read(4) + # 112 bytes in + (sigdata, ) = struct.unpack('>I', bindata) + # each index is 4 32bit segments - so each is 16 bytes + sigindexsize = sigindex * 16 + sigsize = sigdata + sigindexsize + # we have to round off to the next 8 byte boundary + disttoboundary = (sigsize % 8) + if disttoboundary != 0: + disttoboundary = 8 - disttoboundary + # 112 bytes - 96 == lead, 8 = magic and reserved, 8 == sig header data + hdrstart = 112 + sigsize + disttoboundary + + fo.seek(hdrstart) # go to the start of the header + fo.seek(8,1) # read past the magic number and reserved bytes + + binindex = fo.read(4) + (hdrindex, ) = struct.unpack('>I', binindex) + bindata = fo.read(4) + (hdrdata, ) = struct.unpack('>I', bindata) + + # each index is 4 32bit segments - so each is 16 bytes + hdrindexsize = hdrindex * 16 + # add 16 to the hdrsize to account for the 16 bytes of misc data b/t the + # end of the sig and the header. + hdrsize = hdrdata + hdrindexsize + 16 + + # header end is hdrstart + hdrsize + hdrend = hdrstart + hdrsize + if type(file) is types.StringType: + fo.close() + del fo + return (hdrstart, hdrend) + + +class MDError(exceptions.Exception): + def __init__(self, args=None): + exceptions.Exception.__init__(self) + self.args = args + + + +class RpmMetaData: + """each rpm is one object, you pass it an rpm file + it opens the file, and pulls the information out in bite-sized chunks :) + """ + def __init__(self, ts, filename, url, sumtype): + stats = os.stat(filename) + self.size = stats[6] + self.mtime = stats[8] + del stats + + self.localurl = url + self.relativepath = filename + self.hdr = returnHdr(ts, filename) + self.pkgid = getChecksum(sumtype, filename) + (self.rangestart, self.rangeend) = byteranges(filename) + + # setup our regex objects + fileglobs = ['.*bin\/.*', '^\/etc\/.*', '^\/usr\/lib\/sendmail$'] + dirglobs = ['.*bin\/.*', '^\/etc\/.*'] + self.dirrc = [] + self.filerc = [] + for glob in fileglobs: + self.filerc.append(re.compile(glob)) + + for glob in dirglobs: + self.dirrc.append(re.compile(glob)) + + self.filenames = [] + self.dirnames = [] + self.ghostnames = [] + self.genFileLists() + + def arch(self): + if self.tagByName('sourcepackage') == 1: + return 'src' + else: + return self.tagByName('arch') + + def _correctFlags(self, flags): + returnflags=[] + if flags is None: + return returnflags + + if type(flags) is not types.ListType: + newflag = flags & 0xf + returnflags.append(newflag) + else: + for flag in flags: + newflag = flag + if flag is not None: + newflag = flag & 0xf + returnflags.append(newflag) + return returnflags + + def _correctVersion(self, vers): + returnvers = [] + vertuple = (None, None, None) + if vers is None: + returnvers.append(vertuple) + return returnvers + + if type(vers) is not types.ListType: + if vers is not None: + vertuple = self._stringToVersion(vers) + else: + vertuple = (None, None, None) + returnvers.append(vertuple) + else: + for ver in vers: + if ver is not None: + vertuple = self._stringToVersion(ver) + else: + vertuple = (None, None, None) + returnvers.append(vertuple) + return returnvers + + + def _stringToVersion(self, strng): + i = strng.find(':') + if i != -1: + epoch = long(strng[:i]) + else: + epoch = '0' + j = strng.find('-') + if j != -1: + if strng[i + 1:j] == '': + version = None + else: + version = strng[i + 1:j] + release = strng[j + 1:] + else: + if strng[i + 1:] == '': + version = None + else: + version = strng[i + 1:] + release = None + return (epoch, version, release) + + ########### + # Title: Remove duplicates from a sequence + # Submitter: Tim Peters + # From: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52560 + + def _uniq(self,s): + """Return a list of the elements in s, but without duplicates. + + For example, unique([1,2,3,1,2,3]) is some permutation of [1,2,3], + unique("abcabc") some permutation of ["a", "b", "c"], and + unique(([1, 2], [2, 3], [1, 2])) some permutation of + [[2, 3], [1, 2]]. + + For best speed, all sequence elements should be hashable. Then + unique() will usually work in linear time. + + If not possible, the sequence elements should enjoy a total + ordering, and if list(s).sort() doesn't raise TypeError it's + assumed that they do enjoy a total ordering. Then unique() will + usually work in O(N*log2(N)) time. + + If that's not possible either, the sequence elements must support + equality-testing. Then unique() will usually work in quadratic + time. + """ + + n = len(s) + if n == 0: + return [] + + # Try using a dict first, as that's the fastest and will usually + # work. If it doesn't work, it will usually fail quickly, so it + # usually doesn't cost much to *try* it. It requires that all the + # sequence elements be hashable, and support equality comparison. + u = {} + try: + for x in s: + u[x] = 1 + except TypeError: + del u # move on to the next method + else: + return u.keys() + + # We can't hash all the elements. Second fastest is to sort, + # which brings the equal elements together; then duplicates are + # easy to weed out in a single pass. + # NOTE: Python's list.sort() was designed to be efficient in the + # presence of many duplicate elements. This isn't true of all + # sort functions in all languages or libraries, so this approach + # is more effective in Python than it may be elsewhere. + try: + t = list(s) + t.sort() + except TypeError: + del t # move on to the next method + else: + assert n > 0 + last = t[0] + lasti = i = 1 + while i < n: + if t[i] != last: + t[lasti] = last = t[i] + lasti += 1 + i += 1 + return t[:lasti] + + # Brute force is all that's left. + u = [] + for x in s: + if x not in u: + u.append(x) + return u + + def tagByName(self, tag): + return self.hdr[tag] + + def listTagByName(self, tag): + """take a tag that should be a list and make sure it is one""" + lst = [] + data = self.tagByName(tag) + if data is None: + pass + if type(data) is types.ListType: + lst.extend(data) + else: + lst.append(data) + return lst + + + def epoch(self): + if self.hdr['epoch'] is None: + return 0 + else: + return self.tagByName('epoch') + + def color(self): + # do something here - but what I don't know + pass + + def genFileLists(self): + """produces lists of dirs and files for this header in two lists""" + + files = self.listTagByName('filenames') + fileclasses = self.listTagByName('fileclass') + fileflags = self.listTagByName('fileflags') + filetuple = zip(files, fileclasses, fileflags) + classdict = self.listTagByName('classdict') + for (file, fileclass, flags) in filetuple: + if fileclass is None or file is None: # this is a dumb test + self.filenames.append(file) + continue + if (flags & 64): # check for ghost + self.ghostnames.append(file) + continue + if classdict[fileclass] == 'directory': + self.dirnames.append(file) + else: + self.filenames.append(file) + + + def usefulFiles(self): + """search for good files""" + returns = {} + for item in self.filenames: + if item is None: + continue + for glob in self.filerc: + if glob.match(item): + returns[item] = 1 + return returns + + + def usefulDirs(self): + """search for good dirs""" + returns = {} + for item in self.dirnames: + if item is None: + continue + for glob in self.dirrc: + if glob.match(item): + returns[item] = 1 + return returns.keys() + + + def depsList(self): + """returns a list of tuples of dependencies""" + # these should probably compress down duplicates too + lst = [] + names = self.hdr[rpm.RPMTAG_REQUIRENAME] + tmpflags = self.hdr[rpm.RPMTAG_REQUIREFLAGS] + flags = self._correctFlags(tmpflags) + ver = self._correctVersion(self.hdr[rpm.RPMTAG_REQUIREVERSION]) + if names is not None: + lst = zip(names, flags, ver) + return self._uniq(lst) + + def obsoletesList(self): + lst = [] + names = self.hdr[rpm.RPMTAG_OBSOLETENAME] + tmpflags = self.hdr[rpm.RPMTAG_OBSOLETEFLAGS] + flags = self._correctFlags(tmpflags) + ver = self._correctVersion(self.hdr[rpm.RPMTAG_OBSOLETEVERSION]) + if names is not None: + lst = zip(names, flags, ver) + return self._uniq(lst) + + def conflictsList(self): + lst = [] + names = self.hdr[rpm.RPMTAG_CONFLICTNAME] + tmpflags = self.hdr[rpm.RPMTAG_CONFLICTFLAGS] + flags = self._correctFlags(tmpflags) + ver = self._correctVersion(self.hdr[rpm.RPMTAG_CONFLICTVERSION]) + if names is not None: + lst = zip(names, flags, ver) + return self._uniq(lst) + + def providesList(self): + lst = [] + names = self.hdr[rpm.RPMTAG_PROVIDENAME] + tmpflags = self.hdr[rpm.RPMTAG_PROVIDEFLAGS] + flags = self._correctFlags(tmpflags) + ver = self._correctVersion(self.hdr[rpm.RPMTAG_PROVIDEVERSION]) + if names is not None: + lst = zip(names, flags, ver) + return self._uniq(lst) + + def changelogLists(self): + lst = [] + names = self.listTagByName('changelogname') + times = self.listTagByName('changelogtime') + texts = self.listTagByName('changelogtext') + if len(names) > 0: + lst = zip(names, times, texts) + return lst + + +def generateXML(doc, node, rpmObj, sumtype): + """takes an xml doc object and a package metadata entry node, populates a + package node with the md information""" + ns = node.ns() + pkgNode = node.newChild(None, "package", None) + pkgNode.newProp('type', 'rpm') + pkgNode.newChild(None, 'name', rpmObj.tagByName('name')) + pkgNode.newChild(None, 'arch', rpmObj.arch()) + version = pkgNode.newChild(None, 'version', None) + version.newProp('epoch', str(rpmObj.epoch())) + version.newProp('ver', str(rpmObj.tagByName('version'))) + version.newProp('rel', str(rpmObj.tagByName('release'))) + csum = pkgNode.newChild(None, 'checksum', rpmObj.pkgid) + csum.newProp('type', sumtype) + csum.newProp('pkgid', 'YES') + for tag in ['summary', 'description', 'packager', 'url']: + value = rpmObj.tagByName(tag) + value = utf8String(value) + value = re.sub("\n$", '', value) + entry = pkgNode.newChild(None, tag, None) + value = xmlCleanString(doc, value) + entry.addContent(value) + + time = pkgNode.newChild(None, 'time', None) + time.newProp('file', str(rpmObj.mtime)) + time.newProp('build', str(rpmObj.tagByName('buildtime'))) + size = pkgNode.newChild(None, 'size', None) + size.newProp('package', str(rpmObj.size)) + size.newProp('installed', str(rpmObj.tagByName('size'))) + size.newProp('archive', str(rpmObj.tagByName('archivesize'))) + location = pkgNode.newChild(None, 'location', None) + if rpmObj.localurl is not None: + location.newProp('xml:base', rpmObj.localurl) + location.newProp('href', rpmObj.relativepath) + format = pkgNode.newChild(None, 'format', None) + formatns = format.newNs('http://linux.duke.edu/metadata/rpm', 'rpm') + for tag in ['license', 'vendor', 'group', 'buildhost', 'sourcerpm']: + value = rpmObj.tagByName(tag) + value = utf8String(value) + value = re.sub("\n$", '', value) + entry = format.newChild(None, tag, None) + value = xmlCleanString(doc, value) + entry.addContent(value) + + hr = format.newChild(formatns, 'header-range', None) + hr.newProp('start', str(rpmObj.rangestart)) + hr.newProp('end', str(rpmObj.rangeend)) + #pkgNode.newChild(None, 'color', 'greenishpurple') + for (lst, nodename) in [(rpmObj.depsList(), 'requires'), (rpmObj.providesList(), 'provides'), + (rpmObj.conflictsList(), 'conflicts'), (rpmObj.obsoletesList(), 'obsoletes')]: + if len(lst) > 0: + rpconode = format.newChild(formatns, nodename, None) + for (name, flags, (e,v,r)) in lst: + entry = rpconode.newChild(formatns, 'entry', None) + entry.newProp('name', name) + if flags != 0: + if flags == 2: arg = 'LT' + if flags == 4: arg = 'GT' + if flags == 8: arg = 'EQ' + if flags == 10: arg = 'LE' + if flags == 12: arg = 'GE' + entry.newProp('flags', arg) + if e or v or r: + version = entry.newChild(ns, 'version', None) + if e: + version.newProp('epoch', str(e)) + if v: + version.newProp('ver', str(v)) + if r: + version.newProp('rel', str(r)) + + + for file in rpmObj.usefulFiles(): + files = format.newChild(None, 'file', None) + file = xmlCleanString(doc, file) + files.addContent(file) + for directory in rpmObj.usefulDirs(): + files = format.newChild(None, 'file', None) + directory = xmlCleanString(doc, directory) + files.addContent(directory) + files.newProp('type', 'dir') + +def fileListXML(doc, node, rpmObj): + pkg = node.newChild(None, 'package', None) + pkg.newProp('pkgid', rpmObj.pkgid) + pkg.newProp('name', rpmObj.tagByName('name')) + pkg.newProp('arch', rpmObj.arch()) + version = pkg.newChild(None, 'version', None) + version.newProp('epoch', str(rpmObj.epoch())) + version.newProp('ver', str(rpmObj.tagByName('version'))) + version.newProp('rel', str(rpmObj.tagByName('release'))) + for file in rpmObj.filenames: + files = pkg.newChild(None, 'file', None) + file = xmlCleanString(doc, file) + files.addContent(file) + for directory in rpmObj.dirnames: + files = pkg.newChild(None, 'file', None) + directory = xmlCleanString(doc, directory) + files.addContent(directory) + files.newProp('type', 'dir') + for ghost in rpmObj.ghostnames: + files = pkg.newChild(None, 'file', None) + ghost = xmlCleanString(doc, ghost) + files.addContent(ghost) + files.newProp('type', 'ghost') + +def otherXML(doc, node, rpmObj): + pkg = node.newChild(None, 'package', None) + pkg.newProp('pkgid', rpmObj.pkgid) + pkg.newProp('name', rpmObj.tagByName('name')) + pkg.newProp('arch', rpmObj.arch()) + version = pkg.newChild(None, 'version', None) + version.newProp('epoch', str(rpmObj.epoch())) + version.newProp('ver', str(rpmObj.tagByName('version'))) + version.newProp('rel', str(rpmObj.tagByName('release'))) + clogs = rpmObj.changelogLists() + for (name, time, text) in clogs: + clog = pkg.newChild(None, 'changelog', None) + text = xmlCleanString(doc, text) + clog.addContent(text) + clog.newProp('author', utf8String(name)) + clog.newProp('date', str(time)) + + diff --git a/genpkgmetadata.py b/genpkgmetadata.py new file mode 100755 index 0000000..e0ec54c --- /dev/null +++ b/genpkgmetadata.py @@ -0,0 +1,353 @@ +#!/usr/bin/python -tt +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# Copyright 2003 Duke University + +import os +import sys +import getopt +import rpm +import libxml2 +import string +import fnmatch +import dumpMetadata + + +def errorprint(stuff): + print >> sys.stderr, stuff + +def usage(): + print """ + %s [options] directory-of-packages + + Options: + -u, --baseurl = optional base url location for all files + -g, --groupfile = optional groups xml file for this repository + this should be relative to the 'directory-of-packages' + -x, --exclude = files globs to exclude, can be specified multiple times + -q, --quiet = run quietly + -v, --verbose = run verbosely + -s, --checksum = md5 or sha - select type of checksum to use (default: md5) + -h, --help = show this help + + """ % os.path.basename(sys.argv[0]) + + + sys.exit(1) + + +def getFileList(path, ext, filelist): + """Return all files in path matching ext, store them in filelist, recurse dirs + return list object""" + + extlen = len(ext) + try: + dir_list = os.listdir(path) + except OSError, e: + errorprint('Error accessing directory %s, %s' % (path, e)) + sys.exit(1) + + for d in dir_list: + if os.path.isdir(path + '/' + d): + filelist = getFileList(path + '/' + d, ext, filelist) + else: + if string.lower(d[-extlen:]) == '%s' % (ext): + newpath = os.path.normpath(path + '/' + d) + filelist.append(newpath) + + return filelist + + +def trimRpms(rpms, excludeGlobs): + # print 'Pre-Trim Len: %d' % len(rpms) + badrpms = [] + for file in rpms: + for glob in excludeGlobs: + if fnmatch.fnmatch(file, glob): + # print 'excluded: %s' % file + if file not in badrpms: + badrpms.append(file) + for file in badrpms: + if file in rpms: + rpms.remove(file) + # print 'Post-Trim Len: %d' % len(rpms) + return rpms + + +def parseArgs(args): + """ + Parse the command line args return a commands dict and directory. + Sanity check all the things being passed in. + """ + if len(args) == 0: + usage() + cmds = {} + cmds['quiet'] = 0 + cmds['verbose'] = 0 + cmds['excludes'] = [] + cmds['baseurl'] = None + cmds['groupfile'] = None + cmds['sumtype'] = 'md5' + + try: + gopts, argsleft = getopt.getopt(args, 'hqvg:s:x:u:', ['help', 'exclude', + 'quiet', 'verbose', + 'baseurl=', 'groupfile=', + 'checksum=']) + except getopt.error, e: + errorprint('Options Error: %s.' % e) + usage() + + try: + for arg,a in gopts: + if arg in ['-h','--help']: + usage() + elif arg == '-v': + cmds['verbose'] = 1 + elif arg == "-q": + cmds['quiet'] = 1 + elif arg in ['-u', '--baseurl']: + if cmds['baseurl'] is not None: + errorprint('Error: Only one baseurl allowed.') + usage() + else: + cmds['baseurl'] = a + elif arg in ['-g', '--groupfile']: + if cmds['groupfile'] is not None: + errorprint('Error: Only one groupfile allowed.') + usage() + else: + cmds['groupfile'] = a + + elif arg in ['-x', '--exclude']: + cmds['excludes'].append(a) + elif arg in ['-s', '--checksum']: + if a not in ['md5', 'sha']: + errorprint('Error: checksums are: md5 or sha.') + usage() + else: + cmds['sumtype'] = a + + except ValueError, e: + errorprint('Options Error: %s' % e) + usage() + + if len(argsleft) != 1: + errorprint('Error: Only one directory allowed per run.') + usage() + else: + directory = argsleft[0] + + return cmds, directory + +def doPkgMetadata(cmds, ts): + # setup the base metadata doc + basedoc = libxml2.newDoc("1.0") + baseroot = basedoc.newChild(None, "metadata", None) + basens = baseroot.newNs('http://linux.duke.edu/metadata/common', None) + baseroot.setNs(basens) + # setup the file list doc + filesdoc = libxml2.newDoc("1.0") + filesroot = filesdoc.newChild(None, "filelists", None) + filesns = filesroot.newNs('http://linux.duke.edu/metadata/filelists', None) + filesroot.setNs(filesns) + # setup the other doc + otherdoc = libxml2.newDoc("1.0") + otherroot = otherdoc.newChild(None, "otherdata", None) + otherns = otherroot.newNs('http://linux.duke.edu/metadata/other', None) + otherroot.setNs(otherns) + + files = [] + files = getFileList('./', '.rpm', files) + files = trimRpms(files, cmds['excludes']) + + current = 0 + for file in files: + current+=1 + try: + mdobj = dumpMetadata.RpmMetaData(ts, file, cmds['baseurl'], cmds['sumtype']) + if not cmds['quiet']: + if cmds['verbose']: + print '%d/%d - %s' % (current, len(files), file) + else: + sys.stdout.write('\r' + ' ' * 80) + sys.stdout.write("\r%d/%d - %s" % (current, len(files), file)) + sys.stdout.flush() + except dumpMetadata.MDError, e: + errorprint('\n%s - %s' % (e, file)) + continue + else: + try: + dumpMetadata.generateXML(basedoc, baseroot, mdobj, cmds['sumtype']) + except dumpMetadata.MDError, e: + errorprint('\nan error occurred creating primary metadata - hmm %s' % e) + continue + try: + dumpMetadata.fileListXML(filesdoc, filesroot, mdobj) + except dumpMetadata.MDError, e: + errorprint('\nan error occurred creating filelists- hmm %s' % e) + continue + try: + dumpMetadata.otherXML(otherdoc, otherroot, mdobj) + except dumpMetadata.MDError, e: + errorprint('\nan error occurred - hmm %s' % e) + continue + if not cmds['quiet']: + print '' + + # save them up to the tmp locations: + basedoc.setDocCompressMode(9) + if not cmds['quiet']: + print 'Saving Primary metadata' + basedoc.saveFormatFileEnc('.primary.xml.gz', 'UTF-8', 1) + + filesdoc.setDocCompressMode(9) + if not cmds['quiet']: + print 'Saving file lists metadata' + filesdoc.saveFormatFileEnc('.filelists.xml.gz', 'UTF-8', 1) + + otherdoc.setDocCompressMode(9) + if not cmds['quiet']: + print 'Saving other metadata' + otherdoc.saveFormatFileEnc('.other.xml.gz', 'UTF-8', 1) + + # move them to their final locations + for (tmp, dest) in [('.other.xml.gz', cmds['otherfile']), + ('.primary.xml.gz', cmds['primaryfile']), + ('.filelists.xml.gz', cmds['filelistsfile'])]: + try: + os.rename(tmp, dest) + except OSError, e: + errorprint('Error finishing file %s: %s' % (dest, e)) + errorprint('Exiting.') + os.unlink(tmp) + sys.exit(1) + + +def doRepoMetadata(cmds): + """generate the repomd.xml file that stores the info on the other files""" + # + # + # + # md5sumhere + # timestamp + # + repodoc = libxml2.newDoc("1.0") + reporoot = repodoc.newChild(None, "repomd", None) + repons = reporoot.newNs('http://linux.duke.edu/metadata/repo', None) + reporoot.setNs(repons) + sumtype = cmds['sumtype'] + + if cmds['groupfile'] is not None: + workfiles = [(cmds['otherfile'], 'other',), + (cmds['filelistsfile'], 'filelists'), + (cmds['primaryfile'], 'primary'), + (cmds['groupfile'], 'group')] + + else: + workfiles = [(cmds['otherfile'], 'other',), + (cmds['filelistsfile'], 'filelists'), + (cmds['primaryfile'], 'primary')] + + for (file, ftype) in workfiles: + csum = dumpMetadata.getChecksum(sumtype, file) + timestamp = os.stat(file)[8] + data = reporoot.newChild(None, 'data', None) + data.newProp('type', ftype) + location = data.newChild(None, 'location', None) + if cmds['baseurl'] is not None: + location.newProp('xml:base', cmds['baseurl']) + location.newProp('href', file) + checksum = data.newChild(None, 'checksum', csum) + checksum.newProp('type', sumtype) + timestamp = data.newChild(None, 'timestamp', str(timestamp)) + + repodoc.saveFormatFileEnc('.repomd.xml.gz', 'UTF-8', 1) + try: + os.rename('.repomd.xml.gz', cmds['repomdfile']) + except OSError, e: + errorprint('Error finishing file %s: %s' % (cmds['repomdfile'], e)) + errorprint('Exiting.') + os.unlink('.repomd.xml.gz') + sys.exit(1) + else: + del repodoc + + + +def main(args): + cmds, directory = parseArgs(args) + #setup some defaults + cmds['primaryfile'] = 'primary.xml.gz' + cmds['filelistsfile'] = 'filelists.xml.gz' + cmds['otherfile'] = 'other.xml.gz' + cmds['repomdfile'] = 'repomd.xml' + + # save where we are right now + curdir = os.getcwd() + # start the sanity/stupidity checks + if not os.path.exists(directory): + errorprint('Directory must exist') + usage() + if not os.path.isdir(directory): + errorprint('Directory of packages must be a directory.') + usage() + if not os.access(directory, os.W_OK): + errorprint('Directory must be writable.') + usage() + # check out the group file if specified + if cmds['groupfile'] is not None: + grpfile = os.path.join(directory, cmds['groupfile']) + if not os.access(grpfile, os.R_OK): + errorprint('groupfile %s must exist and be readable' % grpfile) + usage() + # make sure we can write to where we want to write to: + for file in ['primaryfile', 'filelistsfile', 'otherfile', 'repomdfile']: + filepath = os.path.join(directory, cmds[file]) + dirpath = os.path.dirname(filepath) + if os.path.exists(filepath): + if not os.access(filepath, os.W_OK): + errorprint('error in must be able to write to metadata files:\n -> %s' % filepath) + usage() + else: + if not os.access(dirpath, os.W_OK): + errorprint('must be able to write to path for metadata files:\n -> %s' % dirpath) + usage() + + # change to the basedir to work from w/i the path - for relative url paths + os.chdir(directory) + ts = rpm.TransactionSet() + try: + doPkgMetadata(cmds, ts) + except: + # always clean up your messes + os.chdir(curdir) + raise + + try: + doRepoMetadata(cmds) + except: + os.chdir(curdir) + raise + + os.chdir(curdir) + + + +if __name__ == "__main__": + if sys.argv[1] == 'profile': + import profile + profile.run('main(sys.argv[2:])') + else: + main(sys.argv[1:])