--- /dev/null
+#!/usr/bin/python -tt
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+# Copyright 2003 Duke University
+
+# take a list of rpms,
+# pull a bunch of pertinent info out of them, and write it out to a file
+# just a 'proof of concept'
+
+import os
+import rpm
+import exceptions
+import md5
+import sha
+import types
+import struct
+import re
+
+
+
+def returnHdr(ts, package):
+ """hand back the rpm header or raise an Error if the pkg is fubar"""
+ try:
+ if type(package) is types.StringType:
+ fdno = os.open(package, os.O_RDONLY)
+ else:
+ fdno = package # let's assume this is an fdno and go with it :)
+ except OSError:
+        raise MDError, "Error opening file %s" % package
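+    # turn off every signature/digest check except the md5, and don't insist
+    # on the payload being present - all we want back is a usable header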
+ ts.setVSFlags(~(rpm.RPMVSF_NOMD5|rpm.RPMVSF_NEEDPAYLOAD))
+ try:
+ hdr = ts.hdrFromFdno(fdno)
+ except rpm.error:
+ raise MDError, "Error opening package"
+ if type(hdr) != rpm.hdr:
+ raise MDError, "Error opening package"
+ ts.setVSFlags(0)
+ if type(package) is types.StringType:
+ os.close(fdno)
+ del fdno
+ return hdr
+
+def getChecksum(sumtype, file, CHUNK=2**16):
+ """takes filename, hand back Checksum of it
+ sumtype = md5 or sha
+ filename = /path/to/file
+ CHUNK=65536 by default"""
+
+    # check the sumtype up front, outside the try, so a bogus type isn't
+    # misreported as a file error
+    if sumtype == 'md5':
+        sum = md5.new()
+    elif sumtype == 'sha':
+        sum = sha.new()
+    else:
+        raise MDError, 'Error Checksumming file, wrong checksum type %s' % sumtype
+
+    try:
+        if type(file) is not types.StringType:
+            fo = file # assume it's a file-like-object
+        else:
+            fo = open(file, 'rb', CHUNK) # binary, or digests differ across platforms
+
+        # chunking brazenly lifted from Ryan Tomayko
+        chunk = fo.read(CHUNK)
+        while chunk:
+            sum.update(chunk)
+            chunk = fo.read(CHUNK)
+
+        if type(file) is types.StringType:
+            fo.close()
+            del fo
+
+        return sum.hexdigest()
+    except (IOError, OSError):
+        raise MDError, 'Error opening file for checksum'
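+
+# quick sanity check (a sketch; the path is hypothetical):
+#   pkgid = getChecksum('md5', '/tmp/foo-1.0-1.i386.rpm')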
+
+
+def utf8String(string):
+    """hands back a unicoded string"""
+    if string is None: # don't let a missing tag turn into the string 'None'
+        return u''
+    try:
+ string = unicode(string)
+ except UnicodeError:
+ newstring = ''
+ for char in string:
+ if ord(char) > 127:
+ newstring = newstring + '?'
+ else:
+ newstring = newstring + char
+ return unicode(newstring)
+ else:
+ return string
+
+def xmlCleanString(doc, string):
+ """hands back a special-char encoded and utf8 cleaned string
+ Takes a libxml2 document object and the string to clean
+ document object is needed to not require expensive get_doc function
+ """
+ string = utf8String(string)
+ string = doc.encodeSpecialChars(string)
+ return string
+
+
+def byteranges(file):
+ """takes an rpm file or fileobject and returns byteranges for location of the header"""
+ if type(file) is not types.StringType:
+ fo = file
+ else:
+        fo = open(file, 'rb') # binary mode - we're reading structs out of it
+ #read in past lead and first 8 bytes of sig header
+ fo.seek(104)
+ # 104 bytes in
+ binindex = fo.read(4)
+ # 108 bytes in
+ (sigindex, ) = struct.unpack('>I', binindex)
+ bindata = fo.read(4)
+ # 112 bytes in
+ (sigdata, ) = struct.unpack('>I', bindata)
+    # each index entry is four 32-bit fields - so each is 16 bytes
+ sigindexsize = sigindex * 16
+ sigsize = sigdata + sigindexsize
+ # we have to round off to the next 8 byte boundary
+ disttoboundary = (sigsize % 8)
+ if disttoboundary != 0:
+ disttoboundary = 8 - disttoboundary
+    # 112 bytes == 96 (lead) + 8 (sig magic and reserved) + 8 (sig index count and data size)
+ hdrstart = 112 + sigsize + disttoboundary
+
+ fo.seek(hdrstart) # go to the start of the header
+ fo.seek(8,1) # read past the magic number and reserved bytes
+
+ binindex = fo.read(4)
+ (hdrindex, ) = struct.unpack('>I', binindex)
+ bindata = fo.read(4)
+ (hdrdata, ) = struct.unpack('>I', bindata)
+
+    # each index entry is four 32-bit fields - so each is 16 bytes
+ hdrindexsize = hdrindex * 16
+    # add 16 to the hdrsize to account for the header's own preamble: 8 bytes
+    # of magic/reserved plus the two 4-byte counts we just read.
+ hdrsize = hdrdata + hdrindexsize + 16
+
+ # header end is hdrstart + hdrsize
+ hdrend = hdrstart + hdrsize
+ if type(file) is types.StringType:
+ fo.close()
+ del fo
+ return (hdrstart, hdrend)
+
+
+class MDError(exceptions.Exception):
+ def __init__(self, args=None):
+ exceptions.Exception.__init__(self)
+ self.args = args
+
+
+
+class RpmMetaData:
+ """each rpm is one object, you pass it an rpm file
+ it opens the file, and pulls the information out in bite-sized chunks :)
+ """
+ def __init__(self, ts, filename, url, sumtype):
+ stats = os.stat(filename)
+ self.size = stats[6]
+ self.mtime = stats[8]
+ del stats
+
+ self.localurl = url
+ self.relativepath = filename
+ self.hdr = returnHdr(ts, filename)
+ self.pkgid = getChecksum(sumtype, filename)
+ (self.rangestart, self.rangeend) = byteranges(filename)
+
+ # setup our regex objects
+ fileglobs = ['.*bin\/.*', '^\/etc\/.*', '^\/usr\/lib\/sendmail$']
+ dirglobs = ['.*bin\/.*', '^\/etc\/.*']
+ self.dirrc = []
+ self.filerc = []
+ for glob in fileglobs:
+ self.filerc.append(re.compile(glob))
+
+ for glob in dirglobs:
+ self.dirrc.append(re.compile(glob))
+
+ self.filenames = []
+ self.dirnames = []
+ self.ghostnames = []
+ self.genFileLists()
+
+ def arch(self):
+ if self.tagByName('sourcepackage') == 1:
+ return 'src'
+ else:
+ return self.tagByName('arch')
+
+ def _correctFlags(self, flags):
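+        # rpm sense flags carry more than the comparison bits; masking with
+        # 0xf keeps just the LT/GT/EQ bits and drops the prereq/script info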
+ returnflags=[]
+ if flags is None:
+ return returnflags
+
+ if type(flags) is not types.ListType:
+ newflag = flags & 0xf
+ returnflags.append(newflag)
+ else:
+ for flag in flags:
+ newflag = flag
+ if flag is not None:
+ newflag = flag & 0xf
+ returnflags.append(newflag)
+ return returnflags
+
+ def _correctVersion(self, vers):
+ returnvers = []
+ vertuple = (None, None, None)
+ if vers is None:
+ returnvers.append(vertuple)
+ return returnvers
+
+ if type(vers) is not types.ListType:
+ if vers is not None:
+ vertuple = self._stringToVersion(vers)
+ else:
+ vertuple = (None, None, None)
+ returnvers.append(vertuple)
+ else:
+ for ver in vers:
+ if ver is not None:
+ vertuple = self._stringToVersion(ver)
+ else:
+ vertuple = (None, None, None)
+ returnvers.append(vertuple)
+ return returnvers
+
+
+ def _stringToVersion(self, strng):
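+        # splits an [e:]version[-release] string,
+        # e.g. '2:1.0-3' -> ('2', '1.0', '3') and '1.0' -> ('0', '1.0', None)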
+ i = strng.find(':')
+ if i != -1:
+            epoch = str(long(strng[:i])) # keep epoch a string, like the no-colon case
+ else:
+ epoch = '0'
+ j = strng.find('-')
+ if j != -1:
+ if strng[i + 1:j] == '':
+ version = None
+ else:
+ version = strng[i + 1:j]
+ release = strng[j + 1:]
+ else:
+ if strng[i + 1:] == '':
+ version = None
+ else:
+ version = strng[i + 1:]
+ release = None
+ return (epoch, version, release)
+
+ ###########
+ # Title: Remove duplicates from a sequence
+ # Submitter: Tim Peters
+ # From: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52560
+
+ def _uniq(self,s):
+ """Return a list of the elements in s, but without duplicates.
+
+ For example, unique([1,2,3,1,2,3]) is some permutation of [1,2,3],
+ unique("abcabc") some permutation of ["a", "b", "c"], and
+ unique(([1, 2], [2, 3], [1, 2])) some permutation of
+ [[2, 3], [1, 2]].
+
+ For best speed, all sequence elements should be hashable. Then
+ unique() will usually work in linear time.
+
+ If not possible, the sequence elements should enjoy a total
+ ordering, and if list(s).sort() doesn't raise TypeError it's
+ assumed that they do enjoy a total ordering. Then unique() will
+ usually work in O(N*log2(N)) time.
+
+ If that's not possible either, the sequence elements must support
+ equality-testing. Then unique() will usually work in quadratic
+ time.
+ """
+
+ n = len(s)
+ if n == 0:
+ return []
+
+ # Try using a dict first, as that's the fastest and will usually
+ # work. If it doesn't work, it will usually fail quickly, so it
+ # usually doesn't cost much to *try* it. It requires that all the
+ # sequence elements be hashable, and support equality comparison.
+ u = {}
+ try:
+ for x in s:
+ u[x] = 1
+ except TypeError:
+ del u # move on to the next method
+ else:
+ return u.keys()
+
+ # We can't hash all the elements. Second fastest is to sort,
+ # which brings the equal elements together; then duplicates are
+ # easy to weed out in a single pass.
+ # NOTE: Python's list.sort() was designed to be efficient in the
+ # presence of many duplicate elements. This isn't true of all
+ # sort functions in all languages or libraries, so this approach
+ # is more effective in Python than it may be elsewhere.
+ try:
+ t = list(s)
+ t.sort()
+ except TypeError:
+ del t # move on to the next method
+ else:
+ assert n > 0
+ last = t[0]
+ lasti = i = 1
+ while i < n:
+ if t[i] != last:
+ t[lasti] = last = t[i]
+ lasti += 1
+ i += 1
+ return t[:lasti]
+
+ # Brute force is all that's left.
+ u = []
+ for x in s:
+ if x not in u:
+ u.append(x)
+ return u
+
+ def tagByName(self, tag):
+ return self.hdr[tag]
+
+ def listTagByName(self, tag):
+ """take a tag that should be a list and make sure it is one"""
+ lst = []
+ data = self.tagByName(tag)
+        if data is None:
+            return lst
+ if type(data) is types.ListType:
+ lst.extend(data)
+ else:
+ lst.append(data)
+ return lst
+
+
+ def epoch(self):
+ if self.hdr['epoch'] is None:
+ return 0
+ else:
+ return self.tagByName('epoch')
+
+ def color(self):
+ # do something here - but what I don't know
+ pass
+
+ def genFileLists(self):
+ """produces lists of dirs and files for this header in two lists"""
+
+ files = self.listTagByName('filenames')
+ fileclasses = self.listTagByName('fileclass')
+ fileflags = self.listTagByName('fileflags')
+ filetuple = zip(files, fileclasses, fileflags)
+ classdict = self.listTagByName('classdict')
+ for (file, fileclass, flags) in filetuple:
+ if fileclass is None or file is None: # this is a dumb test
+ self.filenames.append(file)
+ continue
+ if (flags & 64): # check for ghost
+ self.ghostnames.append(file)
+ continue
+ if classdict[fileclass] == 'directory':
+ self.dirnames.append(file)
+ else:
+ self.filenames.append(file)
+
+
+ def usefulFiles(self):
+ """search for good files"""
+ returns = {}
+ for item in self.filenames:
+ if item is None:
+ continue
+ for glob in self.filerc:
+ if glob.match(item):
+ returns[item] = 1
+        return returns.keys()
+
+
+ def usefulDirs(self):
+ """search for good dirs"""
+ returns = {}
+ for item in self.dirnames:
+ if item is None:
+ continue
+ for glob in self.dirrc:
+ if glob.match(item):
+ returns[item] = 1
+ return returns.keys()
+
+
+ def depsList(self):
+ """returns a list of tuples of dependencies"""
+        # duplicates get compressed down by _uniq below
+ lst = []
+ names = self.hdr[rpm.RPMTAG_REQUIRENAME]
+ tmpflags = self.hdr[rpm.RPMTAG_REQUIREFLAGS]
+ flags = self._correctFlags(tmpflags)
+ ver = self._correctVersion(self.hdr[rpm.RPMTAG_REQUIREVERSION])
+ if names is not None:
+ lst = zip(names, flags, ver)
+ return self._uniq(lst)
+
+ def obsoletesList(self):
+ lst = []
+ names = self.hdr[rpm.RPMTAG_OBSOLETENAME]
+ tmpflags = self.hdr[rpm.RPMTAG_OBSOLETEFLAGS]
+ flags = self._correctFlags(tmpflags)
+ ver = self._correctVersion(self.hdr[rpm.RPMTAG_OBSOLETEVERSION])
+ if names is not None:
+ lst = zip(names, flags, ver)
+ return self._uniq(lst)
+
+ def conflictsList(self):
+ lst = []
+ names = self.hdr[rpm.RPMTAG_CONFLICTNAME]
+ tmpflags = self.hdr[rpm.RPMTAG_CONFLICTFLAGS]
+ flags = self._correctFlags(tmpflags)
+ ver = self._correctVersion(self.hdr[rpm.RPMTAG_CONFLICTVERSION])
+ if names is not None:
+ lst = zip(names, flags, ver)
+ return self._uniq(lst)
+
+ def providesList(self):
+ lst = []
+ names = self.hdr[rpm.RPMTAG_PROVIDENAME]
+ tmpflags = self.hdr[rpm.RPMTAG_PROVIDEFLAGS]
+ flags = self._correctFlags(tmpflags)
+ ver = self._correctVersion(self.hdr[rpm.RPMTAG_PROVIDEVERSION])
+ if names is not None:
+ lst = zip(names, flags, ver)
+ return self._uniq(lst)
+
+ def changelogLists(self):
+ lst = []
+ names = self.listTagByName('changelogname')
+ times = self.listTagByName('changelogtime')
+ texts = self.listTagByName('changelogtext')
+ if len(names) > 0:
+ lst = zip(names, times, texts)
+ return lst
+
+
+def generateXML(doc, node, rpmObj, sumtype):
+ """takes an xml doc object and a package metadata entry node, populates a
+ package node with the md information"""
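+    # roughly what this emits per package (a sketch, values abridged):
+    #   <package type="rpm">
+    #     <name>foo</name> <arch>i386</arch>
+    #     <version epoch="0" ver="1.0" rel="1"/>
+    #     <checksum type="md5" pkgid="YES">...</checksum>
+    #     ...
+    #     <format>rpm:license, rpm:requires/provides/..., file entries</format>
+    #   </package>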
+ ns = node.ns()
+ pkgNode = node.newChild(None, "package", None)
+ pkgNode.newProp('type', 'rpm')
+ pkgNode.newChild(None, 'name', rpmObj.tagByName('name'))
+ pkgNode.newChild(None, 'arch', rpmObj.arch())
+ version = pkgNode.newChild(None, 'version', None)
+ version.newProp('epoch', str(rpmObj.epoch()))
+ version.newProp('ver', str(rpmObj.tagByName('version')))
+ version.newProp('rel', str(rpmObj.tagByName('release')))
+ csum = pkgNode.newChild(None, 'checksum', rpmObj.pkgid)
+ csum.newProp('type', sumtype)
+ csum.newProp('pkgid', 'YES')
+ for tag in ['summary', 'description', 'packager', 'url']:
+ value = rpmObj.tagByName(tag)
+ value = utf8String(value)
+ value = re.sub("\n$", '', value)
+ entry = pkgNode.newChild(None, tag, None)
+ value = xmlCleanString(doc, value)
+ entry.addContent(value)
+
+ time = pkgNode.newChild(None, 'time', None)
+ time.newProp('file', str(rpmObj.mtime))
+ time.newProp('build', str(rpmObj.tagByName('buildtime')))
+ size = pkgNode.newChild(None, 'size', None)
+ size.newProp('package', str(rpmObj.size))
+ size.newProp('installed', str(rpmObj.tagByName('size')))
+ size.newProp('archive', str(rpmObj.tagByName('archivesize')))
+ location = pkgNode.newChild(None, 'location', None)
+ if rpmObj.localurl is not None:
+ location.newProp('xml:base', rpmObj.localurl)
+ location.newProp('href', rpmObj.relativepath)
+ format = pkgNode.newChild(None, 'format', None)
+ formatns = format.newNs('http://linux.duke.edu/metadata/rpm', 'rpm')
+ for tag in ['license', 'vendor', 'group', 'buildhost', 'sourcerpm']:
+ value = rpmObj.tagByName(tag)
+ value = utf8String(value)
+ value = re.sub("\n$", '', value)
+ entry = format.newChild(None, tag, None)
+ value = xmlCleanString(doc, value)
+ entry.addContent(value)
+
+ hr = format.newChild(formatns, 'header-range', None)
+ hr.newProp('start', str(rpmObj.rangestart))
+ hr.newProp('end', str(rpmObj.rangeend))
+ #pkgNode.newChild(None, 'color', 'greenishpurple')
+ for (lst, nodename) in [(rpmObj.depsList(), 'requires'), (rpmObj.providesList(), 'provides'),
+ (rpmObj.conflictsList(), 'conflicts'), (rpmObj.obsoletesList(), 'obsoletes')]:
+ if len(lst) > 0:
+ rpconode = format.newChild(formatns, nodename, None)
+ for (name, flags, (e,v,r)) in lst:
+ entry = rpconode.newChild(formatns, 'entry', None)
+ entry.newProp('name', name)
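+                # flags were masked to the low nibble in _correctFlags:
+                # 2 == LT, 4 == GT, 8 == EQ, 10 == LE (2|8), 12 == GE (4|8)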
+                if flags != 0:
+                    arg = None
+                    if flags == 2: arg = 'LT'
+                    elif flags == 4: arg = 'GT'
+                    elif flags == 8: arg = 'EQ'
+                    elif flags == 10: arg = 'LE'
+                    elif flags == 12: arg = 'GE'
+                    if arg is not None: # don't blow up on a flag we don't expect
+                        entry.newProp('flags', arg)
+ if e or v or r:
+ version = entry.newChild(ns, 'version', None)
+ if e:
+ version.newProp('epoch', str(e))
+ if v:
+ version.newProp('ver', str(v))
+ if r:
+ version.newProp('rel', str(r))
+
+
+ for file in rpmObj.usefulFiles():
+ files = format.newChild(None, 'file', None)
+ file = xmlCleanString(doc, file)
+ files.addContent(file)
+ for directory in rpmObj.usefulDirs():
+ files = format.newChild(None, 'file', None)
+ directory = xmlCleanString(doc, directory)
+ files.addContent(directory)
+ files.newProp('type', 'dir')
+
+def fileListXML(doc, node, rpmObj):
+ pkg = node.newChild(None, 'package', None)
+ pkg.newProp('pkgid', rpmObj.pkgid)
+ pkg.newProp('name', rpmObj.tagByName('name'))
+ pkg.newProp('arch', rpmObj.arch())
+ version = pkg.newChild(None, 'version', None)
+ version.newProp('epoch', str(rpmObj.epoch()))
+ version.newProp('ver', str(rpmObj.tagByName('version')))
+ version.newProp('rel', str(rpmObj.tagByName('release')))
+ for file in rpmObj.filenames:
+ files = pkg.newChild(None, 'file', None)
+ file = xmlCleanString(doc, file)
+ files.addContent(file)
+ for directory in rpmObj.dirnames:
+ files = pkg.newChild(None, 'file', None)
+ directory = xmlCleanString(doc, directory)
+ files.addContent(directory)
+ files.newProp('type', 'dir')
+ for ghost in rpmObj.ghostnames:
+ files = pkg.newChild(None, 'file', None)
+ ghost = xmlCleanString(doc, ghost)
+ files.addContent(ghost)
+ files.newProp('type', 'ghost')
+
+def otherXML(doc, node, rpmObj):
+ pkg = node.newChild(None, 'package', None)
+ pkg.newProp('pkgid', rpmObj.pkgid)
+ pkg.newProp('name', rpmObj.tagByName('name'))
+ pkg.newProp('arch', rpmObj.arch())
+ version = pkg.newChild(None, 'version', None)
+ version.newProp('epoch', str(rpmObj.epoch()))
+ version.newProp('ver', str(rpmObj.tagByName('version')))
+ version.newProp('rel', str(rpmObj.tagByName('release')))
+ clogs = rpmObj.changelogLists()
+ for (name, time, text) in clogs:
+ clog = pkg.newChild(None, 'changelog', None)
+ text = xmlCleanString(doc, text)
+ clog.addContent(text)
+ clog.newProp('author', utf8String(name))
+ clog.newProp('date', str(time))
+
+
--- /dev/null
+#!/usr/bin/python -tt
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+# Copyright 2003 Duke University
+
+import os
+import sys
+import getopt
+import rpm
+import libxml2
+import string
+import fnmatch
+import dumpMetadata
+
+
+def errorprint(stuff):
+ print >> sys.stderr, stuff
+
+def usage():
+ print """
+ %s [options] directory-of-packages
+
+ Options:
+ -u, --baseurl = optional base url location for all files
+ -g, --groupfile = optional groups xml file for this repository
+ this should be relative to the 'directory-of-packages'
+    -x, --exclude = file globs to exclude, can be specified multiple times
+ -q, --quiet = run quietly
+ -v, --verbose = run verbosely
+ -s, --checksum = md5 or sha - select type of checksum to use (default: md5)
+ -h, --help = show this help
+
+ """ % os.path.basename(sys.argv[0])
+
+
+ sys.exit(1)
+
+
+def getFileList(path, ext, filelist):
+ """Return all files in path matching ext, store them in filelist, recurse dirs
+ return list object"""
+
+ extlen = len(ext)
+ try:
+ dir_list = os.listdir(path)
+ except OSError, e:
+ errorprint('Error accessing directory %s, %s' % (path, e))
+ sys.exit(1)
+
+ for d in dir_list:
+ if os.path.isdir(path + '/' + d):
+ filelist = getFileList(path + '/' + d, ext, filelist)
+ else:
+            if string.lower(d[-extlen:]) == ext:
+ newpath = os.path.normpath(path + '/' + d)
+ filelist.append(newpath)
+
+ return filelist
+
+
+def trimRpms(rpms, excludeGlobs):
+ # print 'Pre-Trim Len: %d' % len(rpms)
+ badrpms = []
+ for file in rpms:
+ for glob in excludeGlobs:
+ if fnmatch.fnmatch(file, glob):
+ # print 'excluded: %s' % file
+ if file not in badrpms:
+ badrpms.append(file)
+ for file in badrpms:
+ if file in rpms:
+ rpms.remove(file)
+ # print 'Post-Trim Len: %d' % len(rpms)
+ return rpms
+
+
+def parseArgs(args):
+ """
+ Parse the command line args return a commands dict and directory.
+ Sanity check all the things being passed in.
+ """
+ if len(args) == 0:
+ usage()
+ cmds = {}
+ cmds['quiet'] = 0
+ cmds['verbose'] = 0
+ cmds['excludes'] = []
+ cmds['baseurl'] = None
+ cmds['groupfile'] = None
+ cmds['sumtype'] = 'md5'
+
+ try:
+        gopts, argsleft = getopt.getopt(args, 'hqvg:s:x:u:', ['help', 'exclude=',
+ 'quiet', 'verbose',
+ 'baseurl=', 'groupfile=',
+ 'checksum='])
+ except getopt.error, e:
+ errorprint('Options Error: %s.' % e)
+ usage()
+
+ try:
+ for arg,a in gopts:
+ if arg in ['-h','--help']:
+ usage()
+            elif arg in ['-v', '--verbose']:
+ cmds['verbose'] = 1
+ elif arg == "-q":
+ cmds['quiet'] = 1
+ elif arg in ['-u', '--baseurl']:
+ if cmds['baseurl'] is not None:
+ errorprint('Error: Only one baseurl allowed.')
+ usage()
+ else:
+ cmds['baseurl'] = a
+ elif arg in ['-g', '--groupfile']:
+ if cmds['groupfile'] is not None:
+ errorprint('Error: Only one groupfile allowed.')
+ usage()
+ else:
+ cmds['groupfile'] = a
+
+ elif arg in ['-x', '--exclude']:
+ cmds['excludes'].append(a)
+ elif arg in ['-s', '--checksum']:
+ if a not in ['md5', 'sha']:
+ errorprint('Error: checksums are: md5 or sha.')
+ usage()
+ else:
+ cmds['sumtype'] = a
+
+ except ValueError, e:
+ errorprint('Options Error: %s' % e)
+ usage()
+
+ if len(argsleft) != 1:
+ errorprint('Error: Only one directory allowed per run.')
+ usage()
+ else:
+ directory = argsleft[0]
+
+ return cmds, directory
+
+def doPkgMetadata(cmds, ts):
+ # setup the base metadata doc
+ basedoc = libxml2.newDoc("1.0")
+ baseroot = basedoc.newChild(None, "metadata", None)
+ basens = baseroot.newNs('http://linux.duke.edu/metadata/common', None)
+ baseroot.setNs(basens)
+ # setup the file list doc
+ filesdoc = libxml2.newDoc("1.0")
+ filesroot = filesdoc.newChild(None, "filelists", None)
+ filesns = filesroot.newNs('http://linux.duke.edu/metadata/filelists', None)
+ filesroot.setNs(filesns)
+ # setup the other doc
+ otherdoc = libxml2.newDoc("1.0")
+ otherroot = otherdoc.newChild(None, "otherdata", None)
+ otherns = otherroot.newNs('http://linux.duke.edu/metadata/other', None)
+ otherroot.setNs(otherns)
+
+ files = []
+ files = getFileList('./', '.rpm', files)
+ files = trimRpms(files, cmds['excludes'])
+
+ current = 0
+ for file in files:
+ current+=1
+ try:
+ mdobj = dumpMetadata.RpmMetaData(ts, file, cmds['baseurl'], cmds['sumtype'])
+ if not cmds['quiet']:
+ if cmds['verbose']:
+ print '%d/%d - %s' % (current, len(files), file)
+ else:
+ sys.stdout.write('\r' + ' ' * 80)
+ sys.stdout.write("\r%d/%d - %s" % (current, len(files), file))
+ sys.stdout.flush()
+ except dumpMetadata.MDError, e:
+ errorprint('\n%s - %s' % (e, file))
+ continue
+ else:
+ try:
+ dumpMetadata.generateXML(basedoc, baseroot, mdobj, cmds['sumtype'])
+ except dumpMetadata.MDError, e:
+                errorprint('\nan error occurred creating primary metadata: %s' % e)
+ continue
+ try:
+ dumpMetadata.fileListXML(filesdoc, filesroot, mdobj)
+ except dumpMetadata.MDError, e:
+                errorprint('\nan error occurred creating filelists: %s' % e)
+ continue
+ try:
+ dumpMetadata.otherXML(otherdoc, otherroot, mdobj)
+ except dumpMetadata.MDError, e:
+                errorprint('\nan error occurred creating other metadata: %s' % e)
+ continue
+ if not cmds['quiet']:
+ print ''
+
+ # save them up to the tmp locations:
+ basedoc.setDocCompressMode(9)
+ if not cmds['quiet']:
+ print 'Saving Primary metadata'
+ basedoc.saveFormatFileEnc('.primary.xml.gz', 'UTF-8', 1)
+
+ filesdoc.setDocCompressMode(9)
+ if not cmds['quiet']:
+ print 'Saving file lists metadata'
+ filesdoc.saveFormatFileEnc('.filelists.xml.gz', 'UTF-8', 1)
+
+ otherdoc.setDocCompressMode(9)
+ if not cmds['quiet']:
+ print 'Saving other metadata'
+ otherdoc.saveFormatFileEnc('.other.xml.gz', 'UTF-8', 1)
+
+ # move them to their final locations
+ for (tmp, dest) in [('.other.xml.gz', cmds['otherfile']),
+ ('.primary.xml.gz', cmds['primaryfile']),
+ ('.filelists.xml.gz', cmds['filelistsfile'])]:
+ try:
+ os.rename(tmp, dest)
+ except OSError, e:
+ errorprint('Error finishing file %s: %s' % (dest, e))
+ errorprint('Exiting.')
+ os.unlink(tmp)
+ sys.exit(1)
+
+
+def doRepoMetadata(cmds):
+ """generate the repomd.xml file that stores the info on the other files"""
+    #<repomd>
+    #  <data type='other'>
+    #    <location base=foo href=relative/>
+    #    <checksum type="md5">md5sumhere</checksum>
+    #    <timestamp>timestamp</timestamp>
+    #  </data>
+    #</repomd>
+ repodoc = libxml2.newDoc("1.0")
+ reporoot = repodoc.newChild(None, "repomd", None)
+ repons = reporoot.newNs('http://linux.duke.edu/metadata/repo', None)
+ reporoot.setNs(repons)
+ sumtype = cmds['sumtype']
+
+    workfiles = [(cmds['otherfile'], 'other'),
+                 (cmds['filelistsfile'], 'filelists'),
+                 (cmds['primaryfile'], 'primary')]
+    if cmds['groupfile'] is not None:
+        workfiles.append((cmds['groupfile'], 'group'))
+
+ for (file, ftype) in workfiles:
+ csum = dumpMetadata.getChecksum(sumtype, file)
+ timestamp = os.stat(file)[8]
+ data = reporoot.newChild(None, 'data', None)
+ data.newProp('type', ftype)
+ location = data.newChild(None, 'location', None)
+ if cmds['baseurl'] is not None:
+ location.newProp('xml:base', cmds['baseurl'])
+ location.newProp('href', file)
+ checksum = data.newChild(None, 'checksum', csum)
+ checksum.newProp('type', sumtype)
+ timestamp = data.newChild(None, 'timestamp', str(timestamp))
+
+ repodoc.saveFormatFileEnc('.repomd.xml.gz', 'UTF-8', 1)
+ try:
+ os.rename('.repomd.xml.gz', cmds['repomdfile'])
+ except OSError, e:
+ errorprint('Error finishing file %s: %s' % (cmds['repomdfile'], e))
+ errorprint('Exiting.')
+ os.unlink('.repomd.xml.gz')
+ sys.exit(1)
+ else:
+ del repodoc
+
+
+
+def main(args):
+ cmds, directory = parseArgs(args)
+ #setup some defaults
+ cmds['primaryfile'] = 'primary.xml.gz'
+ cmds['filelistsfile'] = 'filelists.xml.gz'
+ cmds['otherfile'] = 'other.xml.gz'
+ cmds['repomdfile'] = 'repomd.xml'
+
+ # save where we are right now
+ curdir = os.getcwd()
+ # start the sanity/stupidity checks
+ if not os.path.exists(directory):
+ errorprint('Directory must exist')
+ usage()
+ if not os.path.isdir(directory):
+ errorprint('Directory of packages must be a directory.')
+ usage()
+ if not os.access(directory, os.W_OK):
+ errorprint('Directory must be writable.')
+ usage()
+ # check out the group file if specified
+ if cmds['groupfile'] is not None:
+ grpfile = os.path.join(directory, cmds['groupfile'])
+ if not os.access(grpfile, os.R_OK):
+ errorprint('groupfile %s must exist and be readable' % grpfile)
+ usage()
+ # make sure we can write to where we want to write to:
+ for file in ['primaryfile', 'filelistsfile', 'otherfile', 'repomdfile']:
+ filepath = os.path.join(directory, cmds[file])
+ dirpath = os.path.dirname(filepath)
+ if os.path.exists(filepath):
+ if not os.access(filepath, os.W_OK):
+                errorprint('must be able to write to metadata files:\n -> %s' % filepath)
+ usage()
+ else:
+ if not os.access(dirpath, os.W_OK):
+ errorprint('must be able to write to path for metadata files:\n -> %s' % dirpath)
+ usage()
+
+ # change to the basedir to work from w/i the path - for relative url paths
+ os.chdir(directory)
+ ts = rpm.TransactionSet()
+ try:
+ doPkgMetadata(cmds, ts)
+ except:
+ # always clean up your messes
+ os.chdir(curdir)
+ raise
+
+ try:
+ doRepoMetadata(cmds)
+ except:
+ os.chdir(curdir)
+ raise
+
+ os.chdir(curdir)
+
+
+
+if __name__ == "__main__":
+    if len(sys.argv) > 1 and sys.argv[1] == 'profile':
+ import profile
+ profile.run('main(sys.argv[2:])')
+ else:
+ main(sys.argv[1:])