From: Seth Vidal Date: Mon, 11 Jul 2005 05:45:15 +0000 (+0000) Subject: - disable the checksum flag - default to and only use sha1sums X-Git-Tag: upstream/0.9.9~266 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=254527c935f4213b5b9c18ad13ad44064f6a80a5;p=tools%2Fcreaterepo.git - disable the checksum flag - default to and only use sha1sums - add in -c,--cachedir option to set up a cachedir for the cache files of the checksums of the packages. Uses name-hdrid from the package header as the filename. Each file contains a single line: the package's checksum. This dramatically speeds up rebuilding a repository's metadata because computing the checksum of the package file was the item taking the most time. --- diff --git a/dumpMetadata.py b/dumpMetadata.py index 8060911..aab9ad8 100644 --- a/dumpMetadata.py +++ b/dumpMetadata.py @@ -212,7 +212,7 @@ class RpmMetaData: """each rpm is one object, you pass it an rpm file it opens the file, and pulls the information out in bite-sized chunks :) """ - def __init__(self, ts, filename, url, sumtype): + def __init__(self, ts, filename, options): try: stats = os.stat(filename) self.size = stats[6] @@ -221,13 +221,14 @@ class RpmMetaData: except OSError, e: raise MDError, "Error Stat'ing file %s" % filename - self.localurl = url + self.options = options + self.localurl = options['baseurl'] self.relativepath = filename fd = returnFD(filename) self.hdr = returnHdr(ts, fd) os.lseek(fd, 0, 0) fo = os.fdopen(fd, 'rb') - self.pkgid = getChecksum(sumtype, fo) + self.pkgid = self.doChecksumCache(fo) fo.seek(0) (self.rangestart, self.rangeend) = byteranges(fo) fo.close() @@ -235,8 +236,10 @@ class RpmMetaData: del fd # setup our regex objects - fileglobs = ['.*bin\/.*', '^\/etc\/.*', '^\/usr\/lib\/sendmail$'] - dirglobs = ['.*bin\/.*', '^\/etc\/.*'] + fileglobs = options['file-pattern-match'] + #['.*bin\/.*', '^\/etc\/.*', '^\/usr\/lib\/sendmail$'] + dirglobs = options['dir-pattern-match'] + #['.*bin\/.*', '^\/etc\/.*'] self.dirrc = [] 
self.filerc = [] for glob in fileglobs: @@ -548,6 +551,33 @@ class RpmMetaData: lst = zip(names, times, texts) return lst + def doChecksumCache(self, fo): + """return a checksum for a package: + - check if the checksum cache is enabled + if not - return the checksum + if so - check to see if it has a cache file + if so, open it and return the first line's contents + if not, grab the checksum and write it to a file for this pkg + """ + if not self.options['cache']: + return getChecksum(self.options['sumtype'], fo) + + csumtag = '%s-%s' % (self.hdr['name'] , self.hdr['hdrid']) + csumfile = '%s/%s' % (self.options['cachedir'], csumtag) + if os.path.exists(csumfile): + csumo = open(csumfile, 'r') + checksum = csumo.readline() + csumo.close() + + else: + checksum = getChecksum(self.options['sumtype'], fo) + csumo = open(csumfile, 'w') + csumo.write(checksum) + csumo.close() + + return checksum + + def generateXML(doc, node, formatns, rpmObj, sumtype): """takes an xml doc object and a package metadata entry node, populates a diff --git a/genpkgmetadata.py b/genpkgmetadata.py index 9c9d37a..4d32fb6 100755 --- a/genpkgmetadata.py +++ b/genpkgmetadata.py @@ -49,7 +49,7 @@ def usage(retval=1): -q, --quiet = run quietly -g, --groupfile to point to for group information (precreated) -v, --verbose = run verbosely - -s, --checksum = md5 or sha - select type of checksum to use (default: sha) + -c, --cachedir = specify which dir to use for the checksum cache -h, --help = show this help -V, --version = output version -p, --pretty = output xml files in pretty format. 
@@ -102,21 +102,21 @@ def checkAndMakeDir(dir): if os.path.exists(dir): if not os.path.isdir(dir): errorprint(_('%s is not a dir') % dir) - result = 0 + result = False else: if not os.access(dir, os.W_OK): errorprint(_('%s is not writable') % dir) - result = 0 + result = False else: - result = 1 + result = True else: try: os.mkdir(dir) except OSError, e: errorprint(_('Error creating dir %s: %s') % (dir, e)) - result = 0 + result = False else: - result = 1 + result = True return result @@ -134,12 +134,14 @@ def parseArgs(args): cmds['sumtype'] = 'sha' cmds['pretty'] = 0 # cmds['updategroupsonly'] = 0 + cmds['cachedir'] = None + cmds['cache'] = False cmds['file-pattern-match'] = ['.*bin\/.*', '^\/etc\/.*', '^\/usr\/lib\/sendmail$'] cmds['dir-pattern-match'] = ['.*bin\/.*', '^\/etc\/.*'] try: - gopts, argsleft = getopt.getopt(args, 'phqVvg:s:x:u:', ['help', 'exclude=', - 'quiet', 'verbose', + gopts, argsleft = getopt.getopt(args, 'phqVvg:s:x:u:c:', ['help', 'exclude=', + 'quiet', 'verbose', 'cachedir=', 'baseurl=', 'groupfile=', 'checksum=', 'version', 'pretty']) except getopt.error, e: @@ -197,12 +199,14 @@ def parseArgs(args): # elif arg in ['--update-groups-only']: # cmds['updategroupsonly'] = 1 elif arg in ['-s', '--checksum']: - if a not in ['md5', 'sha']: - errorprint(_('Error: checksums are: md5 or sha.')) + errorprint(_('This option is deprecated')) + elif arg in ['-c', '--cachedir']: + cmds['cache'] = True + cmds['cachedir'] = a + if not checkAndMakeDir(a): + errorprint(_('Error: cannot open/write to cache dir %s' % a)) usage() - else: - cmds['sumtype'] = a - + except ValueError, e: errorprint(_('Options Error: %s') % e) usage() @@ -259,7 +263,7 @@ def doPkgMetadata(cmds, ts): for file in files: current+=1 try: - mdobj = dumpMetadata.RpmMetaData(ts, file, cmds['baseurl'], cmds['sumtype']) + mdobj = dumpMetadata.RpmMetaData(ts, file, cmds) if not cmds['quiet']: if cmds['verbose']: print '%d/%d - %s' % (current, len(files), file)