From: Seth Vidal
Date: Thu, 3 Jan 2008 17:26:45 +0000 (-0500)
Subject: - port to optionparser from getopt
X-Git-Tag: upstream/0.9.9~184
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=ff26a86251cac6da7fc5a8aa19c567df5ecd3924;p=tools%2Fcreaterepo.git

- port to optionparser from getopt
- redo config class to make use outside of cli more do-able
- handle repomd.xml creation in class, too
- still have a lot of changes to complete
---

diff --git a/createrepo/__init__.py b/createrepo/__init__.py
index 2ab8708..7f30d91 100644
--- a/createrepo/__init__.py
+++ b/createrepo/__init__.py
@@ -7,10 +7,11 @@
 import fnmatch
 import hashlib
 import rpm
 import yumbased
+from optparse import OptionContainer

 from yum import misc
-from utils import _
+from utils import _, errorprint
 import readMetadata

 try:
@@ -33,10 +34,44 @@ class MDError(exceptions.Exception):
     def __str__(self):
         return self.value

+class MetaDataConfig(object):
+    def __init__(self):
+        self.quiet = False
+        self.verbose = False
+        self.excludes = []
+        self.baseurl = ''
+        self.groupfile = None
+        self.sumtype = 'sha'
+        self.noepoch = False #???
+        self.pretty = False
+        self.cachedir = None
+        self.basedir = os.getcwd()
+        self.use_cache = False
+        self.checkts = False
+        self.split = False
+        self.update = False
+        self.database = False
+        self.outputdir = None
+        self.file_patterns = ['.*bin\/.*', '^\/etc\/.*', '^\/usr\/lib\/sendmail$']
+        self.dir_patterns = ['.*bin\/.*', '^\/etc\/.*']
+        self.skip_symlinks = False
+        self.pkglist = []
+        self.primaryfile = 'primary.xml.gz'
+        self.filelistsfile = 'filelists.xml.gz'
+        self.otherfile = 'other.xml.gz'
+        self.repomdfile = 'repomd.xml'
+        self.tempdir = '.repodata'
+        self.finaldir = 'repodata'
+        self.olddir = '.olddata'
+        self.mdtimestamp = 0
+
 class MetaDataGenerator:
-    def __init__(self, cmds):
-        self.cmds = cmds
+    def __init__(self, config_obj=None):
+        self.conf = config_obj
+        if config_obj == None:
+            self.conf = MetaDataConfig()
+
         self.ts = rpm.TransactionSet()
         self.pkgcount = 0
         self.files = []
@@ -67,7 +102,7 @@ class MetaDataGenerator:
             for fn in names:
                 if os.path.isdir(fn):
                     continue
-                if self.cmds['skip-symlinks'] and os.path.islink(fn):
+                if self.conf.skip_symlinks and os.path.islink(fn):
                     continue
                 elif fn[-extlen:].lower() == '%s' % (ext):
                     relativepath = dirname.replace(startdir, "", 1)
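
The MetaDataConfig object above is what makes the generator usable outside the CLI, per the commit message. A minimal sketch of a library caller, assuming the attribute names defined in the hunk above; the paths, the 'el5' directory argument, and the exact call sequence are illustrative, not taken from this patch:

    import createrepo

    conf = createrepo.MetaDataConfig()
    conf.basedir = '/srv/repos'          # illustrative: parent dir holding the rpms
    conf.outputdir = '/srv/repos/el5'    # illustrative: where repodata/ gets written
    conf.excludes = ['*.src.rpm']        # fnmatch globs, as consumed by trimRpms()
    conf.database = True                 # also build the sqlite .bz2 files

    gen = createrepo.MetaDataGenerator(conf)
    gen.doPkgMetadata(directory='el5')   # primary/filelists/other metadata
    gen.doRepoMetadata()                 # repomd.xml, now handled in the class too
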
@@ -78,23 +113,30 @@ class MetaDataGenerator:
         startdir = os.path.join(basepath, directory) + '/'
         self._os_path_walk(startdir, extension_visitor, filelist)
         return filelist
-    #module
-    def checkTimeStamps(self, directory):
-        if self.cmds['checkts']:
-            files = self.getFileList(self.cmds['basedir'], directory, '.rpm')
+
+
+    def checkTimeStamps(self):
+        """check the timestamp of our target dir. If it is not newer than the repodata
+           return False, else True"""
+        if self.conf.checkts:
+            files = self.getFileList(self.conf.basedir, self.conf.directory, '.rpm')
             files = self.trimRpms(files)
             for f in files:
-                fn = os.path.join(self.cmds['basedir'], directory, f)
+                fn = os.path.join(self.conf.basedir, self.conf.directory, f)
                 if not os.path.exists(fn):
+                    #FIXME - raise don't print here
                     errorprint(_('cannot get to file: %s') % fn)
-                if os.path.getctime(fn) > self.cmds['mdtimestamp']:
+                if os.path.getctime(fn) > self.conf.mdtimestamp:
                     return False
-        return True
-    #module
+                else:
+                    return True
+
+        return False
+
     def trimRpms(self, files):
         badrpms = []
         for file in files:
-            for glob in self.cmds['excludes']:
+            for glob in self.conf.excludes:
                 if fnmatch.fnmatch(file, glob):
                     # print 'excluded: %s' % file
                     if file not in badrpms:
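
The rewritten checkTimeStamps() backs the checkts behaviour: if nothing is newer than the existing repodata, the run can be skipped. A standalone restatement of that test, assuming mdtimestamp is seeded with the timestamp of the current repomd.xml; note that the method above actually returns after examining only the first rpm:

    import os

    def repodata_is_current(rpm_paths, mdtimestamp):
        """True if no rpm is newer than metadata written at mdtimestamp."""
        for fn in rpm_paths:
            if not os.path.exists(fn):
                return False              # missing file, force a rebuild
            if os.path.getctime(fn) > mdtimestamp:
                return False              # rpm changed after the last run
        return True
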
@@ -104,26 +146,28 @@ class MetaDataGenerator:
                         files.remove(file)
         return files

-    def doPkgMetadata(self, directory):
+    def doPkgMetadata(self, directory=None):
         """all the heavy lifting for the package metadata"""
-
+        if not directory:
+            directory = self.conf.directory
+
         # rpms we're going to be dealing with
-        if self.cmds['update']:
+        if self.conf.update:
             #build the paths
-            primaryfile = os.path.join(self.cmds['outputdir'], self.cmds['finaldir'], self.cmds['primaryfile'])
-            flfile = os.path.join(self.cmds['outputdir'], self.cmds['finaldir'], self.cmds['filelistsfile'])
-            otherfile = os.path.join(self.cmds['outputdir'], self.cmds['finaldir'], self.cmds['otherfile'])
+            primaryfile = os.path.join(self.conf.outputdir, self.conf.finaldir, self.conf.primaryfile)
+            flfile = os.path.join(self.conf.outputdir, self.conf.finaldir, self.conf.filelistsfile)
+            otherfile = os.path.join(self.conf.outputdir, self.conf.finaldir, self.conf.otherfile)
             opts = {
-                'verbose' : self.cmds['verbose'],
-                'pkgdir' : os.path.normpath(os.path.join(self.cmds['basedir'], directory))
+                'verbose' : self.conf.verbose,
+                'pkgdir' : os.path.normpath(os.path.join(self.conf.basedir, directory))
             }
             #and scan the old repo
-            self.oldData = readMetadata.MetadataIndex(self.cmds['outputdir'],
+            self.oldData = readMetadata.MetadataIndex(self.conf.outputdir,
                                                       primaryfile, flfile, otherfile, opts)

-        if self.cmds['pkglist']:
-            packages = self.cmds['pkglist']
+        if self.conf.pkglist:
+            packages = self.conf.pkglist
         else:
-            packages = self.getFileList(self.cmds['basedir'], directory, '.rpm')
+            packages = self.getFileList(self.conf.basedir, directory, '.rpm')

         packages = self.trimRpms(packages)
         self.pkgcount = len(packages)
@@ -139,7 +183,7 @@ class MetaDataGenerator:

     def _setupPrimary(self):
         # setup the primary metadata file
-        primaryfilepath = os.path.join(self.cmds['outputdir'], self.cmds['tempdir'], self.cmds['primaryfile'])
+        primaryfilepath = os.path.join(self.conf.outputdir, self.conf.tempdir, self.conf.primaryfile)
         fo = _gzipOpen(primaryfilepath, 'w')
         fo.write('\n')
         fo.write('\n' %
@@ -148,7 +192,7 @@ class MetaDataGenerator:

     def _setupFilelists(self):
         # setup the filelist file
-        filelistpath = os.path.join(self.cmds['outputdir'], self.cmds['tempdir'], self.cmds['filelistsfile'])
+        filelistpath = os.path.join(self.conf.outputdir, self.conf.tempdir, self.conf.filelistsfile)
         fo = _gzipOpen(filelistpath, 'w')
         fo.write('\n')
         fo.write('\n' %
@@ -157,45 +201,18 @@ class MetaDataGenerator:

     def _setupOther(self):
         # setup the other file
-        otherfilepath = os.path.join(self.cmds['outputdir'], self.cmds['tempdir'], self.cmds['otherfile'])
+        otherfilepath = os.path.join(self.conf.outputdir, self.conf.tempdir, self.conf.otherfile)
         fo = _gzipOpen(otherfilepath, 'w')
         fo.write('\n')
         fo.write('\n' % self.pkgcount)
         return fo

-    def _getNodes(self, pkg, directory, current):
-        # delete function since it seems to nothing anymore
-        basenode = None
-        filesnode = None
-        othernode = None
-        try:
-            rpmdir= os.path.join(self.cmds['basedir'], directory)
-            mdobj = dumpMetadata.RpmMetaData(self.ts, rpmdir, pkg, self.cmds)
-        except dumpMetadata.MDError, e:
-            errorprint('\n%s - %s' % (e, pkg))
-            return None
-        try:
-            basenode = dumpMetadata.generateXML(self.basedoc, self.baseroot, self.formatns, mdobj, self.cmds['sumtype'])
-        except dumpMetadata.MDError, e:
-            errorprint(_('\nAn error occurred creating primary metadata: %s') % e)
-            return None
-        try:
-            filesnode = dumpMetadata.fileListXML(self.filesdoc, self.filesroot, mdobj)
-        except dumpMetadata.MDError, e:
-            errorprint(_('\nAn error occurred creating filelists: %s') % e)
-            return None
-        try:
-            othernode = dumpMetadata.otherXML(self.otherdoc, self.otherroot, mdobj)
-        except dumpMetadata.MDError, e:
-            errorprint(_('\nAn error occurred: %s') % e)
-            return None
-        return basenode,filesnode,othernode

     def read_in_package(self, directory, rpmfile):
         # XXX fixme try/excepts here
         # directory is stupid - just make it part of the class
-        rpmfile = '%s/%s/%s' % (self.cmds['basedir'], directory, rpmfile)
+        rpmfile = '%s/%s/%s' % (self.conf.basedir, directory, rpmfile)
         po = yumbased.CreateRepoPackage(self.ts, rpmfile)
         return po
@@ -210,7 +227,7 @@ class MetaDataGenerator:

         # look to see if we can get the data from the old repodata
         # if so write this one out that way
-        if self.cmds['update']:
+        if self.conf.update:
             #see if we can pull the nodes from the old repo
             nodes = self.oldData.getNodes(pkg)
             if nodes is not None:
@@ -221,7 +238,8 @@ class MetaDataGenerator:
         if not recycled:
             #scan rpm files
             po = self.read_in_package(directory, pkg)
-            self.primaryfile.write(po.do_primary_xml_dump())
+            reldir = os.path.join(self.conf.basedir, directory)
+            self.primaryfile.write(po.do_primary_xml_dump(reldir, baseurl=self.conf.baseurl))
             self.flfile.write(po.do_filelists_xml_dump())
             self.otherfile.write(po.do_other_xml_dump())
         else:
@@ -233,14 +251,14 @@ class MetaDataGenerator:
                              (othernode,self.otherfile)):
                 if node is None:
                     break
-                output = node.serialize('UTF-8', self.cmds['pretty'])
+                output = node.serialize('UTF-8', self.conf.pretty)
                 outfile.write(output)
                 outfile.write('\n')
             self.oldData.freeNodes(pkg)

-        if not self.cmds['quiet']:
-            if self.cmds['verbose']:
+        if not self.conf.quiet:
+            if self.conf.verbose:
                 print '%d/%d %s %s' % (current, self.pkgcount, sep, pkg)
             else:
                 sys.stdout.write('\r' + ' ' * 80)
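
The non-verbose branch above redraws a single terminal line per package instead of scrolling; the 80-space pad simply blanks whatever was printed before. The idiom pulled out on its own (illustrative helper, not part of the patch):

    import sys

    def show_progress(current, total, name):
        sys.stdout.write('\r' + ' ' * 80)                  # erase the previous line
        sys.stdout.write('\r%d/%d - %s' % (current, total, name))
        sys.stdout.flush()
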
@@ -251,39 +269,146 @@ class MetaDataGenerator:

     def closeMetadataDocs(self):
-        if not self.cmds['quiet']:
+        if not self.conf.quiet:
             print ''

         # save them up to the tmp locations:
-        if not self.cmds['quiet']:
+        if not self.conf.quiet:
             print _('Saving Primary metadata')
         self.primaryfile.write('\n')
         self.primaryfile.close()

-        if not self.cmds['quiet']:
+        if not self.conf.quiet:
             print _('Saving file lists metadata')
         self.flfile.write('\n')
         self.flfile.close()

-        if not self.cmds['quiet']:
+        if not self.conf.quiet:
             print _('Saving other metadata')
         self.otherfile.write('\n')
         self.otherfile.close()
+
+
+
     def doRepoMetadata(self):
         """wrapper to generate the repomd.xml file that stores the info on the other files"""
         repodoc = libxml2.newDoc("1.0")
         reporoot = repodoc.newChild(None, "repomd", None)
         repons = reporoot.newNs('http://linux.duke.edu/metadata/repo', None)
         reporoot.setNs(repons)
-        repofilepath = os.path.join(self.cmds['outputdir'], self.cmds['tempdir'], self.cmds['repomdfile'])
+        repopath = os.path.join(self.conf.outputdir, self.conf.tempdir)
+        repofilepath = os.path.join(repopath, self.conf.repomdfile)
+
+        sumtype = self.conf.sumtype
+        workfiles = [(self.conf.otherfile, 'other',),
+                     (self.conf.filelistsfile, 'filelists'),
+                     (self.conf.primaryfile, 'primary')]
+        repoid='garbageid'
+
+        if self.conf.database:
+            try:
+                dbversion = str(sqlitecachec.DBVERSION)
+            except AttributeError:
+                dbversion = '9'
+            rp = sqlitecachec.RepodataParserSqlite(repopath, repoid, None)
+
+        for (file, ftype) in workfiles:
+            complete_path = os.path.join(repopath, file)
+
+            zfo = _gzipOpen(complete_path)
+            uncsum = misc.checksum(sumtype, zfo)
+            zfo.close()
+            csum = misc.checksum(sumtype, complete_path)
+            timestamp = os.stat(complete_path)[8]
+
+            db_csums = {}
+            db_compressed_sums = {}
+
+            if self.conf.database:
+                if ftype == 'primary':
+                    rp.getPrimary(complete_path, csum)
+
+                elif ftype == 'filelists':
+                    rp.getFilelists(complete_path, csum)
+
+                elif ftype == 'other':
+                    rp.getOtherdata(complete_path, csum)
+
-        try:
-            repoXML(reporoot, self.cmds)
-        except MDError, e:
-            errorprint(_('Error generating repo xml file: %s') % e)
-            sys.exit(1)
+                tmp_result_name = '%s.xml.gz.sqlite' % ftype
+                tmp_result_path = os.path.join(repopath, tmp_result_name)
+                good_name = '%s.sqlite' % ftype
+                resultpath = os.path.join(repopath, good_name)
+
+                # rename from silly name to not silly name
+                os.rename(tmp_result_path, resultpath)
+                compressed_name = '%s.bz2' % good_name
+                result_compressed = os.path.join(repopath, compressed_name)
+                db_csums[ftype] = misc.checksum(sumtype, resultpath)
+
+                # compress the files
+                bzipFile(resultpath, result_compressed)
+                # csum the compressed file
+                db_compressed_sums[ftype] = misc.checksum(sumtype, result_compressed)
+                # remove the uncompressed file
+                os.unlink(resultpath)
+
+                # timestamp the compressed file
+                db_timestamp = os.stat(result_compressed)[8]
+
+                # add this data as a section to the repomdxml
+                db_data_type = '%s_db' % ftype
+                data = reporoot.newChild(None, 'data', None)
+                data.newProp('type', db_data_type)
+                location = data.newChild(None, 'location', None)
+                if self.conf.baseurl is not None:
+                    location.newProp('xml:base', self.conf.baseurl)
+
+                location.newProp('href', os.path.join(self.conf.finaldir, compressed_name))
+                checksum = data.newChild(None, 'checksum', db_compressed_sums[ftype])
+                checksum.newProp('type', sumtype)
+                db_tstamp = data.newChild(None, 'timestamp', str(db_timestamp))
+                unchecksum = data.newChild(None, 'open-checksum', db_csums[ftype])
+                unchecksum.newProp('type', sumtype)
+                database_version = data.newChild(None, 'database_version', dbversion)
+
+
+            data = reporoot.newChild(None, 'data', None)
+            data.newProp('type', ftype)
+            location = data.newChild(None, 'location', None)
+            if self.conf.baseurl is not None:
+                location.newProp('xml:base', self.conf.baseurl)
+            location.newProp('href', os.path.join(self.conf.finaldir, file))
+            checksum = data.newChild(None, 'checksum', csum)
+            checksum.newProp('type', sumtype)
+            timestamp = data.newChild(None, 'timestamp', str(timestamp))
+            unchecksum = data.newChild(None, 'open-checksum', uncsum)
+            unchecksum.newProp('type', sumtype)
+
+        # if we've got a group file then checksum it once and be done
+        if self.conf.groupfile is not None:
+            grpfile = self.conf.groupfile
+            timestamp = os.stat(grpfile)[8]
+            sfile = os.path.basename(grpfile)
+            fo = open(grpfile, 'r')
+            output = open(os.path.join(self.conf.outputdir, self.conf.tempdir, sfile), 'w')
+            output.write(fo.read())
+            output.close()
+            fo.seek(0)
+            csum = misc.checksum(sumtype, fo)
+            fo.close()
+
+            data = reporoot.newChild(None, 'data', None)
+            data.newProp('type', 'group')
+            location = data.newChild(None, 'location', None)
+            if self.conf.baseurl is not None:
+                location.newProp('xml:base', self.conf.baseurl)
+            location.newProp('href', os.path.join(self.conf.finaldir, sfile))
+            checksum = data.newChild(None, 'checksum', csum)
+            checksum.newProp('type', sumtype)
+            timestamp = data.newChild(None, 'timestamp', str(timestamp))
+
         # save it down
         try:
             repodoc.saveFormatFileEnc(repofilepath, 'UTF-8', 1)
         except:
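
For each of primary/filelists/other the database branch above renames the sqlite file produced by sqlitecachec, bzip2-compresses it, checksums both forms, and records the result as a '<ftype>_db' data entry next to the plain '<ftype>' entry in repomd.xml. bzipFile() itself is not shown in this diff; a minimal stand-in using only the standard library could look like this (chunk size and compression level are assumptions):

    import bz2

    def bzipFile(source, dest):
        """Stream-compress source into dest with bzip2."""
        src = open(source, 'rb')
        out = bz2.BZ2File(dest, 'w', compresslevel=9)
        while True:
            chunk = src.read(1024 * 1024)
            if not chunk:
                break
            out.write(chunk)
        out.close()
        src.close()
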
@@ -294,8 +419,8 @@ class MetaDataGenerator:

 class SplitMetaDataGenerator(MetaDataGenerator):
-    def __init__(self, cmds):
-        MetaDataGenerator.__init__(self, cmds)
+    def __init__(self, config_obj=None):
+        MetaDataGenerator.__init__(self, config_obj=conf)

     def _getFragmentUrl(self, url, fragment):
         import urlparse
@@ -332,135 +457,22 @@ class SplitMetaDataGenerator(MetaDataGenerator):
             return
         filematrix = {}
         for mydir in directories:
-            filematrix[mydir] = self.getFileList(self.cmds['basedir'], mydir, '.rpm')
+            filematrix[mydir] = self.getFileList(self.conf.basedir, mydir, '.rpm')
             self.trimRpms(filematrix[mydir])
             self.pkgcount += len(filematrix[mydir])

         mediano = 1
         current = 0
-        self.cmds['baseurl'] = self._getFragmentUrl(self.cmds['baseurl'], mediano)
+        self.conf.baseurl = self._getFragmentUrl(self.conf.baseurl, mediano)
         self.openMetadataDocs()
-        original_basedir = self.cmds['basedir']
+        original_basedir = self.conf.basedir
         for mydir in directories:
-            self.cmds['baseurl'] = self._getFragmentUrl(self.cmds['baseurl'], mediano)
+            self.conf.baseurl = self._getFragmentUrl(self.conf.baseurl, mediano)
             current = self.writeMetadataDocs(filematrix[mydir], mydir, current)
             mediano += 1
-        self.cmds['baseurl'] = self._getFragmentUrl(self.cmds['baseurl'], 1)
+        self.conf.baseurl = self._getFragmentUrl(self.conf.baseurl, 1)
         self.closeMetadataDocs()

-def repoXML(node, cmds):
-    """generate the repomd.xml file that stores the info on the other files"""
-    sumtype = cmds['sumtype']
-    workfiles = [(cmds['otherfile'], 'other',),
-                 (cmds['filelistsfile'], 'filelists'),
-                 (cmds['primaryfile'], 'primary')]
-    repoid='garbageid'
-
-    repopath = os.path.join(cmds['outputdir'], cmds['tempdir'])
-
-    if cmds['database']:
-        try:
-            dbversion = str(sqlitecachec.DBVERSION)
-        except AttributeError:
-            dbversion = '9'
-        rp = sqlitecachec.RepodataParserSqlite(repopath, repoid, None)
-
-    for (file, ftype) in workfiles:
-        complete_path = os.path.join(repopath, file)
-
-        zfo = _gzipOpen(complete_path)
-        uncsum = misc.checksum(sumtype, zfo)
-        zfo.close()
-        csum = misc.checksum(sumtype, complete_path)
-        timestamp = os.stat(complete_path)[8]
-
-        db_csums = {}
-        db_compressed_sums = {}
-
-        if cmds['database']:
-            if ftype == 'primary':
-                rp.getPrimary(complete_path, csum)
-
-            elif ftype == 'filelists':
-                rp.getFilelists(complete_path, csum)
-
-            elif ftype == 'other':
-                rp.getOtherdata(complete_path, csum)
-
-
-            tmp_result_name = '%s.xml.gz.sqlite' % ftype
-            tmp_result_path = os.path.join(repopath, tmp_result_name)
-            good_name = '%s.sqlite' % ftype
-            resultpath = os.path.join(repopath, good_name)
-
-            # rename from silly name to not silly name
-            os.rename(tmp_result_path, resultpath)
-            compressed_name = '%s.bz2' % good_name
-            result_compressed = os.path.join(repopath, compressed_name)
-            db_csums[ftype] = misc.checksum(sumtype, resultpath)
-
-            # compress the files
-            bzipFile(resultpath, result_compressed)
-            # csum the compressed file
-            db_compressed_sums[ftype] = misc.checksum(sumtype, result_compressed)
-            # remove the uncompressed file
-            os.unlink(resultpath)
-
-            # timestamp the compressed file
-            db_timestamp = os.stat(result_compressed)[8]
-
-            # add this data as a section to the repomdxml
-            db_data_type = '%s_db' % ftype
-            data = node.newChild(None, 'data', None)
-            data.newProp('type', db_data_type)
-            location = data.newChild(None, 'location', None)
-            if cmds['baseurl'] is not None:
-                location.newProp('xml:base', cmds['baseurl'])
-
-            location.newProp('href', os.path.join(cmds['finaldir'], compressed_name))
-            checksum = data.newChild(None, 'checksum', db_compressed_sums[ftype])
-            checksum.newProp('type', sumtype)
-            db_tstamp = data.newChild(None, 'timestamp', str(db_timestamp))
-            unchecksum = data.newChild(None, 'open-checksum', db_csums[ftype])
-            unchecksum.newProp('type', sumtype)
-            database_version = data.newChild(None, 'database_version', dbversion)
-
-
-        data = node.newChild(None, 'data', None)
-        data.newProp('type', ftype)
-        location = data.newChild(None, 'location', None)
-        if cmds['baseurl'] is not None:
-            location.newProp('xml:base', cmds['baseurl'])
-        location.newProp('href', os.path.join(cmds['finaldir'], file))
-        checksum = data.newChild(None, 'checksum', csum)
-        checksum.newProp('type', sumtype)
-        timestamp = data.newChild(None, 'timestamp', str(timestamp))
-        unchecksum = data.newChild(None, 'open-checksum', uncsum)
-        unchecksum.newProp('type', sumtype)
-
-    # if we've got a group file then checksum it once and be done
-    if cmds['groupfile'] is not None:
-        grpfile = cmds['groupfile']
-        timestamp = os.stat(grpfile)[8]
-        sfile = os.path.basename(grpfile)
-        fo = open(grpfile, 'r')
-        output = open(os.path.join(cmds['outputdir'], cmds['tempdir'], sfile), 'w')
-        output.write(fo.read())
-        output.close()
-        fo.seek(0)
-        csum = misc.checksum(sumtype, fo)
-        fo.close()
-
-        data = node.newChild(None, 'data', None)
-        data.newProp('type', 'group')
-        location = data.newChild(None, 'location', None)
-        if cmds['baseurl'] is not None:
-            location.newProp('xml:base', cmds['baseurl'])
-        location.newProp('href', os.path.join(cmds['finaldir'], sfile))
-        checksum = data.newChild(None, 'checksum', csum)
-        checksum.newProp('type', sumtype)
-        timestamp = data.newChild(None, 'timestamp', str(timestamp))
-
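
SplitMetaDataGenerator stamps the baseurl with a media number for every disc via _getFragmentUrl(), whose body lies outside the hunks shown here. Given the urlparse import and how it is called with mediano, a plausible stand-in is a fragment swap, roughly:

    import urlparse

    def get_fragment_url(url, fragment):
        # hypothetical equivalent of SplitMetaDataGenerator._getFragmentUrl
        if not url:
            return url
        parts = list(urlparse.urlparse(url))
        parts[5] = str(fragment)          # index 5 is the '#fragment' field
        return urlparse.urlunparse(parts)

    # e.g. get_fragment_url('http://example.com/media', 2) -> 'http://example.com/media#2'
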
diff --git a/createrepo/utils.py b/createrepo/utils.py
index bb3939c..51e56cd 100644
--- a/createrepo/utils.py
+++ b/createrepo/utils.py
@@ -98,4 +98,27 @@ def utf8String(string):
             newstring = newstring + char
     return newstring

+def checkAndMakeDir(dir):
+    """
+     check out the dir and make it, if possible, return 1 if done, else return 0
+    """
+    if os.path.exists(dir):
+        if not os.path.isdir(dir):
+            errorprint(_('%s is not a dir') % dir)
+            result = False
+        else:
+            if not os.access(dir, os.W_OK):
+                errorprint(_('%s is not writable') % dir)
+                result = False
+            else:
+                result = True
+    else:
+        try:
+            os.mkdir(dir)
+        except OSError, e:
+            errorprint(_('Error creating dir %s: %s') % (dir, e))
+            result = False
+        else:
+            result = True
+    return result
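
checkAndMakeDir() reports failure instead of raising, so callers are expected to test the return value. An illustrative caller (the path and the exit are examples, not from this patch):

    import os
    import sys
    from createrepo.utils import checkAndMakeDir

    tmpdir = os.path.join('/srv/repos/el5', '.repodata')   # outputdir + tempdir
    if not checkAndMakeDir(tmpdir):
        sys.exit(1)
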
diff --git a/createrepo/yumbased.py b/createrepo/yumbased.py
index ea2b9aa..ec6db90 100644
--- a/createrepo/yumbased.py
+++ b/createrepo/yumbased.py
@@ -14,6 +14,7 @@ from yum import misc
 from rpmUtils.transaction import initReadOnlyTransaction
 from rpmUtils.miscutils import flagToString, stringToVersion

+#FIXME - merge into class with config stuff
 fileglobs = ['.*bin\/.*', '^\/etc\/.*', '^\/usr\/lib\/sendmail$']
 file_re = []
 for glob in fileglobs:
@@ -96,7 +97,19 @@ class CreateRepoPackage(YumLocalPackage):
     hdrend = property(fget=lambda self: self._get_header_byte_range()[1])
     hdrstart = property(fget=lambda self: self._get_header_byte_range()[0])

-    def _dump_base_items(self):
+    def _dump_base_items(self, basedir, baseurl=None):
+        """Takes an optional baseurl and required basedir.
+           basedir is the relative path to remove from the location
+           baseurl is whether or not this package already has a
+           baseurl rather than just '.'"""
+
+        # if we start seeing fullpaths in the location tag - this is the culprit
+        if self.localpath.startswith(basedir):
+            relpath = self.localpath.replace(basedir, '')
+            if relpath[0] == '/': relpath = relpath[1:]
+        else:
+            relpath = self.localpath
+
         msg = """
 %s
 %s
@@ -108,12 +121,16 @@ class CreateRepoPackage(YumLocalPackage):
 %s
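
The basedir handling added to _dump_base_items() is what keeps the package location repo-relative rather than absolute. The same logic traced with illustrative values:

    localpath = '/srv/repos/el5/Packages/foo-1.0-1.noarch.rpm'
    basedir = '/srv/repos/el5'

    if localpath.startswith(basedir):
        relpath = localpath.replace(basedir, '')
        if relpath[0] == '/': relpath = relpath[1:]
    else:
        relpath = localpath

    # relpath is now 'Packages/foo-1.0-1.noarch.rpm', which is what goes into the
    # location tag; baseurl, when given, presumably ends up as its base instead of '.'.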