allow optionally creating compressed sqlite databases
author     Seth Vidal <skvidal@linux.duke.edu>
           Sat, 3 Feb 2007 19:06:24 +0000 (19:06 +0000)
committer  Seth Vidal <skvidal@linux.duke.edu>
           Sat, 3 Feb 2007 19:06:24 +0000 (19:06 +0000)
dumpMetadata.py
genpkgmetadata.py
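
With the new -d/--database switch, createrepo additionally parses each of
the three compressed XML metadata files into a sqlite database via
sqlitecachec, bzip2-compresses the result, and advertises it in repomd.xml
as a *_db entry. A minimal invocation sketch (assuming the usual createrepo
wrapper around genpkgmetadata.py; the path is illustrative):

    createrepo --database /srv/myrepo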

diff --git a/dumpMetadata.py b/dumpMetadata.py
index 36ab5d60c85ecec600a795f0f916629e739ab091..02abab957cc8d529cac6eef25d397c64a6220d9c 100644
--- a/dumpMetadata.py
+++ b/dumpMetadata.py
@@ -27,6 +27,8 @@ import types
 import struct
 import re
 import stat
+import bz2
+import sqlitecachec
 
 # done to fix gzip randomly changing the checksum
 import gzip
@@ -54,7 +56,20 @@ class GzipFile(gzip.GzipFile):
 def _gzipOpen(filename, mode="rb", compresslevel=9):
     return GzipFile(filename, mode, compresslevel)
     
+def bzipFile(source, dest):
+    """Compress the file at source into a bzip2 file at dest, in chunks."""
+    s_fn = open(source, 'rb')
+    destination = bz2.BZ2File(dest, 'w')
+
+    while True:
+        data = s_fn.read(1024000)
+        if not data:
+            break
+        destination.write(data)

+    destination.close()
+    s_fn.close()
+
 
 def returnFD(filename):
     try:
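
The streaming helper above is what repoXML uses below to compress each
generated database; reading in 1024000-byte chunks keeps memory use flat
even for a large filelists database. A usage sketch (paths hypothetical):

    bzipFile('/tmp/repodata/primary.xml.gz.sqlite',
             '/tmp/repodata/primary.xml.gz.sqlite.bz2')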
@@ -748,14 +763,70 @@ def repoXML(node, cmds):
     workfiles = [(cmds['otherfile'], 'other',),
                  (cmds['filelistsfile'], 'filelists'),
                  (cmds['primaryfile'], 'primary')]
+    repoid='garbageid'
     
+    repopath = os.path.join(cmds['outputdir'], cmds['tempdir'])
     
+    if cmds['database']:
+        rp = sqlitecachec.RepodataParserSqlite(repopath, repoid, None)
+
     for (file, ftype) in workfiles:
-        zfo = _gzipOpen(os.path.join(cmds['outputdir'], cmds['tempdir'], file))
+        complete_path = os.path.join(repopath, file)
+        
+        zfo = _gzipOpen(complete_path)
         uncsum = getChecksum(sumtype, zfo)
         zfo.close()
-        csum = getChecksum(sumtype, os.path.join(cmds['outputdir'], cmds['tempdir'], file))
-        timestamp = os.stat(os.path.join(cmds['outputdir'], cmds['tempdir'], file))[8]
+        csum = getChecksum(sumtype, complete_path)
+        timestamp = os.stat(complete_path)[8]
+        
+        db_csums = {}
+        db_compressed_sums = {}
+        
+        if cmds['database']:
+            if ftype == 'primary':
+                rp.getPrimary(complete_path, csum)
+                resultname = 'primary.xml.gz.sqlite'
+                compressed_name = 'primary.xml.gz.sqlite.bz2'
+                            
+            elif ftype == 'filelists':
+                rp.getFilelists(complete_path, csum)
+                resultname = 'filelists.xml.gz.sqlite'
+                compressed_name = 'filelists.xml.gz.sqlite.bz2'
+                
+            elif ftype == 'other':
+                rp.getOtherdata(complete_path, csum)
+                resultname = 'other.xml.gz.sqlite'
+                compressed_name = 'other.xml.gz.sqlite.bz2'
+                
+            resultpath = os.path.join(repopath, resultname)
+            result_compressed = os.path.join(repopath, compressed_name)
+            db_csums[ftype] = getChecksum(sumtype, resultpath)
+            # compress the files
+            bzipFile(resultpath, result_compressed)
+            # csum the compressed file
+            db_compressed_sums[ftype] = getChecksum(sumtype, result_compressed)
+            # remove the uncompressed file
+            os.unlink(resultpath)
+
+            # timestamp the compressed file
+            db_timestamp = os.stat(result_compressed)[8]
+            
+            # add this data as a section to the repomdxml
+            db_data_type = '%s_db' % ftype
+            data = node.newChild(None, 'data', None)
+            data.newProp('type', db_data_type)
+            location = data.newChild(None, 'location', None)
+            if cmds['baseurl'] is not None:
+                location.newProp('xml:base', cmds['baseurl'])
+            
+            location.newProp('href', os.path.join(cmds['finaldir'], compressed_name))
+            checksum = data.newChild(None, 'checksum', db_compressed_sums[ftype])
+            checksum.newProp('type', sumtype)
+            db_tstamp = data.newChild(None, 'timestamp', str(db_timestamp))
+            unchecksum = data.newChild(None, 'open-checksum', db_csums[ftype])
+            unchecksum.newProp('type', sumtype)
+            
+            
         data = node.newChild(None, 'data', None)
         data.newProp('type', ftype)
         location = data.newChild(None, 'location', None)
@@ -790,3 +861,5 @@ def repoXML(node, cmds):
         checksum = data.newChild(None, 'checksum', csum)
         checksum.newProp('type', sumtype)
         timestamp = data.newChild(None, 'timestamp', str(timestamp))
+    
+        
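
The net effect of the repoXML changes is one extra <data> section per
metadata type in repomd.xml, roughly as below (checksums and timestamp are
illustrative, and sumtype is assumed to be the default sha):

    <data type="primary_db">
      <location href="repodata/primary.xml.gz.sqlite.bz2"/>
      <checksum type="sha">...</checksum>
      <timestamp>1170529584</timestamp>
      <open-checksum type="sha">...</open-checksum>
    </data>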
diff --git a/genpkgmetadata.py b/genpkgmetadata.py
index c00986bede6d4b5b768d296f9403165812858d97..c476ee3c6392f993655c5564370914bb018154db 100755
--- a/genpkgmetadata.py
+++ b/genpkgmetadata.py
@@ -61,6 +61,7 @@ def usage(retval=1):
      -h, --help = show this help
      -V, --version = output version
      -p, --pretty = output xml files in pretty format.
+     -d, --database = generate the sqlite databases.
     """)
 
     sys.exit(retval)
@@ -377,15 +378,16 @@ def parseArgs(args):
     cmds['mdtimestamp'] = 0
     cmds['split'] = False
     cmds['outputdir'] = ""
+    cmds['database'] = False
     cmds['file-pattern-match'] = ['.*bin\/.*', '^\/etc\/.*', '^\/usr\/lib\/sendmail$']
     cmds['dir-pattern-match'] = ['.*bin\/.*', '^\/etc\/.*']
 
     try:
-        gopts, argsleft = getopt.getopt(args, 'phqVvng:s:x:u:c:o:C', ['help', 'exclude=',
+        gopts, argsleft = getopt.getopt(args, 'phqVvndg:s:x:u:c:o:C', ['help', 'exclude=',
                                                                   'quiet', 'verbose', 'cachedir=', 'basedir=',
                                                                   'baseurl=', 'groupfile=', 'checksum=',
                                                                   'version', 'pretty', 'split', 'outputdir=',
-                                                                  'noepoch', 'checkts'])
+                                                                  'noepoch', 'checkts', 'database'])
     except getopt.error, e:
         errorprint(_('Options Error: %s.') % e)
         usage()
@@ -451,7 +453,9 @@ def parseArgs(args):
                 cmds['outputdir'] = a
             elif arg in ['-n', '--noepoch']:
                 cmds['noepoch'] = True
-                    
+            elif arg in ['-d', '--database']:
+                cmds['database'] = True
+                
     except ValueError, e:
         errorprint(_('Options Error: %s') % e)
         usage()
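
With cmds['database'] set, a run then leaves three extra files next to the
usual metadata (assuming the default finaldir of repodata/):

    repodata/primary.xml.gz.sqlite.bz2
    repodata/filelists.xml.gz.sqlite.bz2
    repodata/other.xml.gz.sqlite.bz2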
@@ -461,7 +465,7 @@ def parseArgs(args):
         sys.exit(1)
 
     directory = directories[0]
-# 
+
     directory = os.path.normpath(directory)
     if cmds['split']:
         pass