-#!/usr/bin/python
+#!/usr/bin/python3
import sys
import os
import shutil
import subprocess
import re
-import zipfile
import datetime
import hashlib
-import operator
-import locale
-import errno
import logging
-import glob
import apt
import stat
-if sys.hexversion < 0x02040000:
- print >> sys.stderr, "Python 2.4 or newer is required."
+
+if sys.version_info[0] < 3:
+ print >> sys.stderr, "Python 3.0 or newer is required."
sys.exit(1)
+
'''
-Diff two folders and create delta using SS_BSDIFF
-Will maintain same format of script that will be generated when we use diffutil
-
-1. Create a list of files in each Base folders,
-2. These files will fall into one these below categories:
- 1) Only in OLD - Should be deleted
- 2) Only in NEW - Should be added or renamed accordingly
- 3) File exists in both directories but contents are different - Create Diff.
- 4) File name is same but TYPE can change (File to Folder, Folder to Link etc.)
- 5) Duplicates in the list of Deletes and News
- 6) Close matching diffs even though name changes across directories. (for matching extension)
- 7) Clearing empty directories after Moves or diffs under Rename.
-
-Current Case
-1. Given two folders, from list of REMOVED and NEW files find if there
-is version change and create diff between them
+Logic for DELTA_FS:
+ Diff two folders and create delta using SS_BSDIFF
+ Will maintain same format of script that will be generated when we use diffutil
+
+ 1. Create a list of the files in each base folder.
+ 2. Each file will fall into one of the categories below:
+ 1) Only in OLD - Should be deleted
+ 2) Only in NEW - Should be added or renamed accordingly
+ 3) File exists in both directories but contents are different - Create Diff.
+ 4) File name is same but TYPE can change (File to Folder, Folder to Link etc.)
+ 5) Duplicates in the list of Deletes and News
+ 6) Close matching diffs even though name changes across directories. (for matching extension)
+ 7) Clearing empty directories after Moves or diffs under Rename.
+
+ Current Case
+ 1. Given two folders, from list of REMOVED and NEW files find if there
+ is version change and create diff between them
TODO
Want to extend the same script for entire DIFF generation and replace TOTAlib.sh file
'''
-def global_paths():
- global DIFF_UTIL
- global ZIPUTIL
- global NEW_FILES_PATH
- global NEW_FILES_ZIP_NAME
- global SYMLINK_TYPE
- global ATTR_DOC_EXT
- global SYMLINK_DOC_NAME
- global DIFF_PREFIX
- global DIFF_SUFFIX
- global SUPPORT_RENAME
- global NEW_PREFIX
- global DIFFPATCH_UTIL
- global SUPPORT_CONTAINERS
- global FULL_IMAGE
- global DELTA_IMAGE
- global DELTA_FS
- global EXTRA
- global COMMON_BIN_PATH
- global MEM_REQ
- global EMPTY
- global MEM_FILE
-
-
COMMON_BIN_PATH = "../../common/bin/"
DIFF_UTIL = "/usr/local/bin/ss_bsdiff"
DIFFPATCH_UTIL = "/usr/local/bin/ss_bspatch"
-#ZIPUTIL = "p7zip "
ZIPUTIL = "7z -mf=off a "
NEW_FILES_PATH = "run/upgrade-sysroot"
NEW_FILES_ZIP_NAME = "system.7z"
SYMLINK_TYPE = "SYM"
-ATTR_DOC_EXT = "_attr.txt"
-SYMLINK_DOC_NAME = "_sym.txt"
-HARDLINK_DOC_NAME = "_hard.txt"
PART_DOC_EXT = ".txt"
+ATTR_DOC_EXT = "_attr" + PART_DOC_EXT
+SYMLINK_DOC_NAME = "_sym" + PART_DOC_EXT
+HARDLINK_DOC_NAME = "_hard" + PART_DOC_EXT
DIFF_PREFIX = "diff"
DIFF_SUFFIX = ".delta"
-NEW_PREFIX = 'new'
FULL_IMAGE = "FULL_IMAGE"
DELTA_IMAGE = "DELTA_IMAGE"
DELTA_FS = "DELTA_FS"
-EXTRA = "EXTRA"
LOGFILE = "Delta.log"
EMPTY = ""
+PARENT_DIR = ".."
MEM_REQ = 0
MEM_FILE = "NULL"
COMPRESSION_LZMA = "lzma"
COMPRESSION_BROTLI = "brotli"
-SUPPORT_RENAME = "TRUE" # Use appropriate name
-SUPPORT_CONTAINERS = "FALSE"
-
-TEST_MODE = "FALSE"
-
-
-def main():
- logging.basicConfig(filename=LOGFILE, level=logging.DEBUG)
- global AttributeFile
- global GenerateDiffAttr
- try:
-
- if len(sys.argv) < 5:
- sys.exit('Usage: CreatePatch.py UPDATE_TYPE PARTNAME OLDBASE NEWBASE OUTFOLDER')
- UPDATE_TYPE = sys.argv[1]
- UPDATE_TYPE_S = UPDATE_TYPE.split(":")
- PART_NAME = sys.argv[2] # lets make this also optional
-
- BASE_OLD = sys.argv[3]
- BASE_NEW = sys.argv[4]
- OUT_DIR = sys.argv[5]
- ATTR_OLD = EMPTY
- ATTR_NEW = EMPTY
- UPDATE_CFG_PATH = EMPTY
- GenerateDiffAttr = "FALSE"
- if UPDATE_TYPE_S[0] == DELTA_FS:
- #instead of arguments check it in outdirectory ?
- if len(sys.argv) == 9:
- ATTR_OLD = sys.argv[6]
- ATTR_NEW = sys.argv[7]
- UPDATE_CFG_PATH = '../' + sys.argv[8]
- GenerateDiffAttr = "TRUE"
-
- elif UPDATE_TYPE_S[0] in [DELTA_IMAGE, FULL_IMAGE]:
- if len(sys.argv) == 7:
- #Use path in better way
- UPDATE_CFG_PATH = '../' + sys.argv[6]
-
- global DIFF_UTIL
- global DIFFPATCH_UTIL
- if not (os.path.isfile(DIFF_UTIL) and os.access(DIFF_UTIL, os.X_OK)):
- DIFF_UTIL = COMMON_BIN_PATH + DIFF_UTIL
- DIFFPATCH_UTIL = COMMON_BIN_PATH + DIFFPATCH_UTIL
- if not (os.path.isfile(DIFF_UTIL) and os.access(DIFF_UTIL, os.X_OK)):
- print >> sys.stderr, "Diff Util Does NOT exist -- ABORT"
- logging.info('Diff Util Does NOT exist -- ABORT')
- sys.exit(1)
-
- start = datetime.datetime.now().time()
- logging.info('*************** ENTERED PYTHON SCRIPT *****************')
- logging.info('Arguments Passed: [UpdateType - %s][Part Name - %s] [BaseOld - %s] [BaseNew - %s] \n [OUTPUTDir - %s] [BASE ATTR - %s] [TARGET ATTR - %s]' % (UPDATE_TYPE, PART_NAME, BASE_OLD, BASE_NEW, OUT_DIR, ATTR_OLD, ATTR_NEW))
-
- try:
- ensure_dir_exists(OUT_DIR)
- except FileExistsError as exc:
- logging.error('Argument passed as OUT_DIR - %s is already an existing file' % OUT_DIR)
- raise exc
- if GenerateDiffAttr == "TRUE":
- if not (os.path.isfile(ATTR_OLD) and os.path.isfile(ATTR_NEW)):
- print >> sys.stderr, "Attributes missing -- ABORT"
- sys.exit(1)
-
- # Should check if APT is supported on other linux flavours
- cache = apt.Cache()
- if cache['p7zip'].is_installed and cache['attr'].is_installed and cache['tar'].is_installed:
- logging.info('Basic utils installed')
- else:
- print >> sys.stderr, "Basic utils missing -- ABORT"
- sys.exit(1)
-
- if UPDATE_TYPE_S[0] == FULL_IMAGE:
- SS_mk_full_img(BASE_OLD, BASE_NEW, OUT_DIR, PART_NAME, UPDATE_CFG_PATH)
- # #### currently does not support LZMA ####
- # elif UPDATE_TYPE == DELTA_IMAGE:
- # SS_mk_delta_img(BASE_OLD, BASE_NEW, OUT_DIR, PART_NAME, UPDATE_CFG_PATH, COMPRESSION_LZMA)
- elif UPDATE_TYPE_S[0] == DELTA_IMAGE:
- SS_mk_delta_img(BASE_OLD, BASE_NEW, OUT_DIR, PART_NAME, UPDATE_CFG_PATH, COMPRESSION_BROTLI)
- elif UPDATE_TYPE == DELTA_FS:
- AttributeFile = ATTR_NEW
- ATTR_FILE = OUT_DIR + '/' + PART_NAME + ATTR_DOC_EXT
- Diff_AttrFiles(ATTR_OLD, ATTR_NEW, ATTR_FILE)
- Old_files, Old_dirs = Get_Files(BASE_OLD)
- New_files, New_dirs = Get_Files(BASE_NEW)
- SS_Generate_Delta(PART_NAME, BASE_OLD, Old_files, Old_dirs, BASE_NEW, New_files, New_dirs, OUT_DIR, ATTR_FILE)
-
- if not UPDATE_CFG_PATH == EMPTY:
- SS_update_cfg(PART_NAME, UPDATE_CFG_PATH)
-
- elif UPDATE_TYPE == EXTRA:
- print('UPDATE_TYPE ---- EXTRA')
- else:
- print('UPDATE_TYPE ---- UNKNOWN FORMAT')
-
- if GenerateDiffAttr == "TRUE":
- if os.path.exists(ATTR_OLD) and os.path.exists(ATTR_NEW):
- os.remove(ATTR_OLD)
- os.remove(ATTR_NEW)
- end = datetime.datetime.now().time()
-
- logging.info('Max Memory requried to upgrade [%s] is [%d] for File[%s]' % (PART_NAME, MEM_REQ, MEM_FILE))
- logging.info('*************** DONE WITH PYTHON SCRIPT ***************')
- logging.info('Time start [%s] - Time end [%s]' % (start, end))
- print('Done with [%s][%d]---- Time start [%s] - Time end [%s]' % (PART_NAME, MEM_REQ, start, end))
-
- except Exception as exc:
- logging.error('Usage: {} <Update_Type> <Part_Name> <OLD_Base> <NEW_Base> <OUT_DIR>'.format(os.path.basename(sys.argv[0])))
- raise exc
-
-
-def SS_update_cfg(DELTA_BIN, UPDATE_CFG_PATH):
- f = open(UPDATE_CFG_PATH, 'r')
- lines = f.readlines()
- f.close()
- f = open(UPDATE_CFG_PATH, 'w')
- for line in lines:
- ConfigItems = line.split()
- if ConfigItems[0] == DELTA_BIN:
- DELTA = ConfigItems[1]
- logging.info('Updating %s config' % DELTA_BIN)
- line = line.rstrip('\n')
- Value = MEM_REQ
- line = line.replace(line, line + '\t' + str(Value) + '\n')
- f.write(line)
- else:
- f.write(line)
- f.close()
-
-
-def SS_mk_delta_img(BASE_OLD, BASE_NEW, OUT_DIR, DELTA_BIN, UPDATE_CFG_PATH, COMPRESSION_METHOD):
- #for sizes
-
- oldsize_d = os.path.getsize(BASE_OLD)
- newsize_d = os.path.getsize(BASE_NEW)
- SHA_BIN_DEST = hash_file(BASE_NEW)
- SHA_BIN_BASE = hash_file(BASE_OLD)
-
- #incase UPDATE CFG is empty
- DELTA = DELTA_BIN
- SS_UpdateSize(BASE_OLD, BASE_NEW)
- #Should throw error if PART NAME NOT found??
- if not UPDATE_CFG_PATH == EMPTY:
- f = open(UPDATE_CFG_PATH, 'r')
- lines = f.readlines()
- f.close()
- f = open(UPDATE_CFG_PATH, 'w')
- for line in lines:
- ConfigItems = line.split()
- if ConfigItems[0] == DELTA_BIN:
- logging.info('Updating %s config' % DELTA_BIN)
- DELTA = ConfigItems[1]
- line = line.rstrip('\n')
- line = line.replace(line, line + '\t' + str(oldsize_d) + '\t\t' + str(newsize_d) + '\t\t' + str(SHA_BIN_BASE) + '\t\t' + str(SHA_BIN_DEST) + '\n')
- f.write(line)
- else:
- f.write(line)
- f.close()
-
- patchLoc = '%s/%s' % (OUT_DIR, DELTA)
- logging.info('Make Delta Image %s <--> %s ==> %s %s' % (BASE_OLD, BASE_NEW, DELTA_BIN, patchLoc))
- subprocess.call([DIFF_UTIL, "-c", COMPRESSION_METHOD, BASE_OLD, BASE_NEW, patchLoc])
-
-
-def SS_mk_full_img(BASE_OLD, BASE_NEW, OUT_DIR, DELTA_BIN, UPDATE_CFG_PATH):
- logging.info('Make Full Image %s <--> %s ==> %s' % (BASE_OLD, BASE_NEW, DELTA_BIN))
- oldsize_d = os.path.getsize(BASE_OLD)
- newsize_d = os.path.getsize(BASE_NEW)
- SHA_BIN_DEST = hash_file(BASE_NEW)
- SHA_BIN_BASE = hash_file(BASE_OLD)
- #echo -e "\t${oldsize_d}\t\t${newsize_d}\t\t${SHA_BIN_BASE}\t\t${SHA_BIN_DEST}" >> ${DATA_DIR}/update_new.cfg
- SS_UpdateSize(BASE_OLD, BASE_NEW)
-
- if not UPDATE_CFG_PATH == EMPTY:
- f = open(UPDATE_CFG_PATH, 'r')
- lines = f.readlines()
- f.close()
- f = open(UPDATE_CFG_PATH, 'w')
- for line in lines:
- ConfigItems = line.split()
- if ConfigItems[0] == DELTA_BIN:
- logging.info('Updating %s config' % DELTA_BIN)
- DELTA = ConfigItems[1]
- line = line.rstrip('\n')
- line = line.replace(line, line + '\t' + str(oldsize_d) + '\t\t' + str(newsize_d) + '\t\t' + str(SHA_BIN_BASE) + '\t\t' + str(SHA_BIN_DEST) + '\n')
- f.write(line)
- else:
- f.write(line)
- f.close()
-
-
-def zipdir(path, zip):
- for root, dirs, files in os.walk(path):
- for file in files:
- zip.write(os.path.join(root, file))
+SUPPORT_RENAME = "TRUE"
def ensure_dir_exists(path):
os.makedirs(path)
elif os.path.isfile(path):
raise FileExistsError
- #shutil.rmtree(path)
- #os.makedirs(path)
def path_leaf(path):
return tail
-# Creating Diff between OLD and NEW attribute files v12
-def Diff_AttrFiles(ATTR_OLD, ATTR_NEW, ATTR_FILE):
- if GenerateDiffAttr == "FALSE":
- return
+# Creating Diff between OLD and NEW attribute files
+def diff_attr_files(ATTR_OLD, ATTR_NEW, ATTR_FILE):
with open(ATTR_OLD, 'r') as f_old:
lines1 = set(f_old.read().splitlines())
lines = set.difference(lines2, lines1)
with open(ATTR_FILE, 'w+') as file_out:
for line in lines:
- logging.info('Diff_AttrFiles - %s' % line)
+ logging.info('diff_attr_files - %s' % line)
file_out.write(line + '\n')
-def Update_Attr(RequestedPath, Type, File_Attributes, Sym_Attributes):
- # Full File Path should MATCH
- if GenerateDiffAttr == "FALSE":
- return
- FilePath = '"' + RequestedPath + '"'
- #print ('FilePath - %s'% (FilePath))
- with open(AttributeFile) as f:
- for line in f:
- if FilePath in line:
- if Type == SYMLINK_TYPE:
- Sym_Attributes.append(line)
- else:
- File_Attributes.append(line)
-
-
def hash_file(filename):
'''This function returns the SHA-1 hash of the file passed into it'''
return h.hexdigest()
-def find_dupes_list(BASE_OLD, BASE_NEW, fileListB, fileListT, Old_hardlinks, New_hardlinks):
+def measure_two_filediffs(src, dst):
+    """Return the size in bytes of the patch DIFF_UTIL generates from src to dst.
+
+    The patch is written to 'temp.patch' inside a private temporary directory
+    that is removed again before returning, so nothing is left behind in the
+    working directory and parallel runs cannot overwrite each other's patch.
+
+    Args:
+        src: path of the file used as the diff base.
+        dst: path of the file used as the diff target.
+
+    Returns:
+        Size of the generated patch file in bytes.
+
+    Raises:
+        subprocess.CalledProcessError: DIFF_UTIL exited with a non-zero status.
+        OSError: DIFF_UTIL could not be started or left no patch file behind.
+    """
+    import tempfile
+
+    with tempfile.TemporaryDirectory() as scratch_dir:
+        patchLoc = os.path.join(scratch_dir, 'temp.patch')
+        command = [DIFF_UTIL, src, dst, patchLoc]
+        exit_status = subprocess.call(command)
+        if exit_status != 0:
+            # The size of whatever a failed run wrote would not be a valid measurement.
+            logging.error('%s exited with status %d for %s -> %s' % (DIFF_UTIL, exit_status, src, dst))
+            raise subprocess.CalledProcessError(exit_status, command)
+        return os.path.getsize(patchLoc)
+
+
+def find_dupes_list(BASE_OLD, BASE_NEW, fileListB, fileListT):
dups = {}
fdupes = {}
- print('Finding Duplicates in - %s %s' % (BASE_OLD, BASE_NEW))
+ print('Finding duplicates in - %s %s' % (BASE_OLD, BASE_NEW))
for filename in fileListB:
- Src_File = BASE_OLD + '/' + filename
+ Src_File = os.path.join(BASE_OLD, filename)
if os.path.islink(Src_File) or os.path.isdir(Src_File) or ishardlink(Src_File):
continue
# Calculate hash
dups[file_hash] = Src_File
for filename in fileListT:
- Dest_File = BASE_NEW + '/' + filename
+ Dest_File = os.path.join(BASE_NEW, filename)
if os.path.islink(Dest_File) or os.path.isdir(Dest_File) or ishardlink(Dest_File):
continue
# Calculate hash
BaseStr = dups.get(file_hash)
Baseloc = BaseStr.find('/')
if not BaseStr[Baseloc:] == filename:
- #print('Dupes - %s ==> %s' % (BaseStr[Baseloc:], filename))
fdupes[BaseStr] = filename
- logging.info('Total Duplicate files %d' % (len(fdupes)))
+ logging.info('Total duplicate files %d' % (len(fdupes)))
+
return fdupes
-def SS_UpdateSize(src_file, dst_file):
+def update_size(src_file, dst_file):
global MEM_REQ
global MEM_FILE
oldsize_d = os.path.getsize(src_file)
MEM_FILE = dst_file
-def SS_Generate_Delta(PART_NAME, BASE_OLD, Old_files, Old_dirs, BASE_NEW, New_files, New_dirs, OUT_DIR, ATTR_FILE):
- print('Going from %d files to %d files' % (len(Old_files), len(New_files)))
- logging.info('Going from %d files to %d files' % (len(Old_files), len(New_files)))
-
- # First let's fill up these categories
- files_new = []
- files_removed = []
- Dir_removed = []
- Dir_Added = []
- files_changed = []
- files_unchanged = []
- files_renamed = []
- File_Attributes = []
- Sym_Attributes = []
-
- files_Del_List = {}
- files_New_List = {}
-
- # Get dictionaries used for hardlinks form both directories
- New_hardlinks = get_hardlinks(BASE_NEW)
- Old_hardlinks = get_hardlinks(BASE_OLD)
-
- # Generate NEW List
- for elt in New_files:
- if elt not in Old_files:
- files_new.append(elt)
- logging.info('New files %s' % elt)
-
- # Generate Delete List
- for elt in Old_files:
- if elt not in New_files:
- # Cant we just append it here only if this is NOT a directory???? so that we have list of removed files ONLY. including directories
- files_removed.append(elt)
- logging.info('Old files %s' % elt)
-
- for elt in Old_dirs:
- #print('List of Old Dirs %s' % elt)
- # Delete END logic goes in hand with UPG, After Diffs and moves, DEL END should be done.
- if elt not in New_dirs:
- Dir_removed.append(elt)
- logging.info('Old Dirs %s' % elt + '/')
-
- for elt in New_dirs:
- if elt not in Old_dirs:
- Dir_Added.append(elt)
- #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-
- # What files have changed contents but not name/path?
- for elt in New_files:
- if elt in Old_files:
- # Both are symbolic linkes and they differ
- src_file = BASE_OLD + '/' + elt
- dst_file = BASE_NEW + '/' + elt
- #print('Files Changed - %s -%s' % (src_file,dst_file))
- if os.path.islink(src_file) and os.path.islink(dst_file):
- if not (os.readlink(src_file) == os.readlink(dst_file)):
- files_changed.append(elt)
- #print('%d Sym link files changed' % len(files_changed))
- logging.info('Sym links Changed - %s' % elt)
- else:
- files_unchanged.append(elt)
- # Both are hardlinks - we add them because we can't be sure if file they point to changes
- elif elt in New_hardlinks and elt in Old_hardlinks:
- files_changed.append(elt)
- # Both are Normal files and they differ. (Is file returns true in case of sym/hardlink also,
- # so additional check to find either of the file is sym/hardlink)
- elif (not (os.path.islink(src_file) or os.path.islink(dst_file))) \
- and (not (elt in New_hardlinks or elt in Old_hardlinks)) \
- and os.path.isfile(src_file) and os.path.isfile(dst_file):
- if not filecmp.cmp(src_file, dst_file):
- files_changed.append(elt)
- #print('%d Normal files changed' % len(files_changed))
- #print('Files Changed - %s' % elt)
- else:
- files_unchanged.append(elt)
- # File types differ between BASE and TARGET
- else:
- logging.info('Files are of diff types but same names Src- %s Des- %s' % (src_file, dst_file))
- # Both file types have changed and they differ
- # Case 1: First Delete the OLD entry file type (Be it anything)
- # Processing and updating partition txt file will be done under REMOVED case and NEW files case accordingly, we just make an entry here
- files_removed.append(elt)
- files_new.append(elt)
-
- # Currently if Version or number is the first character of the file, then we are NOT making any diffs.
- if SUPPORT_RENAME == "TRUE":
- for elt in files_removed:
- if os.path.isfile(BASE_OLD + '/' + elt):
- FileName = path_leaf(elt)
- entries = re.split('[0-9]', FileName)
- # Gives the STRING part of NAME. if name starts with version then later part wil b string
- #print('Entires under removed list after split - %s %s - %s' % (FileName, entries[0], elt))
- # If version is starting at the begining of the string?? shd we hav additional check for such cases??
- if len(entries[0]) > 0:
- files_Del_List.update({entries[0]: elt})
+def ishardlink(path):
+    """Return True when os.stat() reports more than one link for *path*."""
+    link_count = os.stat(path).st_nlink
+    return link_count > 1
+
+
+def get_inode(path):
+    """Return the inode number that os.stat() reports for *path*."""
+    file_status = os.stat(path)
+    return file_status.st_ino
- for elt in files_new:
- if os.path.isfile(BASE_NEW + '/' + elt):
+
+class DeltaFsGenerator:
+ class OperationsCount:
+     """Eight integer counters, one per operation kind, all starting at zero."""
+
+     def __init__(self):
+         # Link-related counters.
+         self.sym_diff_cnt = self.sym_new_cnt = 0
+         self.hard_diff_cnt = self.hard_new_cnt = 0
+         # Counters for the remaining operation kinds.
+         self.del_cnt = self.new_cnt = 0
+         self.diff_cnt = self.move_cnt = 0
+
+ class ConstantStrings:
+     """Names and paths that stay fixed for one run.
+
+     The six constructor arguments are stored unchanged. Three document paths
+     are derived from them as OUT_DIR/<PART_NAME><suffix>, the suffixes being
+     PART_DOC_EXT, SYMLINK_DOC_NAME and HARDLINK_DOC_NAME.
+     """
+
+     def __init__(self, PART_NAME, BASE_OLD, BASE_NEW, OUT_DIR, ATTR_FILE, ATTR_NEW):
+         self.PART_NAME = PART_NAME
+         self.BASE_OLD = BASE_OLD
+         self.BASE_NEW = BASE_NEW
+         self.OUT_DIR = OUT_DIR
+         self.ATTR_FILE = ATTR_FILE
+         self.ATTR_NEW = ATTR_NEW
+
+         # Derived output document paths, all placed directly in OUT_DIR.
+         reg_name = PART_NAME + PART_DOC_EXT
+         self.REG_DOC = os.path.join(OUT_DIR, reg_name)
+         symlink_name = PART_NAME + SYMLINK_DOC_NAME
+         self.SYMLINK_DOC = os.path.join(OUT_DIR, symlink_name)
+         hardlink_name = PART_NAME + HARDLINK_DOC_NAME
+         self.HARDLINK_DOC = os.path.join(OUT_DIR, hardlink_name)
+
+ class OldNewEntriesData:
+     """Snapshot of the entries found under the old and the new base directory.
+
+     Attributes set by __init__ (every path is relative to its base directory):
+         old_files, new_files: sorted lists of the non-directory entries plus
+             the symlinks that point to directories.
+         old_dirs, new_dirs: sorted lists of the real (non-symlink) directories.
+         old_hardlinks, new_hardlinks: dicts mapping a hard-linked file to the
+             first path that was seen with the same inode.
+     """
+
+     def __init__(self, BASE_OLD, BASE_NEW):
+         self.old_files, self.old_dirs = self.get_entries_data(BASE_OLD)
+         self.new_files, self.new_dirs = self.get_entries_data(BASE_NEW)
+         self.new_hardlinks = self.get_hardlinks(BASE_NEW)
+         self.old_hardlinks = self.get_hardlinks(BASE_OLD)
+
+     def get_entries_data(self, path):
+         """Walk *path* and return (files, dirs) as sorted lists of relative paths.
+
+         Symlinks that point to directories are reported as files because the
+         walk does not follow them. Every real directory is reported in dirs,
+         whether it is empty or not.
+         """
+         all_files = []
+         all_dirs = []
+         for root, directories, filenames in os.walk(path, topdown=False, followlinks=False):
+             for directory in directories:
+                 dir_name = os.path.join(root, directory)
+                 if os.path.islink(dir_name):
+                     logging.debug('This is symlink pointing to dir - %s' % dir_name)
+                     all_files.append(os.path.relpath(dir_name, path))
+                 else:
+                     # Empty and non-empty directories are recorded the same way,
+                     # so there is no need to list the directory contents here.
+                     all_dirs.append(os.path.relpath(dir_name, path))
+             for filename in filenames:
+                 file_name = os.path.join(root, filename)
+                 all_files.append(os.path.relpath(file_name, path))
+
+         all_files.sort()
+         all_dirs.sort()
+         return all_files, all_dirs
+
+     def get_hardlinks(self, base):
+         """Return {relative path: relative path of the first file seen with the same inode}.
+
+         Only non-symlink files with a link count above one are considered. The
+         first path met for an inode is remembered but gets no entry of its own;
+         every later path sharing that inode maps to it.
+         """
+         hardlinks_dict = {}
+         inodes_dict = {}
+
+         for root, _directories, files in os.walk(base, topdown=True, followlinks=False):
+             for name in sorted(files):
+                 file_name = os.path.join(root, name)
+                 if os.path.islink(file_name) or not ishardlink(file_name):
+                     continue
+                 inode = get_inode(file_name)
+                 rel_path = os.path.relpath(file_name, base)
+                 if inode not in inodes_dict:
+                     inodes_dict[inode] = rel_path
+                 else:
+                     hardlinks_dict[rel_path] = inodes_dict[inode]
+
+         return hardlinks_dict
+
+ class EntriesLists:
+     """Holder for the result lists; each attribute starts as its own empty list."""
+
+     def __init__(self):
+         # File-level results.
+         self.added_files = list()
+         self.removed_files = list()
+         # Directory-level results.
+         self.added_dirs = list()
+         self.removed_dirs = list()
+         # Files present on both sides, plus renames.
+         self.changed_files = list()
+         self.unchanged_files = list()
+         self.renamed_files = list()
+
+         # Attribute lines collected for regular files and for symlinks.
+         self.file_attributes = list()
+         self.sym_attributes = list()
+
+ def __init__(self, PART_NAME, BASE_OLD, BASE_NEW, OUT_DIR, ATTR_FILE, ATTR_NEW):
+     """Create the four helper objects this generator works with.
+
+     Building OldNewEntriesData walks both BASE_OLD and BASE_NEW on disk.
+     """
+     counters = self.OperationsCount()
+     constants = self.ConstantStrings(PART_NAME, BASE_OLD, BASE_NEW, OUT_DIR, ATTR_FILE, ATTR_NEW)
+     self.operations_count = counters
+     self.constant_strings = constants
+     self.old_new_entries_data = self.OldNewEntriesData(BASE_OLD, BASE_NEW)
+     self.entries_lists = self.EntriesLists()
+
+ def update_attributes(self, requested_path, file_type):
+     """Collect the ATTR_NEW lines that mention *requested_path* in double quotes.
+
+     Matching lines go to entries_lists.sym_attributes when file_type equals
+     SYMLINK_TYPE, otherwise to entries_lists.file_attributes.
+     """
+     # The path is wrapped in quotes so that only the full path matches.
+     quoted_path = '"' + requested_path + '"'
+     with open(self.constant_strings.ATTR_NEW, "r") as attr_file:
+         for attr_line in attr_file:
+             if quoted_path not in attr_line:
+                 continue
+             if file_type == SYMLINK_TYPE:
+                 bucket = self.entries_lists.sym_attributes
+             else:
+                 bucket = self.entries_lists.file_attributes
+             bucket.append(attr_line)
+
+ def generate_entries_lists(self):
+     """Fill the added/removed file and directory lists.
+
+     An entry is "added" when it appears only in the new lists and "removed"
+     when it appears only in the old lists. Results are appended to
+     entries_lists in the order of the source lists, and each one is logged.
+     """
+     data = self.old_new_entries_data
+     lists = self.entries_lists
+
+     # Sets make each membership test O(1); the lists themselves are still
+     # iterated, so the output order is unchanged.
+     old_files = set(data.old_files)
+     new_files = set(data.new_files)
+     old_dirs = set(data.old_dirs)
+     new_dirs = set(data.new_dirs)
+
+     # Generate NEW List
+     for elt in data.new_files:
+         if elt not in old_files:
+             lists.added_files.append(elt)
+             logging.info('Added files %s' % elt)
+
+     # Generate Delete List
+     for elt in data.old_files:
+         if elt not in new_files:
+             lists.removed_files.append(elt)
+             logging.info('Removed files %s' % elt)
+
+     for elt in data.old_dirs:
+         # Delete END logic goes in hand with UPG, After Diffs and moves, DEL END should be done.
+         if elt not in new_dirs:
+             lists.removed_dirs.append(elt)
+             logging.info('Removed dirs %s' % elt + '/')
+
+     for elt in data.new_dirs:
+         if elt not in old_dirs:
+             lists.added_dirs.append(elt)
+             logging.info('Added dirs %s' % elt + '/')
+
+ def generate_changed_files_lists(self):
+     """Classify every entry that exists under both base directories.
+
+     For each path present in both the old and the new file list:
+       * both symlinks: changed when the link targets differ, else unchanged;
+       * both recorded as hardlinks: always changed;
+       * both plain regular files: changed when filecmp.cmp() reports a
+         difference, else unchanged;
+       * anything else (the entry type differs): the path is appended to both
+         removed_files and added_files.
+     """
+     data = self.old_new_entries_data
+     lists = self.entries_lists
+     base_old = self.constant_strings.BASE_OLD
+     base_new = self.constant_strings.BASE_NEW
+     # Built once so the membership test below is O(1) per entry.
+     old_files = set(data.old_files)
+
+     for elt in data.new_files:
+         if elt not in old_files:
+             continue
+         src_file = os.path.join(base_old, elt)
+         dst_file = os.path.join(base_new, elt)
+         # Both are symbolic links and they differ
+         if os.path.islink(src_file) and os.path.islink(dst_file):
+             if not (os.readlink(src_file) == os.readlink(dst_file)):
+                 lists.changed_files.append(elt)
+                 logging.info('Symlinks changed - %s' % elt)
+             else:
+                 lists.unchanged_files.append(elt)
+         # Both are hardlinks - we add them because we can't be sure if file they point to changes
+         elif elt in data.new_hardlinks and elt in data.old_hardlinks:
+             lists.changed_files.append(elt)
+         # Both are normal files and they differ. (isfile() returns true in case of sym/hardlink also,
+         # so we need additional checks to make sure both entries are the same type (normal))
+         elif (not (os.path.islink(src_file) or os.path.islink(dst_file))) \
+                 and (not (elt in data.new_hardlinks or elt in data.old_hardlinks)) \
+                 and os.path.isfile(src_file) and os.path.isfile(dst_file):
+             if not filecmp.cmp(src_file, dst_file):
+                 lists.changed_files.append(elt)
+             else:
+                 lists.unchanged_files.append(elt)
+         # Both are files of different types
+         else:
+             logging.info('Files are of diff types but same names; Src - %s Des - %s' % (src_file, dst_file))
+             lists.removed_files.append(elt)
+             lists.added_files.append(elt)
+
+ def generate_renamed_files_with_version_lists(self):
+ tmp_add_dict = {}
+ tmp_del_dict = {}
+
+ for elt in self.entries_lists.removed_files:
+ if os.path.isfile(os.path.join(self.constant_strings.BASE_OLD, elt)):
+ FileName = path_leaf(elt)
+ entries = re.split('[0-9]', FileName)
+ # entries[0] is the part of the name before the first digit. If the name starts with a version number, entries[0] is empty
+ # and the file is skipped below. TODO: should names that start with a version number get an additional check?
+ if len(entries[0]) > 0:
+ tmp_del_dict.update({entries[0]: elt})
+
+ for elt in self.entries_lists.added_files:
+ if os.path.isfile(os.path.join(self.constant_strings.BASE_NEW, elt)):
FileName = path_leaf(elt)
entries = re.split('[0-9]', FileName)
- #print('Entires under NEWfiles list after split - %s %s - %s' % (FileName, entries[0], elt))
if len(entries[0]) > 0:
- files_New_List.update({entries[0]: elt})
+ tmp_add_dict.update({entries[0]: elt})
- for key, value in files_Del_List.iteritems():
- #print('Key value pair -%s -%s' % (key, value))
- if key in files_New_List:
+ for key, value in tmp_del_dict.items():
+ if key in tmp_add_dict:
# this file is the same name in both!
- src_file = BASE_OLD + '/' + value
- dst_file = BASE_NEW + '/' + files_New_List[key]
+ src_file = os.path.join(self.constant_strings.BASE_OLD, value)
+ dst_file = os.path.join(self.constant_strings.BASE_NEW, tmp_add_dict[key])
# we don't want to move hardlinks
if ishardlink(src_file) or ishardlink(dst_file):
logging.debug('Cannot diff as one of them is a hardlink')
elif os.path.islink(src_file) or os.path.islink(dst_file):
- logging.debug('Cannot diff as one of them is Symlink')
+ logging.debug('Cannot diff as one of them is a symlink')
elif os.path.isdir(src_file) or os.path.isdir(dst_file):
- logging.debug('Cannot diff as one of them is dir')
+ logging.debug('Cannot diff as one of them is a dir')
else:
#Pick the best diff of same type and diff names
- files_renamed.append([files_New_List[key], value])
- files_removed.remove(value)
- files_new.remove(files_New_List[key])
-
- '''
- Patch Section
- Partition.txt contains Protocol for UPI
- Types Supported: DIFFS, MOVES, NEWS, DELETES, SYMDIFFS, SYMNEWS.
- '''
- Sym_Diff_Cnt = 0
- Sym_New_Cnt = 0
- Hard_Diff_Cnt = 0
- Hard_New_Cnt = 0
- Del_Cnt = 0
- New_Cnt = 0
- Diff_Cnt = 0
- Move_Cnt = 0
- SymLinkDoc = OUT_DIR + '/' + PART_NAME + SYMLINK_DOC_NAME
- HardLinkDoc = OUT_DIR + '/' + PART_NAME + HARDLINK_DOC_NAME
- Partition_Doc = open(OUT_DIR + '/' + PART_NAME + '.txt', 'w')
- Partition_Doc_SymLinks = open(SymLinkDoc, 'w')
- Partition_Doc_HardLinks = open(HardLinkDoc, "w")
-
- print("writing diff'ed changed files...")
- for elt in files_changed:
- dst_file = BASE_NEW + '/' + elt
- src_file = BASE_OLD + '/' + elt
- # Both files are symbolic links and they differ
- if os.path.islink(dst_file) and os.path.islink(src_file):
- # Both are symlinks and they differ
- logging.debug(' File Changed is Link %s ' % dst_file)
- patch = os.readlink(dst_file)
- Sym_Diff_Cnt = Sym_Diff_Cnt + 1
- Partition_Doc_SymLinks.write('SYM:DIFF:%s:%s:%s\n' % (elt, elt, patch))
- Update_Attr(elt, "SYM", File_Attributes, Sym_Attributes)
- # Both are hardlinks and they differ (point to something different, new/changed file)
- if elt in Old_hardlinks and elt in New_hardlinks:
- if Old_hardlinks[elt] != New_hardlinks[elt] or New_hardlinks[elt] in files_changed or New_hardlinks[elt] in files_new:
- logging.debug('Hardlinks changed %s %s' % (src_file, dst_file))
- patch = New_hardlinks[elt]
- Hard_Diff_Cnt += 1
- Partition_Doc_HardLinks.write('HARD:DIFF:%s:%s:%s\n' % (elt, elt, patch))
- # Both are NORMAL files and they differ
- elif (not (os.path.islink(src_file) or os.path.islink(dst_file))) \
- and (not (elt in Old_hardlinks or elt in New_hardlinks)) \
- and os.path.isfile(dst_file) and os.path.isfile(src_file):
- # Both are files and they differ
- Diff_Cnt = Diff_Cnt + 1
- patchName = (DIFF_PREFIX + '%d_%s_' + PART_NAME + DIFF_SUFFIX) % (Diff_Cnt, path_leaf(elt))
- patchLoc = '%s/%s' % (OUT_DIR, patchName)
- logging.debug(' File Differ %s %s' % (src_file, dst_file))
- SS_UpdateSize(src_file, dst_file)
-
- FORMAT = "REG"
- ret = subprocess.call([DIFF_UTIL, src_file, dst_file, patchLoc])
- if ret is not 0:
- logging.debug('Failed to create diff %d %s %s\n' % (ret, src_file, dst_file))
- files_new.append(elt)
- Diff_Cnt = Diff_Cnt - 1
- else:
- Partition_Doc.write('DIFF:REG:%s:%s:%s:%s:%s\n' % (elt, elt, hash_file(src_file), hash_file(dst_file), patchName))
-
- Update_Attr(elt, "FILE", File_Attributes, Sym_Attributes)
- # Both differ but they are of diff types
- else:
- # Processing and updating partition txt file will be done under REMOVED case and NEW files case accordingly, we just make an entry here
- files_removed.append(elt)
- files_new.append(elt)
-
- fdupes = find_dupes_list(BASE_OLD, BASE_NEW, files_removed, files_new, Old_hardlinks, New_hardlinks)
- for oldpath, newpath in fdupes.iteritems():
- logging.info('Dupes %s -> %s' % (oldpath, newpath))
- for elt in files_removed:
- src_file = BASE_OLD + '/' + elt
- # If parent directory is deleted.. & del end not possible. (==> Moves should be done before deletes in ENGINE)
- if src_file in fdupes.keys():
- dst_file = BASE_NEW + '/' + fdupes[src_file]
- logging.debug(' File Moved %s ==> %s' % (src_file, dst_file))
- Move_Cnt = Move_Cnt + 1
- Partition_Doc.write('MOVE:REG:%s:%s:%s\n' % (elt, fdupes[src_file], hash_file(src_file)))
- files_removed.remove(elt)
- files_new.remove(fdupes[src_file])
- # Should be placed after removing duplicates, else they will be filtered here.
- # loop shd b for all NEW files, rather than for all delete files (Current understanding)
- # First Step: Sort & Filter out unwanted files
- # Minimum condition used is,
- # 1. File name should match 70%
- # 2. Extensions should be same
- # 3. File name length shd b greater than 3 char
- # 4. As we are using sorting on file names, once file name does not match and R_Flag is set to true, we nee not check remaining files. So, will execute break.
- # 5. Should consider editdistance for RENAME LOGIC ==> TBD
- Base_DelList = files_removed[:]
- Base_NewList = files_new[:]
- DelList = sorted(Base_DelList, key=path_leaf)
- NewList = sorted(Base_NewList, key=path_leaf)
- logging.debug('Rename Logic before filter: Delcount -%d NewCount -%d' % (len(DelList), len(NewList)))
-
- Filter1 = []
- Filter2 = []
- # Remove unwanted items which we cant make diff with for rename logic
- for file in DelList:
- if os.path.islink(BASE_OLD + '/' + file):
- continue
- elif ishardlink(BASE_OLD + '/' + file):
- continue
- elif os.path.isdir(BASE_OLD + '/' + file):
- continue
- else:
- Filter1.append(file)
- #logging.debug('Sorted del list - %s' % (file))
-
- DelList = Filter1
+ self.entries_lists.renamed_files.append([tmp_add_dict[key], value])
+ self.entries_lists.removed_files.remove(value)
+ self.entries_lists.added_files.remove(tmp_add_dict[key])
+
+    def process_changed_entries(self):
+        """Write script entries for paths that exist in both trees but differ.
+
+        REG_DOC, SYMLINK_DOC and HARDLINK_DOC are opened in append mode. For each
+        entry of changed_files:
+          * both sides are symlinks: a SYM:DIFF line carrying the new link target
+            is written to SYMLINK_DOC;
+          * the entry is listed in both hardlink maps: a HARD:DIFF line is written
+            to HARDLINK_DOC when the link target changed or the new target is
+            itself a changed/added file;
+          * both sides are regular, non-link files: DIFF_UTIL is run to create a
+            patch in OUT_DIR and a DIFF:REG line is written on success; on
+            failure the entry is appended to added_files instead;
+          * anything else: the entry is appended to both removed_files and
+            added_files so the later removed/added passes handle it.
+        """
+        with open(self.constant_strings.REG_DOC, 'a') as reg_doc_obj, \
+             open(self.constant_strings.SYMLINK_DOC, 'a') as symlink_doc_obj, \
+             open(self.constant_strings.HARDLINK_DOC, "a") as hardlink_doc_obj:
+            for elt in self.entries_lists.changed_files:
+                dst_file = os.path.join(self.constant_strings.BASE_NEW, elt)
+                src_file = os.path.join(self.constant_strings.BASE_OLD, elt)
+                # Both files are symbolic links and they differ
+                if os.path.islink(dst_file) and os.path.islink(src_file):
+                    logging.debug('File changed is a symlink %s ' % dst_file)
+                    patch = os.readlink(dst_file)
+                    self.operations_count.sym_diff_cnt += 1
+                    symlink_doc_obj.write('SYM:DIFF:%s:%s:%s\n' % (elt, elt, patch))
+                    self.update_attributes(elt, "SYM")
+                # NOTE(review): the test below is a separate 'if', not an 'elif'. A symlink pair
+                # handled above that is not listed in both hardlink maps therefore also reaches
+                # the final 'else' and is queued in removed_files/added_files - confirm this is intended.
+                # Both are hardlinks and they differ (pointing to something different, new/changed file)
+                if elt in self.old_new_entries_data.old_hardlinks and elt in self.old_new_entries_data.new_hardlinks:
+                    if self.old_new_entries_data.old_hardlinks[elt] != self.old_new_entries_data.new_hardlinks[elt] \
+                       or self.old_new_entries_data.new_hardlinks[elt] in self.entries_lists.changed_files \
+                       or self.old_new_entries_data.new_hardlinks[elt] in self.entries_lists.added_files:
+                        logging.debug('File changed is a hardlink %s %s' % (src_file, dst_file))
+                        patch = self.old_new_entries_data.new_hardlinks[elt]
+                        self.operations_count.hard_diff_cnt += 1
+                        hardlink_doc_obj.write('HARD:DIFF:%s:%s:%s\n' % (elt, elt, patch))
+                # Both are NORMAL files and they differ
+                elif (not (os.path.islink(src_file) or os.path.islink(dst_file))) \
+                        and (not (elt in self.old_new_entries_data.old_hardlinks or elt in self.old_new_entries_data.new_hardlinks)) \
+                        and os.path.isfile(dst_file) and os.path.isfile(src_file):
+                    self.operations_count.diff_cnt += 1
+                    patchName = (DIFF_PREFIX + '%d_%s_' + self.constant_strings.PART_NAME + DIFF_SUFFIX) % (self.operations_count.diff_cnt, path_leaf(elt))
+                    patchLoc = '%s/%s' % (self.constant_strings.OUT_DIR, patchName)
+                    logging.debug('Regular file change %s %s' % (src_file, dst_file))
+                    update_size(src_file, dst_file)
-    for file in NewList:
-        if os.path.islink(BASE_NEW + '/' + file):
-            continue
-        elif ishardlink(BASE_NEW + '/' + file):
-            continue
-        elif os.path.isdir(BASE_NEW + '/' + file):
-            continue
-        elif len(path_leaf(file)) <= 3:
-            logging.debug('Ignored for best picks -%s ' % (BASE_NEW + '/' + file))
-            continue
-        else:
-            Filter2.append(file)
-    NewList = Filter2
-    logging.debug('Rename Logic After filter: Delcount -%d NewCount -%d' % (len(DelList), len(NewList)))
-
-    for new_file in NewList:
-        R_Flag = 'FALSE'
-        FileNameNew = path_leaf(new_file)
-        DiffSize = 0
-        winning_patch_sz = os.path.getsize(BASE_NEW + '/' + new_file)
-        New_fs = winning_patch_sz
-        winning_file = ''
-
-        for del_file in DelList:
-            FileNameOld = path_leaf(del_file)
-            if (FileNameOld.startswith(FileNameNew[:len(FileNameNew) * 7 / 10]) and (os.path.splitext(FileNameNew)[1] == os.path.splitext(del_file)[1])):
-                #winning_patch_sz = 0.9 * os.path.getsize(BASE_NEW+'/'+new_file)
-                # Percentage difference between two file sizes is within 30%, then we consider for diff generation
-                Del_fs = os.path.getsize(BASE_OLD + '/' + del_file)
-                v1 = abs(New_fs - Del_fs)
-                v2 = (New_fs + Del_fs) / 2
-                if(v2 <= 0 or ((v1 / v2) * 100) > 30):
-                    logging.debug('Ignore diff generation New_fs - %d Del_Fs - %d' % (New_fs, Del_fs))
-                    continue
-                logging.debug('I can compute diff between %s %s Del_Fs - %d New_Fs - %d' % (del_file, new_file, Del_fs, New_fs))
-                R_Flag = 'TRUE'
-                DiffSize = measure_two_filediffs(BASE_OLD + '/' + del_file, BASE_NEW + '/' + new_file)
-                if (DiffSize < 0.8 * winning_patch_sz):
-                    winning_patch_sz = DiffSize
-                    winning_file = del_file
-            elif (not FileNameOld.startswith(FileNameNew[:len(FileNameNew) * 7 / 10]) and R_Flag == 'TRUE'):
-                logging.debug('Because nex set of files will not have matching name - break @@ %s %s' % (del_file, new_file))
-                break
-        if len(winning_file) > 0:
-            logging.debug('Best Pick -%s ==> %s [%d]' % (winning_file, new_file, DiffSize))
-            files_renamed.append([new_file, winning_file])
-            DelList.remove(winning_file)
-            files_removed.remove(winning_file)
-            files_new.remove(new_file)
-
-    #********************** Files should NOT be deleted for any such renames ***********************
-
-    if SUPPORT_RENAME == "TRUE":
-        for elt in files_renamed:
-            src_file = BASE_OLD + '/' + elt[1]
-            dst_file = BASE_NEW + '/' + elt[0]
-            Diff_Cnt = Diff_Cnt + 1
-            patchName = (DIFF_PREFIX + '%d_%s_' + PART_NAME + DIFF_SUFFIX) % (Diff_Cnt, path_leaf(elt[1]))
-            #patchName = (DIFF_PREFIX+'_%s'+DIFF_SUFFIX) % (path_leaf(elt[0]))
-            patchLoc = '%s/%s' % (OUT_DIR, patchName)
-            logging.debug(' File Renamed %s ==> %s' % (src_file, dst_file))
-            # Should be careful of renaming files??
-            # Should we consider measure_two_filediffs ?? so that patch size is NOT greater than actual file?
-            # What if folder path has numerics??
-
-            if os.path.isdir(src_file) or os.path.isdir(dst_file):
-                # This case never occurs??
-                Partition_Doc.write('"%s" and "%s" renamed 0 0\n' % (elt[0], elt[1]))
-                Update_Attr(elt[0], "FILE", File_Attributes, Sym_Attributes)
-            # Make sure these files are PROPER and they shd NOT be symlinks
-            elif not (os.path.islink(src_file) or os.path.islink(dst_file)) \
-                    and not (elt[0] in New_hardlinks or elt[1] in Old_hardlinks) \
-                    and (os.path.isfile(src_file) and os.path.isfile(dst_file)):
-                if filecmp.cmp(src_file, dst_file):
-                    Move_Cnt = Move_Cnt + 1
-                    Diff_Cnt = Diff_Cnt - 1
-                    Partition_Doc.write('MOVE:REG:%s:%s:%s\n' % (elt[1], elt[0], hash_file(src_file)))
-                else:
-                    FORMAT = "REG"
ret = subprocess.call([DIFF_UTIL, src_file, dst_file, patchLoc])
-                    if ret is not 0:
+                    if ret != 0:
logging.debug('Failed to create diff %d %s %s\n' % (ret, src_file, dst_file))
-                        files_new.append(elt)
-                        Diff_Cnt = Diff_Cnt - 1
+                        self.entries_lists.added_files.append(elt)
+                        self.operations_count.diff_cnt -= 1
else:
-                        Partition_Doc.write('DIFF:REG:%s:%s:%s:%s:%s\n' % (elt[1], elt[0], hash_file(src_file), hash_file(dst_file), patchName))
-
-                SS_UpdateSize(src_file, dst_file)
-                Update_Attr(elt[0], "FILE", File_Attributes, Sym_Attributes)
-
-    for elt in files_removed:
-        # if files are part of patches after renaming, we shd remove them as part of removed.
-        src_file = BASE_OLD + '/' + elt
-        if os.path.islink(src_file):
-            Partition_Doc.write('DEL:SYM:%s\n' % (elt))
-        elif elt in Old_hardlinks:
-            Partition_Doc.write('DEL:HARD:%s\n' % (elt))
-        elif os.path.isdir(src_file):
-            # If we change to DIR TYPE, then the same token should be modified on UA also and SHA should be accordingly passed.
-            Partition_Doc.write('DEL:REG:%s:NA\n' % (elt))
-        else:
-            Partition_Doc.write('DEL:REG:%s:%s\n' % (elt, hash_file(src_file)))
-        logging.debug(' File Deleted %s' % src_file)
-        Del_Cnt = Del_Cnt + 1
-
-    Dir_removed.sort(reverse=True)
-    for elt in Dir_removed:
-        # if Dir is empty, add it to the removed list.
-        src_file = BASE_OLD + '/' + elt
-        # Irrespective of weather files are MOVED or DIFF'ed, we can delete the folders. This action can be performed at the end.
-        # It covers symlinks also, as NEW symlinks cannot point to NON existant folders of TARGET (NEW binary)
-        if os.path.isdir(src_file):
-            Partition_Doc.write('DEL:END:%s\n' % (elt))
-            Del_Cnt = Del_Cnt + 1
-            logging.debug(' Dir Deleted- %s' % src_file)
+                        reg_doc_obj.write('DIFF:REG:%s:%s:%s:%s:%s\n' % (elt, elt, hash_file(src_file), hash_file(dst_file), patchName))
-    try:
-        ensure_dir_exists(NEW_FILES_PATH)
-    except FileExistsError as exc:
-        logging.error('Directory %s used by this script is already an existing file' % NEW_FILES_PATH)
-        raise exc
-
-    for elt in files_new:
-        dst_file = os.path.join(BASE_NEW, elt)
-        destpath = os.path.join(NEW_FILES_PATH, elt)
-
-        if os.path.islink(dst_file):
-            patch = os.readlink(dst_file)
-            logging.debug(' File New Links %s' % elt)
-            Partition_Doc_SymLinks.write('SYM:NEW:%s:%s\n' % (elt, patch))
-            # What if this is only a new sym link and folder already exists??? Should recheck
-            if not os.path.exists(os.path.dirname(destpath)):
-                os.makedirs(os.path.dirname(destpath))
-                logging.info('New SymLink - Adding missing Dir')
-            Update_Attr(elt, "SYM", File_Attributes, Sym_Attributes)
-            Sym_New_Cnt = Sym_New_Cnt + 1
-        elif elt in New_hardlinks:
-            patch = New_hardlinks[elt]
-            logging.debug('File new hardlink %s' % elt)
-            Partition_Doc_HardLinks.write('HARD:NEW:%s:%s\n' %(elt, patch))
-            if not os.path.exists(os.path.dirname(destpath)):
-                os.makedirs(os.path.dirname(destpath))
-                logging.info('New hardlink - Adding missing Dir')
-            Hard_New_Cnt += 1
-        elif os.path.isdir(dst_file):  # We create just empty directory here
+                    self.update_attributes(elt, "FILE")
+                # Both differ but they are of different types
+                else:
+                    # Processing and updating partition txt file will be done under REMOVED case and NEW files case accordingly, we just make an entry here
+                    self.entries_lists.removed_files.append(elt)
+                    self.entries_lists.added_files.append(elt)
+
+    def process_moved_duplicates(self):
+        """Emit MOVE:REG records for removed files whose content reappears under a new path.
+
+        find_dupes_list() returns a mapping that is used here with keys being
+        paths joined onto BASE_OLD and values being entries of added_files.
+        For every removed file found in that mapping a MOVE:REG line (old path,
+        new path, hash of the old file) is appended to REG_DOC, move_cnt is
+        incremented, and both paths are dropped from removed_files/added_files
+        so that they are not reported again as a delete plus an add.
+        """
+        found_duplicates = find_dupes_list(self.constant_strings.BASE_OLD, self.constant_strings.BASE_NEW,
+                                           self.entries_lists.removed_files, self.entries_lists.added_files)
+        for old_path, new_path in found_duplicates.items():
+            logging.info('Dupes %s -> %s' % (old_path, new_path))
+
+        with open(self.constant_strings.REG_DOC, "a") as reg_doc_obj:
+            # Iterate over a snapshot: removed_files is modified inside the loop, and
+            # removing from a list while iterating it skips the element that follows
+            # each removed one (so consecutive moved files were silently missed).
+            for elt in list(self.entries_lists.removed_files):
+                src_file = os.path.join(self.constant_strings.BASE_OLD, elt)
+                # If parent directory is deleted.. & del end not possible. (==> Moves should be done before deletes in ENGINE)
+                if src_file not in found_duplicates:
+                    continue
+                moved_to = found_duplicates[src_file]
+                dst_file = os.path.join(self.constant_strings.BASE_NEW, moved_to)
+                logging.debug('File moved %s ==> %s' % (src_file, dst_file))
+                self.operations_count.move_cnt += 1
+                reg_doc_obj.write('MOVE:REG:%s:%s:%s\n' % (elt, moved_to, hash_file(src_file)))
+                self.entries_lists.removed_files.remove(elt)
+                self.entries_lists.added_files.remove(moved_to)
+
+    def find_remaining_renamed_files(self):
+        """Match remaining added files with the removed file that gives the smallest patch.
+
+        Should run after duplicates (moves) were taken out of the removed/added
+        lists, else they would be filtered here. Only regular files are
+        considered (symlinks, hardlinks and directories are skipped) and added
+        files whose name has 3 characters or fewer are ignored. A removed file is
+        a candidate for an added file when:
+          1. its name starts with the first 70% of the added file's name,
+          2. both have the same extension,
+          3. the two sizes differ by at most 30% of their mean.
+        A candidate wins when its measured patch is smaller than 80% of the best
+        size so far (initially the size of the added file). The winning pair is
+        appended to renamed_files as [new, old] and removed from
+        removed_files/added_files.
+
+        TODO: consider edit distance for the rename logic.
+        """
+        logging.debug('Rename logic before filter: removed count - %d added count - %d' % (len(self.entries_lists.removed_files), len(self.entries_lists.added_files)))
+
+        removed_files_filtered = []
+        added_files_filtered = []
+        # Remove unwanted items which we cant make diff with for rename logic.
+        # Both lists are sorted by file name; the early 'break' below relies on that.
+        for file in sorted(self.entries_lists.removed_files, key=path_leaf):
+            file_to_filter = os.path.join(self.constant_strings.BASE_OLD, file)
+            if os.path.islink(file_to_filter) or ishardlink(file_to_filter) or os.path.isdir(file_to_filter):
+                continue
+            removed_files_filtered.append(file)
+
+        for file in sorted(self.entries_lists.added_files, key=path_leaf):
+            file_to_filter = os.path.join(self.constant_strings.BASE_NEW, file)
+            if os.path.islink(file_to_filter) or ishardlink(file_to_filter) or os.path.isdir(file_to_filter):
+                continue
+            if len(path_leaf(file)) <= 3:
+                logging.debug('Ignored for best picks -%s ' % (file_to_filter))
+                continue
+            added_files_filtered.append(file)
+
+        logging.debug('Rename logic after filter: removed count -%d added count - %d' % (len(removed_files_filtered), len(added_files_filtered)))
+
+        for new_file in added_files_filtered:
+            candidate_seen = False
+            FileNameNew = path_leaf(new_file)
+            # Integer arithmetic on purpose: int(len * (7 / 10)) goes through a float and
+            # under-counts for some lengths (e.g. 90 * 0.7 == 62.99999999999999 -> 62).
+            name_prefix = FileNameNew[:len(FileNameNew) * 7 // 10]
+            new_ext = os.path.splitext(FileNameNew)[1]
+            base_new_file = os.path.join(self.constant_strings.BASE_NEW, new_file)
+            winning_patch_sz = os.path.getsize(base_new_file)
+            New_fs = winning_patch_sz
+            winning_file = EMPTY
+
+            for del_file in removed_files_filtered:
+                base_del_file = os.path.join(self.constant_strings.BASE_OLD, del_file)
+                FileNameOld = path_leaf(del_file)
+                prefix_matches = FileNameOld.startswith(name_prefix)
+                if prefix_matches and new_ext == os.path.splitext(del_file)[1]:
+                    # If percentage difference between two file sizes is within 30%, then we consider for diff generation
+                    Del_fs = os.path.getsize(base_del_file)
+                    v1 = abs(New_fs - Del_fs)
+                    v2 = (New_fs + Del_fs) / 2
+                    if v2 <= 0 or ((v1 / v2) * 100) > 30:
+                        logging.debug('Ignore diff generation New_fs - %d Del_Fs - %d' % (New_fs, Del_fs))
+                        continue
+                    logging.debug('I can compute diff between %s %s Del_Fs - %d New_Fs - %d' % (del_file, new_file, Del_fs, New_fs))
+                    candidate_seen = True
+                    DiffSize = measure_two_filediffs(base_del_file, base_new_file)
+                    if DiffSize < 0.8 * winning_patch_sz:
+                        winning_patch_sz = DiffSize
+                        winning_file = del_file
+                elif not prefix_matches and candidate_seen:
+                    # The list is sorted by name, so once a candidate was seen and the
+                    # prefix stops matching, no later entry can match either.
+                    logging.debug('Because next set of files will not have matching name - break @@ %s %s' % (del_file, new_file))
+                    break
+            if len(winning_file) > 0:
+                # Report the size of the winning patch, not of the last patch measured.
+                logging.debug('Best pick -%s ==> %s [%d]' % (winning_file, new_file, winning_patch_sz))
+                self.entries_lists.renamed_files.append([new_file, winning_file])
+                removed_files_filtered.remove(winning_file)
+                self.entries_lists.removed_files.remove(winning_file)
+                self.entries_lists.added_files.remove(new_file)
+
+    def process_renamed_files(self):
+        """Write MOVE/DIFF records for the [new, old] pairs collected in renamed_files.
+
+        For every pair of regular, non-link files: identical contents produce a
+        MOVE:REG line; otherwise DIFF_UTIL is run to create a patch in OUT_DIR
+        and a DIFF:REG line is written. If the diff tool fails, the new path is
+        queued in added_files so the file is shipped whole. All records are
+        appended to REG_DOC.
+        """
+        with open(self.constant_strings.REG_DOC, "a") as reg_doc_obj:
+            for elt in self.entries_lists.renamed_files:
+                new_rel, old_rel = elt[0], elt[1]
+                src_file = os.path.join(self.constant_strings.BASE_OLD, old_rel)
+                dst_file = os.path.join(self.constant_strings.BASE_NEW, new_rel)
+                # The counter is bumped first because it is part of the patch name;
+                # it is rolled back below when no patch ends up being produced.
+                self.operations_count.diff_cnt += 1
+                patch_name = (DIFF_PREFIX + '%d_%s_' + self.constant_strings.PART_NAME + DIFF_SUFFIX) % (self.operations_count.diff_cnt, path_leaf(old_rel))
+                patch_loc = '%s/%s' % (self.constant_strings.OUT_DIR, patch_name)
+                logging.debug('File renamed %s ==> %s' % (src_file, dst_file))
+                # Should be careful of renaming files??
+                # Should we consider measure_two_filediffs ?? so that patch size is NOT greater than actual file?
+                # What if folder path has numerics??
+
+                if os.path.isdir(src_file) or os.path.isdir(dst_file):
+                    # This case never occurs??
+                    reg_doc_obj.write('"%s" and "%s" renamed 0 0\n' % (new_rel, old_rel))
+                    self.update_attributes(new_rel, "FILE")
+                # Make sure these files are normal and they should NOT be hard/symlinks
+                elif not (os.path.islink(src_file) or os.path.islink(dst_file)) \
+                        and not (new_rel in self.old_new_entries_data.new_hardlinks or old_rel in self.old_new_entries_data.old_hardlinks) \
+                        and (os.path.isfile(src_file) and os.path.isfile(dst_file)):
+                    if filecmp.cmp(src_file, dst_file):
+                        self.operations_count.move_cnt += 1
+                        self.operations_count.diff_cnt -= 1
+                        reg_doc_obj.write('MOVE:REG:%s:%s:%s\n' % (old_rel, new_rel, hash_file(src_file)))
+                    else:
+                        ret = subprocess.call([DIFF_UTIL, src_file, dst_file, patch_loc])
+                        if ret != 0:
+                            logging.debug('Failed to create diff %d %s %s\n' % (ret, src_file, dst_file))
+                            # added_files holds relative path strings; queue only the new
+                            # path (the [new, old] pair itself would break os.path.join later).
+                            # NOTE(review): the old path was already taken out of removed_files,
+                            # so no DEL record is written for it - confirm whether it should be re-queued.
+                            self.entries_lists.added_files.append(new_rel)
+                            self.operations_count.diff_cnt -= 1
+                        else:
+                            reg_doc_obj.write('DIFF:REG:%s:%s:%s:%s:%s\n' % (old_rel, new_rel, hash_file(src_file), hash_file(dst_file), patch_name))
+
+                    update_size(src_file, dst_file)
+                    self.update_attributes(new_rel, "FILE")
+
+    def process_removed_entries(self):
+        """Append DEL records for removed files and removed directories to REG_DOC.
+
+        Removed files produce DEL:SYM, DEL:HARD or DEL:REG lines (DEL:REG carries
+        'NA' for a directory and the file hash otherwise). Removed directories
+        are then sorted in reverse order and each one that is a directory under
+        BASE_OLD produces a DEL:END line. del_cnt is incremented per record.
+        """
+        base_old = self.constant_strings.BASE_OLD
+        counters = self.operations_count
+
+        with open(self.constant_strings.REG_DOC, "a") as reg_doc_obj:
+            # if files are part of patches after renaming, we shd remove them as part of removed.
+            for removed in self.entries_lists.removed_files:
+                old_path = os.path.join(base_old, removed)
+                if os.path.islink(old_path):
+                    record = 'DEL:SYM:%s\n' % (removed)
+                elif removed in self.old_new_entries_data.old_hardlinks:
+                    record = 'DEL:HARD:%s\n' % (removed)
+                elif os.path.isdir(old_path):
+                    record = 'DEL:REG:%s:NA\n' % (removed)
+                else:
+                    record = 'DEL:REG:%s:%s\n' % (removed, hash_file(old_path))
+                reg_doc_obj.write(record)
+                logging.debug('File deleted %s' % old_path)
+                counters.del_cnt += 1
+
+            # Irrespective of whether files are MOVED or DIFF'ed, we can delete the folders. This action can be performed at the end.
+            # It covers symlinks also, as NEW symlinks cannot point to NON existent folders of TARGET (NEW binary)
+            self.entries_lists.removed_dirs.sort(reverse=True)
+            for removed_dir in self.entries_lists.removed_dirs:
+                old_path = os.path.join(base_old, removed_dir)
+                if not os.path.isdir(old_path):
+                    continue
+                reg_doc_obj.write('DEL:END:%s\n' % (removed_dir))
+                counters.del_cnt += 1
+                logging.debug('Dir deleted- %s' % old_path)
+
+    def process_added_entries(self):
+        """Record added entries and stage new content under NEW_FILES_PATH.
+
+        For each entry of added_files:
+          * symlink: a SYM:NEW line with the link target is appended to
+            SYMLINK_DOC and the parent directory is created in the staging tree;
+          * entry listed in new_hardlinks: a HARD:NEW line is appended to
+            HARDLINK_DOC and the parent directory is created in the staging tree;
+          * directory: an empty directory is created in the staging tree;
+          * anything else: the file is copied into the staging tree with
+            shutil.copy2 unless it is a FIFO.
+        update_attributes(elt, "FILE") is then called for every entry. Finally
+        each entry of added_dirs that does not exist yet in the staging tree is
+        created. The matching sym_new_cnt/hard_new_cnt/new_cnt counters are updated.
+        """
+        with open(self.constant_strings.SYMLINK_DOC, "a") as symlink_doc_obj, \
+             open(self.constant_strings.HARDLINK_DOC, "a") as hardlink_doc_obj:
+            for elt in self.entries_lists.added_files:
+                dst_file = os.path.join(self.constant_strings.BASE_NEW, elt)
+                destpath = os.path.join(NEW_FILES_PATH, elt)
+
+                if os.path.islink(dst_file):
+                    patch = os.readlink(dst_file)
+                    logging.debug('File is an added symlink %s' % elt)
+                    symlink_doc_obj.write('SYM:NEW:%s:%s\n' % (elt, patch))
+                    # What if this is only a new sym link and folder already exists??? Should recheck
+                    if not os.path.exists(os.path.dirname(destpath)):
+                        os.makedirs(os.path.dirname(destpath))
+                        logging.info('Added symlink - adding missing dir')
+                    self.update_attributes(elt, "SYM")
+                    self.operations_count.sym_new_cnt += 1
+                elif elt in self.old_new_entries_data.new_hardlinks:
+                    patch = self.old_new_entries_data.new_hardlinks[elt]
+                    logging.debug('File is an added hardlink %s' % elt)
+                    hardlink_doc_obj.write('HARD:NEW:%s:%s\n' %(elt, patch))
+                    if not os.path.exists(os.path.dirname(destpath)):
+                        os.makedirs(os.path.dirname(destpath))
+                        logging.info('Added hardlink - adding missing dir')
+                    self.operations_count.hard_new_cnt += 1
+                elif os.path.isdir(dst_file):  # We create just empty directory here
+                    if not os.path.exists(destpath):
+                        os.makedirs(destpath)
+                        logging.debug('New dir created %s' % destpath)
+                        self.operations_count.new_cnt += 1
+                else:
+                    self.operations_count.new_cnt += 1
+                    destdir = os.path.dirname(destpath)
+                    logging.debug('New files - %s ==> %s' % (dst_file, destdir))
+
+                    if not os.path.isdir(destdir):
+                        try:
+                            os.makedirs(destdir)
+                        except Exception as exc:
+                            logging.critical('Error in NEW files DIR entry -%s' % destdir)
+                            raise exc
+
+                    try:
+                        if not stat.S_ISFIFO(os.stat(dst_file).st_mode):
+                            shutil.copy2(dst_file, destpath)
+                            logging.debug('New files copied from- %s to- %s' % (dst_file, destpath))
+                    except Exception as exc:
+                        logging.critical('Error in NEW files entry -%s -%s' % (dst_file, destpath))
+                        raise exc
+                self.update_attributes(elt, "FILE")
+
+        for elt in self.entries_lists.added_dirs:
+            destpath = os.path.join(NEW_FILES_PATH, elt)
if not os.path.exists(destpath):
os.makedirs(destpath)
-            logging.debug(' File New Dir %s' % destpath)
-            New_Cnt = New_Cnt + 1
-        else:
-            New_Cnt = New_Cnt + 1
-            destdir = os.path.dirname(destpath)
-            logging.debug('New files - %s ==> %s' % (dst_file, destdir))
-
-            if not os.path.isdir(destdir):
-                try:
-                    os.makedirs(destdir)
-                except Exception as exc:
-                    logging.critical('Error in NEW files DIR entry -%s' % destdir)
-                    raise exc
-
-            try:
-                if not stat.S_ISFIFO(os.stat(dst_file).st_mode):
-                    shutil.copy2(dst_file, destpath)
-                    logging.debug('New files copied from- %s to- %s' % (dst_file, destpath))
-            except Exception as exc:
-                logging.critical('Error in NEW files entry -%s -%s' % (dst_file, destpath))
-                raise exc
-        Update_Attr(elt, "FILE", File_Attributes, Sym_Attributes)
-
-    for elt in Dir_Added:
-        destpath = os.path.join(NEW_FILES_PATH, elt)
-        if not os.path.exists(destpath):
-            os.makedirs(destpath)
-            logging.debug(' DirList New Dir %s' % destpath)
-            New_Cnt = New_Cnt + 1
-
-    # Base directory should be system
-    print 'Compressing New files'
-    if (New_Cnt > 0 or Sym_New_Cnt > 0):
-        WorkingDir = os.getcwd()
-        os.chdir(os.path.join(os.getcwd(), NEW_FILES_PATH))
-        logging.info('Curr Working Dir - %s' % os.getcwd())
-        log_path = os.path.join(WorkingDir, LOGFILE)
-        os.system(ZIPUTIL + NEW_FILES_ZIP_NAME + " . " + " >> " + log_path)
-        shutil.move(NEW_FILES_ZIP_NAME, WorkingDir + "/" + OUT_DIR)
-        # New file size?? cos, we extract system.7z from delta.tar and then proceed with decompression
-        SS_UpdateSize(WorkingDir + "/" + OUT_DIR + "/" + NEW_FILES_ZIP_NAME, WorkingDir + "/" + OUT_DIR + "/" + NEW_FILES_ZIP_NAME)
-        os.chdir(WorkingDir)
-        shutil.rmtree(NEW_FILES_PATH)
-        # use 7z a system.7z ./*
-
-    #logging.info('%d Dir to be removed' % len(Dir_removed))
-    logging.info('%d files unchanged' % len(files_unchanged))
-    logging.info('%d files files_renamed' % len(files_renamed))
-    logging.info('%d files NEW' % len(files_new))
-    logging.info('%d File attr' % len(File_Attributes))
-    logging.info('%d Sym attr' % len(Sym_Attributes))
-    logging.info('PaTcHCoUnT:Diffs-%d Moves-%d News-%d Delets-%d SymDiffs-%d SymNews-%d HardDiffs-%d HardNews-%d\n' % \
-        (Diff_Cnt, Move_Cnt, New_Cnt, Del_Cnt, Sym_Diff_Cnt, Sym_New_Cnt, Hard_Diff_Cnt, Hard_New_Cnt))
-    print('PaTcHCoUnT:Diffs-%d Moves-%d News-%d Delets-%d SymDiffs-%d SymNews-%d HardDiffs-%d HardNews-%d\n' % \
-        (Diff_Cnt, Move_Cnt, New_Cnt, Del_Cnt, Sym_Diff_Cnt, Sym_New_Cnt, Hard_Diff_Cnt, Hard_New_Cnt))
-
-    # There could be duplicates, TODO, can check before adding..
-    ATTR_FILE_D = open(ATTR_FILE, 'a+')
-    for elt in File_Attributes:
-        ATTR_FILE_D.write(elt)
-    for elt in Sym_Attributes:
-        ATTR_FILE_D.write(elt)
-
-    ATTR_FILE_D.close()
-
-    Partition_Doc_SymLinks.close()
-    Partition_Doc_HardLinks.close()
-    Partition_Read_SymLinks = open(SymLinkDoc, 'r+')
-    Partition_Read_HardLinks = open(HardLinkDoc, 'r+')
-    Partition_Doc.write(Partition_Read_SymLinks.read())
-    for line in reversed(Partition_Read_HardLinks.readlines()):
-        Partition_Doc.write(line)
-    Partition_Doc.write('PaTcHCoUnT:%d %d %d %d %d %d %d %d\n' % \
-        (Diff_Cnt, Move_Cnt, New_Cnt, Del_Cnt, Sym_Diff_Cnt, Sym_New_Cnt, Hard_Diff_Cnt, Hard_New_Cnt))
-    Partition_Read_SymLinks.close()
-    Partition_Read_HardLinks.close()
-    Partition_Doc.close()
-    os.remove(SymLinkDoc)
-    os.remove(HardLinkDoc)
-
-    if Diff_Cnt + Move_Cnt + New_Cnt + Del_Cnt + Sym_Diff_Cnt + Sym_New_Cnt + Hard_Diff_Cnt + \
-            Hard_New_Cnt + os.path.getsize(ATTR_FILE) == 0:
-        print('No Delta Generated for %s - %s' % (PART_NAME, OUT_DIR))
-        logging.info('No Delta Generated for %s' % PART_NAME)
-        shutil.rmtree(OUT_DIR)
-
-
-def IsSymlink(info):
-    return (info.external_attr >> 16) == 0120777
-
-
-def NewFiles(src, dest):
-    print src, dest
-    subprocess.call(['cp', '-rp', src, dest])
-    #try:
-        #shutil.copytree(src, dest)
-    #except OSError as e:
-        # If the error was caused because the source wasn't a directory
-        #if e.errno == errno.ENOTDIR:
-            #shutil.copy2(src, dest)
-        #else:
-            #print('Directory not copied. Error: %s' % e)
+                logging.debug('New dir created %s' % destpath)
+                self.operations_count.new_cnt += 1
+
+    def compress_added_entries(self):
+        """Archive the staged new files and move the archive into OUT_DIR.
+
+        Does nothing unless at least one new file/dir or new symlink was counted.
+        The archiver (ZIPUTIL) is run from inside NEW_FILES_PATH with its output
+        appended to LOGFILE, the resulting NEW_FILES_ZIP_NAME is moved to
+        OUT_DIR, update_size() is called on it, and the staging tree is removed.
+        """
+        # Base directory should be system
+        if not (self.operations_count.new_cnt > 0 or self.operations_count.sym_new_cnt > 0):
+            return
+
+        WorkingDir = os.getcwd()
+        os.chdir(os.path.join(WorkingDir, NEW_FILES_PATH))
+        try:
+            logging.info('Current working dir - %s' % os.getcwd())
+            log_path = os.path.join(WorkingDir, LOGFILE)
+            os.system(ZIPUTIL + NEW_FILES_ZIP_NAME + " . " + " >> " + log_path)
+            zipped_dir = os.path.join(WorkingDir, self.constant_strings.OUT_DIR)
+            shutil.move(NEW_FILES_ZIP_NAME, zipped_dir)
+            zipped_file = os.path.join(zipped_dir, NEW_FILES_ZIP_NAME)
+            update_size(zipped_file, zipped_file)
+        finally:
+            # Always restore the caller's working directory, even when archiving or
+            # moving fails, so later relative paths are not resolved inside the staging tree.
+            os.chdir(WorkingDir)
+        shutil.rmtree(NEW_FILES_PATH)
+
+    def write_doc_and_cleanup(self):
+        """Flush collected attributes and merge the link documents into REG_DOC.
+
+        File attributes followed by symlink attributes are appended to ATTR_FILE.
+        REG_DOC then receives the whole SYMLINK_DOC, the lines of HARDLINK_DOC in
+        reverse order and a final PaTcHCoUnT line with the eight operation
+        counters. The two temporary link documents are deleted afterwards.
+        """
+        paths = self.constant_strings
+
+        # There could be duplicates, TODO, can check before adding..
+        with open(paths.ATTR_FILE, 'a+') as attr_file_obj:
+            attr_file_obj.writelines(self.entries_lists.file_attributes)
+            attr_file_obj.writelines(self.entries_lists.sym_attributes)
+
+        with open(paths.REG_DOC, "a") as reg_doc_obj, \
+             open(paths.SYMLINK_DOC, 'r+') as symlink_doc_obj, \
+             open(paths.HARDLINK_DOC, 'r+') as hardlink_doc_obj:
+            reg_doc_obj.write(symlink_doc_obj.read())
+            reg_doc_obj.writelines(reversed(hardlink_doc_obj.readlines()))
+            counts = self.operations_count
+            totals = (counts.diff_cnt, counts.move_cnt,
+                      counts.new_cnt, counts.del_cnt,
+                      counts.sym_diff_cnt, counts.sym_new_cnt,
+                      counts.hard_diff_cnt, counts.hard_new_cnt)
+            reg_doc_obj.write('PaTcHCoUnT:%d %d %d %d %d %d %d %d\n' % totals)
+
+        for temporary_doc in (paths.SYMLINK_DOC, paths.HARDLINK_DOC):
+            os.remove(temporary_doc)
+
+ def generate_delta_fs(self):
+ # Run the complete filesystem-delta pipeline for this generator instance:
+ #   1. report how many old/new files are being compared,
+ #   2. make sure NEW_FILES_PATH exists,
+ #   3. build the entry lists (all entries, changed files, optionally renamed-with-version files),
+ #   4. process changed, moved-duplicate, renamed, removed and added entries,
+ #   5. compress the added entries, log summary counters, write the doc file and clean up,
+ #   6. remove OUT_DIR entirely when nothing at all was produced.
+ # Takes no arguments besides self and returns None.
+ # Re-raises FileExistsError coming from ensure_dir_exists(NEW_FILES_PATH).
+ tmp_str = 'Going from %d files to %d files' % (len(self.old_new_entries_data.old_files), len(self.old_new_entries_data.new_files))
+ print(tmp_str)
+ logging.info(tmp_str)
+ # Log a readable reason before propagating the error unchanged to the caller.
+ try:
+ ensure_dir_exists(NEW_FILES_PATH)
+ except FileExistsError as exc:
+ logging.error('Directory %s used by this script is already an existing file' % NEW_FILES_PATH)
+ raise exc
-def measure_two_filediffs(src, dst):
- patchLoc = 'temp.patch'
- # TODO ensure this is excepts an error
- subprocess.call([DIFF_UTIL, src, dst, patchLoc])
- result_size = os.path.getsize(patchLoc)
- os.remove(patchLoc)
- return result_size
+ # Generate lists of entries
+ self.generate_entries_lists()
+ # What files have changed contents but not name/path?
+ self.generate_changed_files_lists()
+ # Currently if Version or number is the first character of the file, then we are NOT making any diffs.
+ # NOTE: SUPPORT_RENAME is compared against the string "TRUE", not a boolean.
+ if SUPPORT_RENAME == "TRUE":
+ self.generate_renamed_files_with_version_lists()
-def ishardlink(path):
- if os.stat(path).st_nlink > 1:
- return True
- return False
+ '''
+ Patch Section
+ partition.txt contains protocol for binaries responsible for upgrading
+ Types supported: DIFFS, MOVES, NEWS, DELETES, SYMDIFFS, SYMNEWS.
+ '''
+ print("writing diff'ed changed files...")
+ self.process_changed_entries()
-def get_inode(path):
- return os.stat(path).st_ino
+ self.process_moved_duplicates()
-def get_hardlinks(base):
- hardlinks_dict = {}
- inodes_dict = {}
+ # Rename handling runs only when the feature flag is enabled (same string comparison as above).
+ if SUPPORT_RENAME == "TRUE":
+ self.find_remaining_renamed_files()
+ self.process_renamed_files()
- for root, direcotories, files in os.walk(base, topdown=True, followlinks=False):
- for file in sorted(files):
- file_name = os.path.join(root, file)
- if not os.path.islink(file_name) and ishardlink(file_name):
- inode = get_inode(file_name)
- rel_path = os.path.relpath(file_name, base)
- if inode not in inodes_dict:
- inodes_dict[inode] = rel_path
- else:
- hardlinks_dict[rel_path] = inodes_dict[inode]
-
- return hardlinks_dict
-
-
-def Get_Files(path):
- all_files = []
- all_dirs = []
-
- for root, directories, filenames in os.walk(path, topdown=False, followlinks=False):
- for directory in directories:
- #DirName = os.path.join(root+'/',directory)
- DirName = os.path.join(root, directory)
- if os.path.islink(DirName):
- logging.debug('This is symlink pointing to dir -%s' % DirName)
- all_files.append(os.path.relpath(DirName, path))
- elif not os.listdir(DirName):
- #print('*****Empty Directory******* -%s', DirName)
- # This should NOT be appended ??? Empty dir shd b considered
- all_dirs.append(os.path.relpath(DirName, path))
+ self.process_removed_entries()
+
+ self.process_added_entries()
+
+ print('Compressing added files')
+ self.compress_added_entries()
+
+ # Summary of the entry lists, written to the log only.
+ logging.info('%d files unchanged' % len(self.entries_lists.unchanged_files))
+ logging.info('%d files renamed' % len(self.entries_lists.renamed_files))
+ logging.info('%d files added' % len(self.entries_lists.added_files))
+ logging.info('%d regular files changed attributes' % len(self.entries_lists.file_attributes))
+ logging.info('%d symlinks changed attributes' % len(self.entries_lists.sym_attributes))
+
+ # <PaTcHCoUnT> is not a typo, it is parsed like this in other programs that use deltas
+ tmp_str = 'PaTcHCoUnT:Diffs-%d Moves-%d News-%d Delets-%d SymDiffs-%d SymNews-%d HardDiffs-%d HardNews-%d\n' % \
+ (self.operations_count.diff_cnt, self.operations_count.move_cnt, self.operations_count.new_cnt, \
+ self.operations_count.del_cnt, self.operations_count.sym_diff_cnt, self.operations_count.sym_new_cnt, \
+ self.operations_count.hard_diff_cnt, self.operations_count.hard_new_cnt)
+ logging.info(tmp_str)
+ print(tmp_str)
+
+ self.write_doc_and_cleanup()
+
+ # The size of ATTR_FILE is added to the operation counters, so the sum is zero only when
+ # no operation of any kind was counted AND the attribute file is empty. In that case the
+ # whole output directory is deleted because there is no delta to deliver.
+ if self.operations_count.diff_cnt + self.operations_count.move_cnt + self.operations_count.new_cnt + \
+ self.operations_count.del_cnt + self.operations_count.sym_diff_cnt + self.operations_count.sym_new_cnt + \
+ self.operations_count.hard_diff_cnt + self.operations_count.hard_new_cnt + os.path.getsize(self.constant_strings.ATTR_FILE) == 0:
+ print('No delta generated for %s - %s' % (self.constant_strings.PART_NAME, self.constant_strings.OUT_DIR))
+ logging.info('No delta generated for %s' % self.constant_strings.PART_NAME)
+ shutil.rmtree(self.constant_strings.OUT_DIR)
+
+
+def update_cfg_file(DELTA_BIN, UPDATE_CFG_PATH):
+ # Rewrite the config file at UPDATE_CFG_PATH in place: every line whose first
+ # whitespace-separated field equals DELTA_BIN gets a tab and str(MEM_REQ) appended;
+ # every other line is written back unchanged.
+ #   DELTA_BIN       -- value matched against the first field of each config line
+ #   UPDATE_CFG_PATH -- path of the config file to read and then overwrite
+ # Returns None.
+ # MEM_REQ is not defined in this function -- presumably a module-level value; verify.
+ with open(UPDATE_CFG_PATH, 'r') as f:
+ lines = f.readlines()
+
+ # The file is truncated here; all content is re-emitted from the lines read above.
+ with open(UPDATE_CFG_PATH, 'w') as f:
+ for line in lines:
+ ConfigItems = line.split()
+ # NOTE(review): a blank line yields an empty list, so ConfigItems[0] raises IndexError
+ # after the file was already opened for writing -- confirm blank lines cannot occur.
+ if ConfigItems[0] == DELTA_BIN:
+ # NOTE(review): DELTA is assigned but not read anywhere in this function.
+ DELTA = ConfigItems[1]
+ logging.info('Updating %s config' % DELTA_BIN)
+ line = line.rstrip('\n')
+ Value = MEM_REQ
+ # Replacing the whole line with itself plus a suffix is equivalent to appending the suffix.
+ line = line.replace(line, line + '\t' + str(Value) + '\n')
+ f.write(line)
else:
- all_dirs.append(os.path.relpath(DirName, path))
- for filename in filenames:
- FileName = os.path.join(root, filename)
- all_files.append(os.path.relpath(FileName, path))
+ f.write(line)
+
+
+def generate_delta_image(BASE_OLD, BASE_NEW, OUT_DIR, DELTA_BIN, UPDATE_CFG_PATH, COMPRESSION_METHOD):
+    '''
+    Create a delta patch between two image files and record their metadata.
+
+    BASE_OLD / BASE_NEW  -- paths of the old and new image files
+    OUT_DIR              -- directory that receives the patch file
+    DELTA_BIN            -- config key of this image; also the default patch file name
+    UPDATE_CFG_PATH      -- config file to update in place, or a falsy value to skip the update
+    COMPRESSION_METHOD   -- value passed to DIFF_UTIL after its "-c" option
+
+    When UPDATE_CFG_PATH is given, the line whose first field equals DELTA_BIN gets the old size,
+    new size, old hash and new hash appended, and its second field becomes the patch file name.
+    All other lines (including blank ones) are preserved unchanged.
+
+    Returns None. A non-zero exit status of DIFF_UTIL is logged as an error.
+    '''
+    oldsize_d = os.path.getsize(BASE_OLD)
+    newsize_d = os.path.getsize(BASE_NEW)
+    SHA_BIN_DEST = hash_file(BASE_NEW)
+    SHA_BIN_BASE = hash_file(BASE_OLD)
+
+    # Default patch name, used when there is no config file or no matching entry.
+    DELTA = DELTA_BIN
+    update_size(BASE_OLD, BASE_NEW)
+    if UPDATE_CFG_PATH:
+        with open(UPDATE_CFG_PATH, 'r') as f:
+            lines = f.readlines()
+
+        suffix = '\t' + str(oldsize_d) + '\t\t' + str(newsize_d) + '\t\t' + str(SHA_BIN_BASE) + '\t\t' + str(SHA_BIN_DEST) + '\n'
+        # Build the new content completely before reopening the file for writing, so that
+        # an error while parsing a line cannot leave a truncated config file behind.
+        updated_lines = []
+        for line in lines:
+            ConfigItems = line.split()
+            # Blank lines split into an empty list; keep them as they are instead of crashing.
+            if ConfigItems and ConfigItems[0] == DELTA_BIN:
+                logging.info('Updating %s config' % DELTA_BIN)
+                DELTA = ConfigItems[1]
+                line = line.rstrip('\n') + suffix
+            updated_lines.append(line)
+
+        with open(UPDATE_CFG_PATH, 'w') as f:
+            f.writelines(updated_lines)
+
+    patchLoc = '%s/%s' % (OUT_DIR, DELTA)
+    logging.info('Make Delta Image %s <--> %s ==> %s %s' % (BASE_OLD, BASE_NEW, DELTA_BIN, patchLoc))
+    status = subprocess.call([DIFF_UTIL, "-c", COMPRESSION_METHOD, BASE_OLD, BASE_NEW, patchLoc])
+    if status != 0:
+        # Do not abort (callers never relied on an exception here), but make the failure visible.
+        logging.error('%s exited with status %d while creating %s' % (DIFF_UTIL, status, patchLoc))
+
+
+def generate_full_image(BASE_OLD, BASE_NEW, OUT_DIR, DELTA_BIN, UPDATE_CFG_PATH):
+    '''
+    Record the metadata of a full (non-delta) image update in the config file.
+
+    BASE_OLD / BASE_NEW  -- paths of the old and new image files
+    OUT_DIR              -- accepted for signature compatibility; not used here
+    DELTA_BIN            -- config key of this image
+    UPDATE_CFG_PATH      -- config file to update in place, or a falsy value to skip the update
+
+    When UPDATE_CFG_PATH is given, the line whose first field equals DELTA_BIN gets the old size,
+    new size, old hash and new hash appended. All other lines (including blank ones) are
+    preserved unchanged.
+
+    Returns None.
+    '''
+    logging.info('Make Full Image %s <--> %s ==> %s' % (BASE_OLD, BASE_NEW, DELTA_BIN))
+    oldsize_d = os.path.getsize(BASE_OLD)
+    newsize_d = os.path.getsize(BASE_NEW)
+    SHA_BIN_DEST = hash_file(BASE_NEW)
+    SHA_BIN_BASE = hash_file(BASE_OLD)
+    update_size(BASE_OLD, BASE_NEW)
+
+    if not UPDATE_CFG_PATH:
+        return
+
+    with open(UPDATE_CFG_PATH, 'r') as f:
+        lines = f.readlines()
+
+    suffix = '\t' + str(oldsize_d) + '\t\t' + str(newsize_d) + '\t\t' + str(SHA_BIN_BASE) + '\t\t' + str(SHA_BIN_DEST) + '\n'
+    # Build the new content completely before reopening the file for writing, so that
+    # an error while parsing a line cannot leave a truncated config file behind.
+    updated_lines = []
+    for line in lines:
+        ConfigItems = line.split()
+        # Blank lines split into an empty list; keep them as they are instead of crashing.
+        if ConfigItems and ConfigItems[0] == DELTA_BIN:
+            logging.info('Updating %s config' % DELTA_BIN)
+            line = line.rstrip('\n') + suffix
+        updated_lines.append(line)
+
+    with open(UPDATE_CFG_PATH, 'w') as f:
+        f.writelines(updated_lines)
+
+
+def generate_delta_fs(PART_NAME, BASE_OLD, BASE_NEW, OUT_DIR, ATTR_FILE, ATTR_NEW):
+    '''
+    Module-level convenience wrapper: construct a DeltaFsGenerator from the
+    given arguments (passed through in the same order) and run its
+    generate_delta_fs() method. Returns None.
+    '''
+    DeltaFsGenerator(
+        PART_NAME,
+        BASE_OLD,
+        BASE_NEW,
+        OUT_DIR,
+        ATTR_FILE,
+        ATTR_NEW,
+    ).generate_delta_fs()
+
+
+def main():
+    '''
+    Command-line entry point.
+
+    Expected arguments (sys.argv[1:]):
+        UPDATE_TYPE PARTITION_NAME OLD_BASE_DIR NEW_BASE_DIR OUTFOLDER [extra...]
+    Extra arguments:
+        DELTA_FS                 -- ATTR_OLD ATTR_NEW UPDATE_CFG (exactly 9 argv entries in total)
+        DELTA_IMAGE / FULL_IMAGE -- UPDATE_CFG (exactly 7 argv entries in total)
+    Only the part of UPDATE_TYPE before the first ":" selects the update kind.
+
+    Exits via sys.exit() when arguments, the diff utility, the attribute files or the
+    required packages are missing. Raises TypeError for an unknown update type. Any other
+    exception is logged together with a usage hint and re-raised.
+    '''
+    global DIFF_UTIL
+    global DIFFPATCH_UTIL
+
+    logging.basicConfig(filename=LOGFILE, level=logging.DEBUG)
+
+    try:
+        # sys.argv[0] is the script name, so the five mandatory arguments require at least
+        # six entries; sys.argv[5] is read below.
+        if len(sys.argv) < 6:
+            sys.exit('Usage: CreatePatch.py UPDATE_TYPE PARTITION_NAME OLD_BASE_DIR NEW_BASE_DIR OUTFOLDER')
+        UPDATE_TYPE = sys.argv[1]
+        UPDATE_TYPE_S = UPDATE_TYPE.split(":")[0]
+        # TODO make PART_NAME optional
+        PART_NAME = sys.argv[2]
+
+        BASE_OLD = sys.argv[3]
+        BASE_NEW = sys.argv[4]
+        OUT_DIR = sys.argv[5]
+        ATTR_OLD = EMPTY
+        ATTR_NEW = EMPTY
+        UPDATE_CFG_PATH = EMPTY
+
+        if UPDATE_TYPE_S == DELTA_FS:
+            if len(sys.argv) == 9:
+                ATTR_OLD = sys.argv[6]
+                ATTR_NEW = sys.argv[7]
+                UPDATE_CFG_PATH = os.path.join(PARENT_DIR, sys.argv[8])
-all_files.sort()
-all_dirs.sort()
-return all_files, all_dirs
+        elif UPDATE_TYPE_S in [DELTA_IMAGE, FULL_IMAGE]:
+            if len(sys.argv) == 7:
+                UPDATE_CFG_PATH = os.path.join(PARENT_DIR, sys.argv[6])
+
+        # Fall back to the copy under COMMON_BIN_PATH when the configured diff utility
+        # is not an executable file at its current location.
+        if not (os.path.isfile(DIFF_UTIL) and os.access(DIFF_UTIL, os.X_OK)):
+            DIFF_UTIL = os.path.join(COMMON_BIN_PATH, DIFF_UTIL)
+            DIFFPATCH_UTIL = os.path.join(COMMON_BIN_PATH, DIFFPATCH_UTIL)
+            if not (os.path.isfile(DIFF_UTIL) and os.access(DIFF_UTIL, os.X_OK)):
+                print("Diff Util Does NOT exist -- ABORT", file=sys.stderr)
+                logging.info('Diff Util Does NOT exist -- ABORT')
+                sys.exit(1)
-USAGE_DOCSTRING = """
- Generate Delta using BASEOLD AND BASE NEW
- Attributes is optional
- Usage: CreatePatch.py UPDATE_TYPE PARTNAME OLDBASE NEWBASE OUTFOLDER
-"""
+        start = datetime.datetime.now().time()
+        text = f'Started CreatePatch.py at {start}'
+        logging.info(f'{text:^70}')
+        print(f'{text:^70}')
+        logging.info('Arguments Passed: [UpdateType - %s][Part Name - %s] [BaseOld - %s] [BaseNew - %s] \n [OUTPUTDir - %s] '
+                     ' [BASE ATTR - %s] [TARGET ATTR - %s]' % (UPDATE_TYPE, PART_NAME, BASE_OLD, BASE_NEW, OUT_DIR, ATTR_OLD, ATTR_NEW))
+        try:
+            ensure_dir_exists(OUT_DIR)
+        except FileExistsError:
+            logging.error('Argument passed as OUT_DIR - %s is already an existing file' % OUT_DIR)
+            raise
+        if UPDATE_TYPE_S == DELTA_FS:
+            if not (os.path.isfile(ATTR_OLD) and os.path.isfile(ATTR_NEW)):
+                print("Attributes missing -- ABORT", file=sys.stderr)
+                sys.exit(1)
-def Usage(docstring):
- print docstring.rstrip("\n")
- print COMMON_DOCSTRING
+        # TODO verify if other linux distributions support APT library
+        cache = apt.Cache()
+        # Membership is tested first: indexing the cache with a package name it does not
+        # know raises KeyError, which would hide the real problem behind a traceback.
+        if all(pkg in cache and cache[pkg].is_installed for pkg in ('p7zip', 'attr', 'tar')):
+            logging.info('Basic utils installed')
+        else:
+            print("Basic utils missing -- ABORT", file=sys.stderr)
+            sys.exit(1)
+
+        if UPDATE_TYPE_S == FULL_IMAGE:
+            generate_full_image(BASE_OLD, BASE_NEW, OUT_DIR, PART_NAME, UPDATE_CFG_PATH)
+        elif UPDATE_TYPE_S == DELTA_IMAGE:
+            # generating LZMA deltas is supported by underlying software but it would require at least some kind of format autodetection mechanism,
+            # which is not available currently. Disable for now.
+            generate_delta_image(BASE_OLD, BASE_NEW, OUT_DIR, PART_NAME, UPDATE_CFG_PATH, COMPRESSION_BROTLI)
+        elif UPDATE_TYPE_S == DELTA_FS:
+            ATTR_FILE = os.path.join(OUT_DIR, (PART_NAME + ATTR_DOC_EXT))
+            diff_attr_files(ATTR_OLD, ATTR_NEW, ATTR_FILE)
+            generate_delta_fs(PART_NAME, BASE_OLD, BASE_NEW, OUT_DIR, ATTR_FILE, ATTR_NEW)
+
+            if UPDATE_CFG_PATH:
+                update_cfg_file(PART_NAME, UPDATE_CFG_PATH)
+        else:
+            print('UPDATE_TYPE ---- UNKNOWN FORMAT')
+            raise TypeError('UPDATE_TYPE ---- UNKNOWN FORMAT: %s' % UPDATE_TYPE)
+
+        # The attribute files are consumed inputs; remove them only when both are still present.
+        if UPDATE_TYPE_S == DELTA_FS:
+            if os.path.exists(ATTR_OLD) and os.path.exists(ATTR_NEW):
+                os.remove(ATTR_OLD)
+                os.remove(ATTR_NEW)
+
+        end = datetime.datetime.now().time()
+        logging.info('Max memory requried to upgrade [%s] is [%d] for file [%s]' % (PART_NAME, MEM_REQ, MEM_FILE))
+        text = f'Done with CreatePatch.py at {end}'
+        logging.info(f'{text:^70}')
+        print(f'{text:^70}')
+        logging.info('Time start [%s] - Time end [%s]' % (start, end))
+        print('Done with [%s][%d]---- Time start [%s] - Time end [%s]' % (PART_NAME, MEM_REQ, start, end))
+
+    except Exception:
+        # sys.exit() raises SystemExit, which is not an Exception subclass, so the deliberate
+        # aborts above pass through without this usage hint.
+        logging.error('Usage: {} <Update_Type> <Part_Name> <OLD_Base> <NEW_Base> <OUT_DIR>'.format(os.path.basename(sys.argv[0])))
+        raise
+# Run the command-line entry point only when this file is executed as a script, not on import.
if __name__ == '__main__':
- main()
+ main()
\ No newline at end of file