CreatePatch.py: Refactor code mainly responsible for generating DELTA_FS deltas,... 09/282309/14
authorAntoni Adaszkiewicz <a.adaszkiewi@samsung.com>
Thu, 29 Sep 2022 10:43:06 +0000 (12:43 +0200)
committerAntoni Adaszkiewicz <a.adaszkiewi@samsung.com>
Thu, 1 Dec 2022 12:55:13 +0000 (13:55 +0100)
readability, drop unused module imports, drop obsolete comments and commented-out code,
require Python 3.0 or higher

Change-Id: I750d237b038cc6215aa0e9519c6893746bf6f011

Dockerfile
mk_delta/common/bin/CreatePatch.py
mk_delta/common/bin/mk_part_delta.sh

index 59752bb..078a305 100644 (file)
@@ -20,7 +20,7 @@ ADD recovery /tota-upg/recovery/
 COPY --from=build /usr/local/bin/ss_bsdiff /usr/local/bin/ss_bspatch /usr/local/bin/
 COPY --from=build /usr/local/lib/liblzma-tool.so.* /usr/local/lib
 RUN apt-get update && \
-       apt-get install -y --no-install-recommends libbrotli1 libdivsufsort3 python-is-python2 python2 python-apt python3 python3-pip aria2 p7zip-full attr tar file sudo git && rm -rf /var/lib/apt/lists/*
+       apt-get install -y --no-install-recommends libbrotli1 libdivsufsort3 python-is-python2 python2 python-apt python3-apt python3 python3-pip aria2 p7zip-full attr tar file sudo git && rm -rf /var/lib/apt/lists/*
 
 RUN pip3 install requests beautifulsoup4 PyYAML
 
index adf4e52..65b2a91 100755 (executable)
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3
 
 import sys
 import os
@@ -6,38 +6,36 @@ import filecmp
 import shutil
 import subprocess
 import re
-import zipfile
 import datetime
 import hashlib
-import operator
-import locale
-import errno
 import logging
-import glob
 import apt
 import stat
 
-if sys.hexversion < 0x02040000:
-       print >> sys.stderr, "Python 2.4 or newer is required."
+
+if sys.version_info[0] < 3:
+       print("Python 3.0 or newer is required.", file=sys.stderr)
        sys.exit(1)
 
+
 '''
-Diff two folders and create delta using SS_BSDIFF
-Will maintain same format of script that will be generated when we use diffutil
-
-1. Create a list of files in each Base folders,
-2. These files will fall into one these below categories:
-       1) Only in OLD - Should be deleted
-       2) Only in NEW - Should be added or renamed accordingly
-       3) File exists in both directories but contents are different - Create Diff.
-       4) File name is same but TYPE can change (File to Folder, Folder to Link etc.)
-       5) Duplicates in the list of Deletes and News
-       6) Close matching diffs even though name changes across directories. (for matching extension)
-       7) Clearing empty directories after Moves or diffs under Rename.
-
-Current Case
-1. Given two folders, from list of REMOVED and NEW files find if there
-is version change and create diff between them
+Logic for DELTA_FS:
+       Diff two folders and create a delta using SS_BSDIFF
+       Will maintain the same script format that would be generated when using diffutil
+
+       1. Create a list of files in each base folder,
+       2. These files will fall into one of the categories below:
+               1) Only in OLD - Should be deleted
+               2) Only in NEW - Should be added or renamed accordingly
+               3) File exists in both directories but contents are different - Create Diff.
+               4) File name is same but TYPE can change (File to Folder, Folder to Link etc.)
+               5) Duplicates in the list of Deletes and News
+               6) Close-matching diffs even when the name changes across directories (only for matching extensions)
+               7) Clearing empty directories after Moves or diffs under Rename.
+
+       Current case
+       1. Given two folders, check the lists of REMOVED and NEW files for a
+       version change and create a diff between matching pairs
 
 TODO
 Want to extend the same script for entire DIFF generation and replace TOTAlib.sh file
@@ -45,249 +43,31 @@ Catching errors at all stages. SHOULD exit & return error in case of failure
 '''
 
 
-def global_paths():
-       global DIFF_UTIL
-       global ZIPUTIL
-       global NEW_FILES_PATH
-       global NEW_FILES_ZIP_NAME
-       global SYMLINK_TYPE
-       global ATTR_DOC_EXT
-       global SYMLINK_DOC_NAME
-       global DIFF_PREFIX
-       global DIFF_SUFFIX
-       global SUPPORT_RENAME
-       global NEW_PREFIX
-       global DIFFPATCH_UTIL
-       global SUPPORT_CONTAINERS
-       global FULL_IMAGE
-       global DELTA_IMAGE
-       global DELTA_FS
-       global EXTRA
-       global COMMON_BIN_PATH
-       global MEM_REQ
-       global EMPTY
-       global MEM_FILE
-
-
 COMMON_BIN_PATH = "../../common/bin/"
 DIFF_UTIL = "/usr/local/bin/ss_bsdiff"
 DIFFPATCH_UTIL = "/usr/local/bin/ss_bspatch"
-#ZIPUTIL = "p7zip "
 ZIPUTIL = "7z -mf=off a "
 NEW_FILES_PATH = "run/upgrade-sysroot"
 NEW_FILES_ZIP_NAME = "system.7z"
 SYMLINK_TYPE = "SYM"
-ATTR_DOC_EXT = "_attr.txt"
-SYMLINK_DOC_NAME = "_sym.txt"
-HARDLINK_DOC_NAME = "_hard.txt"
 PART_DOC_EXT = ".txt"
+ATTR_DOC_EXT = "_attr" + PART_DOC_EXT
+SYMLINK_DOC_NAME = "_sym" + PART_DOC_EXT
+HARDLINK_DOC_NAME = "_hard" + PART_DOC_EXT
 DIFF_PREFIX = "diff"
 DIFF_SUFFIX = ".delta"
-NEW_PREFIX = 'new'
 FULL_IMAGE = "FULL_IMAGE"
 DELTA_IMAGE = "DELTA_IMAGE"
 DELTA_FS = "DELTA_FS"
-EXTRA = "EXTRA"
 LOGFILE = "Delta.log"
 EMPTY = ""
+PARENT_DIR = ".."
 MEM_REQ = 0
 MEM_FILE = "NULL"
 COMPRESSION_LZMA = "lzma"
 COMPRESSION_BROTLI = "brotli"
 
-SUPPORT_RENAME = "TRUE"  # Use appropriate name
-SUPPORT_CONTAINERS = "FALSE"
-
-TEST_MODE = "FALSE"
-
-
-def main():
-       logging.basicConfig(filename=LOGFILE, level=logging.DEBUG)
-       global AttributeFile
-       global GenerateDiffAttr
-       try:
-
-               if len(sys.argv) < 5:
-                       sys.exit('Usage: CreatePatch.py UPDATE_TYPE PARTNAME OLDBASE NEWBASE OUTFOLDER')
-               UPDATE_TYPE = sys.argv[1]
-               UPDATE_TYPE_S = UPDATE_TYPE.split(":")
-               PART_NAME = sys.argv[2]  # lets make this also optional
-
-               BASE_OLD = sys.argv[3]
-               BASE_NEW = sys.argv[4]
-               OUT_DIR = sys.argv[5]
-               ATTR_OLD = EMPTY
-               ATTR_NEW = EMPTY
-               UPDATE_CFG_PATH = EMPTY
-               GenerateDiffAttr = "FALSE"
-               if UPDATE_TYPE_S[0] == DELTA_FS:
-                       #instead of arguments check it in outdirectory ?
-                       if len(sys.argv) == 9:
-                               ATTR_OLD = sys.argv[6]
-                               ATTR_NEW = sys.argv[7]
-                               UPDATE_CFG_PATH = '../' + sys.argv[8]
-                               GenerateDiffAttr = "TRUE"
-
-               elif UPDATE_TYPE_S[0] in [DELTA_IMAGE, FULL_IMAGE]:
-                       if len(sys.argv) == 7:
-                               #Use path in better way
-                               UPDATE_CFG_PATH = '../' + sys.argv[6]
-
-               global DIFF_UTIL
-               global DIFFPATCH_UTIL
-               if not (os.path.isfile(DIFF_UTIL) and os.access(DIFF_UTIL, os.X_OK)):
-                       DIFF_UTIL = COMMON_BIN_PATH + DIFF_UTIL
-                       DIFFPATCH_UTIL = COMMON_BIN_PATH + DIFFPATCH_UTIL
-                       if not (os.path.isfile(DIFF_UTIL) and os.access(DIFF_UTIL, os.X_OK)):
-                               print >> sys.stderr, "Diff Util Does NOT exist -- ABORT"
-                               logging.info('Diff Util Does NOT exist -- ABORT')
-                               sys.exit(1)
-
-               start = datetime.datetime.now().time()
-               logging.info('*************** ENTERED PYTHON SCRIPT *****************')
-               logging.info('Arguments Passed: [UpdateType - %s][Part Name - %s] [BaseOld - %s]  [BaseNew - %s] \n [OUTPUTDir - %s] [BASE ATTR - %s] [TARGET ATTR - %s]' % (UPDATE_TYPE, PART_NAME, BASE_OLD, BASE_NEW, OUT_DIR, ATTR_OLD, ATTR_NEW))
-
-               try:
-                       ensure_dir_exists(OUT_DIR)
-               except FileExistsError as exc:
-                       logging.error('Argument passed as OUT_DIR - %s is already an existing file' % OUT_DIR)
-                       raise exc
-               if GenerateDiffAttr == "TRUE":
-                       if not (os.path.isfile(ATTR_OLD) and os.path.isfile(ATTR_NEW)):
-                               print >> sys.stderr, "Attributes missing -- ABORT"
-                               sys.exit(1)
-
-               # Should check if APT is supported on other linux flavours
-               cache = apt.Cache()
-               if cache['p7zip'].is_installed and cache['attr'].is_installed and cache['tar'].is_installed:
-                       logging.info('Basic utils installed')
-               else:
-                       print >> sys.stderr, "Basic utils missing -- ABORT"
-                       sys.exit(1)
-
-               if UPDATE_TYPE_S[0] == FULL_IMAGE:
-                       SS_mk_full_img(BASE_OLD, BASE_NEW, OUT_DIR, PART_NAME, UPDATE_CFG_PATH)
-               # #### currently does not support LZMA ####
-               #  elif UPDATE_TYPE == DELTA_IMAGE:
-               #      SS_mk_delta_img(BASE_OLD, BASE_NEW, OUT_DIR, PART_NAME, UPDATE_CFG_PATH, COMPRESSION_LZMA)
-               elif UPDATE_TYPE_S[0] == DELTA_IMAGE:
-                       SS_mk_delta_img(BASE_OLD, BASE_NEW, OUT_DIR, PART_NAME, UPDATE_CFG_PATH, COMPRESSION_BROTLI)
-               elif UPDATE_TYPE == DELTA_FS:
-                       AttributeFile = ATTR_NEW
-                       ATTR_FILE = OUT_DIR + '/' + PART_NAME + ATTR_DOC_EXT
-                       Diff_AttrFiles(ATTR_OLD, ATTR_NEW, ATTR_FILE)
-                       Old_files, Old_dirs = Get_Files(BASE_OLD)
-                       New_files, New_dirs = Get_Files(BASE_NEW)
-                       SS_Generate_Delta(PART_NAME, BASE_OLD, Old_files, Old_dirs, BASE_NEW, New_files, New_dirs, OUT_DIR, ATTR_FILE)
-
-                       if not UPDATE_CFG_PATH == EMPTY:
-                               SS_update_cfg(PART_NAME, UPDATE_CFG_PATH)
-
-               elif UPDATE_TYPE == EXTRA:
-                       print('UPDATE_TYPE ---- EXTRA')
-               else:
-                       print('UPDATE_TYPE ---- UNKNOWN FORMAT')
-
-               if GenerateDiffAttr == "TRUE":
-                       if os.path.exists(ATTR_OLD) and os.path.exists(ATTR_NEW):
-                               os.remove(ATTR_OLD)
-                               os.remove(ATTR_NEW)
-               end = datetime.datetime.now().time()
-
-               logging.info('Max Memory requried to upgrade [%s] is [%d] for File[%s]' % (PART_NAME, MEM_REQ, MEM_FILE))
-               logging.info('*************** DONE WITH PYTHON SCRIPT ***************')
-               logging.info('Time start [%s] - Time end [%s]' % (start, end))
-               print('Done with [%s][%d]---- Time start [%s] - Time end [%s]' % (PART_NAME, MEM_REQ, start, end))
-
-       except Exception as exc:
-               logging.error('Usage: {} <Update_Type> <Part_Name> <OLD_Base> <NEW_Base> <OUT_DIR>'.format(os.path.basename(sys.argv[0])))
-               raise exc
-
-
-def SS_update_cfg(DELTA_BIN, UPDATE_CFG_PATH):
-       f = open(UPDATE_CFG_PATH, 'r')
-       lines = f.readlines()
-       f.close()
-       f = open(UPDATE_CFG_PATH, 'w')
-       for line in lines:
-               ConfigItems = line.split()
-               if ConfigItems[0] == DELTA_BIN:
-                       DELTA = ConfigItems[1]
-                       logging.info('Updating %s config' % DELTA_BIN)
-                       line = line.rstrip('\n')
-                       Value = MEM_REQ
-                       line = line.replace(line, line + '\t' + str(Value) + '\n')
-                       f.write(line)
-               else:
-                       f.write(line)
-       f.close()
-
-
-def SS_mk_delta_img(BASE_OLD, BASE_NEW, OUT_DIR, DELTA_BIN, UPDATE_CFG_PATH, COMPRESSION_METHOD):
-       #for sizes
-
-       oldsize_d = os.path.getsize(BASE_OLD)
-       newsize_d = os.path.getsize(BASE_NEW)
-       SHA_BIN_DEST = hash_file(BASE_NEW)
-       SHA_BIN_BASE = hash_file(BASE_OLD)
-
-       #incase UPDATE CFG is empty
-       DELTA = DELTA_BIN
-       SS_UpdateSize(BASE_OLD, BASE_NEW)
-       #Should throw error if PART NAME NOT found??
-       if not UPDATE_CFG_PATH == EMPTY:
-               f = open(UPDATE_CFG_PATH, 'r')
-               lines = f.readlines()
-               f.close()
-               f = open(UPDATE_CFG_PATH, 'w')
-               for line in lines:
-                       ConfigItems = line.split()
-                       if ConfigItems[0] == DELTA_BIN:
-                               logging.info('Updating %s config' % DELTA_BIN)
-                               DELTA = ConfigItems[1]
-                               line = line.rstrip('\n')
-                               line = line.replace(line, line + '\t' + str(oldsize_d) + '\t\t' + str(newsize_d) + '\t\t' + str(SHA_BIN_BASE) + '\t\t' + str(SHA_BIN_DEST) + '\n')
-                               f.write(line)
-                       else:
-                               f.write(line)
-               f.close()
-
-       patchLoc = '%s/%s' % (OUT_DIR, DELTA)
-       logging.info('Make Delta Image %s <--> %s ==> %s %s' % (BASE_OLD, BASE_NEW, DELTA_BIN, patchLoc))
-       subprocess.call([DIFF_UTIL, "-c", COMPRESSION_METHOD, BASE_OLD, BASE_NEW, patchLoc])
-
-
-def SS_mk_full_img(BASE_OLD, BASE_NEW, OUT_DIR, DELTA_BIN, UPDATE_CFG_PATH):
-       logging.info('Make Full Image %s <--> %s ==> %s' % (BASE_OLD, BASE_NEW, DELTA_BIN))
-       oldsize_d = os.path.getsize(BASE_OLD)
-       newsize_d = os.path.getsize(BASE_NEW)
-       SHA_BIN_DEST = hash_file(BASE_NEW)
-       SHA_BIN_BASE = hash_file(BASE_OLD)
-       #echo -e "\t${oldsize_d}\t\t${newsize_d}\t\t${SHA_BIN_BASE}\t\t${SHA_BIN_DEST}" >> ${DATA_DIR}/update_new.cfg
-       SS_UpdateSize(BASE_OLD, BASE_NEW)
-
-       if not UPDATE_CFG_PATH == EMPTY:
-               f = open(UPDATE_CFG_PATH, 'r')
-               lines = f.readlines()
-               f.close()
-               f = open(UPDATE_CFG_PATH, 'w')
-               for line in lines:
-                       ConfigItems = line.split()
-                       if ConfigItems[0] == DELTA_BIN:
-                               logging.info('Updating %s config' % DELTA_BIN)
-                               DELTA = ConfigItems[1]
-                               line = line.rstrip('\n')
-                               line = line.replace(line, line + '\t' + str(oldsize_d) + '\t\t' + str(newsize_d) + '\t\t' + str(SHA_BIN_BASE) + '\t\t' + str(SHA_BIN_DEST) + '\n')
-                               f.write(line)
-                       else:
-                               f.write(line)
-               f.close()
-
-
-def zipdir(path, zip):
-       for root, dirs, files in os.walk(path):
-               for file in files:
-                       zip.write(os.path.join(root, file))
+SUPPORT_RENAME = "TRUE"
 
 
 def ensure_dir_exists(path):
@@ -295,8 +75,6 @@ def ensure_dir_exists(path):
                os.makedirs(path)
        elif os.path.isfile(path):
                raise FileExistsError
-               #shutil.rmtree(path)
-       #os.makedirs(path)
 
 
 def path_leaf(path):
@@ -304,10 +82,8 @@ def path_leaf(path):
        return tail
 
 
-# Creating Diff between OLD and NEW attribute files v12
-def Diff_AttrFiles(ATTR_OLD, ATTR_NEW, ATTR_FILE):
-       if GenerateDiffAttr == "FALSE":
-               return
+# Creating Diff between OLD and NEW attribute files
+def diff_attr_files(ATTR_OLD, ATTR_NEW, ATTR_FILE):
        with open(ATTR_OLD, 'r') as f_old:
                lines1 = set(f_old.read().splitlines())
 
@@ -317,25 +93,10 @@ def Diff_AttrFiles(ATTR_OLD, ATTR_NEW, ATTR_FILE):
        lines = set.difference(lines2, lines1)
        with open(ATTR_FILE, 'w+') as file_out:
                for line in lines:
-                       logging.info('Diff_AttrFiles - %s' % line)
+                       logging.info('diff_attr_files - %s' % line)
                        file_out.write(line + '\n')
 
 
-def Update_Attr(RequestedPath, Type, File_Attributes, Sym_Attributes):
-       # Full File Path should MATCH
-       if GenerateDiffAttr == "FALSE":
-               return
-       FilePath = '"' + RequestedPath + '"'
-       #print ('FilePath - %s'% (FilePath))
-       with open(AttributeFile) as f:
-               for line in f:
-                       if FilePath in line:
-                               if Type == SYMLINK_TYPE:
-                                       Sym_Attributes.append(line)
-                               else:
-                                       File_Attributes.append(line)
-
-
 def hash_file(filename):
        '''This function returns the SHA-1 hash of the file passed into it'''
 
@@ -355,13 +116,22 @@ def hash_file(filename):
        return h.hexdigest()
 
 
-def find_dupes_list(BASE_OLD, BASE_NEW, fileListB, fileListT, Old_hardlinks, New_hardlinks):
+def measure_two_filediffs(src, dst):
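+       # Create a temporary patch between src and dst and return its size;
+       # used by the rename logic to pick the closest matching old file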
+       patchLoc = 'temp.patch'
+       # TODO: check the return code of the diff utility and handle failures
+       subprocess.call([DIFF_UTIL, src, dst, patchLoc])
+       result_size = os.path.getsize(patchLoc)
+       os.remove(patchLoc)
+       return result_size
+
+
+def find_dupes_list(BASE_OLD, BASE_NEW, fileListB, fileListT):
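+       # Returns a dict mapping full paths of files removed from BASE_OLD to the relative
+       # paths of byte-identical (same SHA-1) files added in BASE_NEW, i.e. MOVE candidates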
        dups = {}
        fdupes = {}
-       print('Finding Duplicates in - %s %s' % (BASE_OLD, BASE_NEW))
+       print('Finding duplicates in - %s %s' % (BASE_OLD, BASE_NEW))
 
        for filename in fileListB:
-               Src_File = BASE_OLD + '/' + filename
+               Src_File = os.path.join(BASE_OLD, filename)
                if os.path.islink(Src_File) or os.path.isdir(Src_File) or ishardlink(Src_File):
                        continue
                # Calculate hash
@@ -369,7 +139,7 @@ def find_dupes_list(BASE_OLD, BASE_NEW, fileListB, fileListT, Old_hardlinks, New
                dups[file_hash] = Src_File
 
        for filename in fileListT:
-               Dest_File = BASE_NEW + '/' + filename
+               Dest_File = os.path.join(BASE_NEW, filename)
                if os.path.islink(Dest_File) or os.path.isdir(Dest_File) or ishardlink(Dest_File):
                        continue
                # Calculate hash
@@ -378,13 +148,13 @@ def find_dupes_list(BASE_OLD, BASE_NEW, fileListB, fileListT, Old_hardlinks, New
                        BaseStr = dups.get(file_hash)
                        Baseloc = BaseStr.find('/')
                        if not BaseStr[Baseloc:] == filename:
-                               #print('Dupes - %s ==> %s' % (BaseStr[Baseloc:], filename))
                                fdupes[BaseStr] = filename
-       logging.info('Total Duplicate files %d' % (len(fdupes)))
+       logging.info('Total duplicate files %d' % (len(fdupes)))
+
        return fdupes
 
 
-def SS_UpdateSize(src_file, dst_file):
+def update_size(src_file, dst_file):
        global MEM_REQ
        global MEM_FILE
        oldsize_d = os.path.getsize(src_file)
@@ -398,563 +168,749 @@ def SS_UpdateSize(src_file, dst_file):
                MEM_FILE = dst_file
 
 
-def SS_Generate_Delta(PART_NAME, BASE_OLD, Old_files, Old_dirs, BASE_NEW, New_files, New_dirs, OUT_DIR, ATTR_FILE):
-       print('Going from %d files to %d files' % (len(Old_files), len(New_files)))
-       logging.info('Going from %d files to %d files' % (len(Old_files), len(New_files)))
-
-       # First let's fill up these categories
-       files_new = []
-       files_removed = []
-       Dir_removed = []
-       Dir_Added = []
-       files_changed = []
-       files_unchanged = []
-       files_renamed = []
-       File_Attributes = []
-       Sym_Attributes = []
-
-       files_Del_List = {}
-       files_New_List = {}
-
-       # Get dictionaries used for hardlinks form both directories
-       New_hardlinks = get_hardlinks(BASE_NEW)
-       Old_hardlinks = get_hardlinks(BASE_OLD)
-
-       # Generate NEW List
-       for elt in New_files:
-               if elt not in Old_files:
-                       files_new.append(elt)
-                       logging.info('New files %s' % elt)
-
-       # Generate Delete List
-       for elt in Old_files:
-               if elt not in New_files:
-                       # Cant we just append it here only if this is NOT a directory???? so that we have list of removed files ONLY. including directories
-                       files_removed.append(elt)
-                       logging.info('Old files %s' % elt)
-
-       for elt in Old_dirs:
-               #print('List of Old Dirs %s' % elt)
-               # Delete END logic goes in hand with UPG, After Diffs and moves, DEL END should be done.
-               if elt not in New_dirs:
-                       Dir_removed.append(elt)
-                       logging.info('Old Dirs %s' % elt + '/')
-
-       for elt in New_dirs:
-               if elt not in Old_dirs:
-                       Dir_Added.append(elt)
-               #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-
-       # What files have changed contents but not name/path?
-       for elt in New_files:
-               if elt in Old_files:
-                       # Both are symbolic linkes and they differ
-                       src_file = BASE_OLD + '/' + elt
-                       dst_file = BASE_NEW + '/' + elt
-                       #print('Files Changed - %s -%s' % (src_file,dst_file))
-                       if os.path.islink(src_file) and os.path.islink(dst_file):
-                               if not (os.readlink(src_file) == os.readlink(dst_file)):
-                                       files_changed.append(elt)
-                                       #print('%d Sym link files changed' % len(files_changed))
-                                       logging.info('Sym links Changed - %s' % elt)
-                               else:
-                                       files_unchanged.append(elt)
-                       # Both are hardlinks - we add them because we can't be sure if file they point to changes
-                       elif elt in New_hardlinks and elt in Old_hardlinks:
-                               files_changed.append(elt)
-                       # Both are Normal files and they differ. (Is file returns true in case of sym/hardlink also,
-                       # so additional check to find either of the file is sym/hardlink)
-                       elif (not (os.path.islink(src_file) or os.path.islink(dst_file))) \
-                               and (not (elt in New_hardlinks or elt in Old_hardlinks)) \
-                               and os.path.isfile(src_file) and os.path.isfile(dst_file):
-                               if not filecmp.cmp(src_file, dst_file):
-                                       files_changed.append(elt)
-                                       #print('%d Normal files changed' % len(files_changed))
-                                       #print('Files Changed - %s' % elt)
-                               else:
-                                       files_unchanged.append(elt)
-                       # File types differ between BASE and TARGET
-                       else:
-                               logging.info('Files are of diff types but same names  Src- %s Des- %s' % (src_file, dst_file))
-                               # Both file types have changed and they differ
-                               # Case 1: First Delete the OLD entry file type (Be it anything)
-                               # Processing and updating partition txt file will be done under REMOVED case and NEW files case accordingly, we just make an entry here
-                               files_removed.append(elt)
-                               files_new.append(elt)
-
-       # Currently if Version or number is the first character of the file, then we are NOT making any diffs.
-       if SUPPORT_RENAME == "TRUE":
-               for elt in files_removed:
-                       if os.path.isfile(BASE_OLD + '/' + elt):
-                               FileName = path_leaf(elt)
-                               entries = re.split('[0-9]', FileName)
-                               # Gives the STRING part of NAME. if name starts with version then later part wil b string
-                               #print('Entires under removed list after split - %s %s - %s' % (FileName, entries[0], elt))
-                               # If version is starting at the begining of the string?? shd we hav additional check for such cases??
-                               if len(entries[0]) > 0:
-                                       files_Del_List.update({entries[0]: elt})
+def ishardlink(path):
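+       # A regular path whose inode link count is greater than 1 is treated as a hardlink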
+       if os.stat(path).st_nlink > 1:
+               return True
+       return False
+
+
+def get_inode(path):
+       return os.stat(path).st_ino
 
-               for elt in files_new:
-                       if os.path.isfile(BASE_NEW + '/' + elt):
+
+class DeltaFsGenerator:
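+       # Compares the OLD and NEW trees for a partition and produces the DELTA_FS artifacts:
+       # partition documents (regular, symlink and hardlink entries) and per-file bsdiff deltas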
+       class OperationsCount:
+               def __init__(self):
+                       self.sym_diff_cnt = 0
+                       self.sym_new_cnt = 0
+                       self.hard_diff_cnt = 0
+                       self.hard_new_cnt = 0
+                       self.del_cnt = 0
+                       self.new_cnt = 0
+                       self.diff_cnt = 0
+                       self.move_cnt = 0
+
+       class ConstantStrings:
+               def __init__(self, PART_NAME, BASE_OLD, BASE_NEW, OUT_DIR, ATTR_FILE, ATTR_NEW):
+                       self.PART_NAME = PART_NAME
+                       self.BASE_OLD = BASE_OLD
+                       self.BASE_NEW = BASE_NEW
+                       self.OUT_DIR = OUT_DIR
+                       self.ATTR_FILE = ATTR_FILE
+                       self.ATTR_NEW = ATTR_NEW
+                       self.REG_DOC = os.path.join(self.OUT_DIR, (self.PART_NAME + PART_DOC_EXT))
+                       self.SYMLINK_DOC = os.path.join(self.OUT_DIR, (self.PART_NAME + SYMLINK_DOC_NAME))
+                       self.HARDLINK_DOC = os.path.join(self.OUT_DIR, (self.PART_NAME + HARDLINK_DOC_NAME))
+
+       class OldNewEntriesData:
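+               # Gathers file, directory and hardlink listings for both the OLD and NEW trees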
+               def __init__(self, BASE_OLD, BASE_NEW):
+                       self.old_files, self.old_dirs = self.get_entries_data(BASE_OLD)
+                       self.new_files, self.new_dirs = self.get_entries_data(BASE_NEW)
+                       self.new_hardlinks = self.get_hardlinks(BASE_NEW)
+                       self.old_hardlinks = self.get_hardlinks(BASE_OLD)
+
+               def get_entries_data(self, path):
+                       all_files = []
+                       all_dirs = []
+                       for root, directories, filenames in os.walk(path, topdown=False, followlinks=False):
+                               for directory in directories:
+                                       DirName = os.path.join(root, directory)
+                                       if os.path.islink(DirName):
+                                               logging.debug('This is symlink pointing to dir - %s' % DirName)
+                                               all_files.append(os.path.relpath(DirName, path))
+                                       else:
+                                               all_dirs.append(os.path.relpath(DirName, path))
+                               for filename in filenames:
+                                       FileName = os.path.join(root, filename)
+                                       all_files.append(os.path.relpath(FileName, path))
+
+                       all_files.sort()
+                       all_dirs.sort()
+                       return all_files, all_dirs
+
+               def get_hardlinks(self, base):
+                       hardlinks_dict = {}
+                       inodes_dict = {}
+
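+                       # Map each inode to the first path seen; later paths sharing that inode
+                       # are recorded as hardlinks pointing at the first one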
+                       for root, directories, files in os.walk(base, topdown=True, followlinks=False):
+                               for file in sorted(files):
+                                       file_name = os.path.join(root, file)
+                                       if not os.path.islink(file_name) and ishardlink(file_name):
+                                               inode = get_inode(file_name)
+                                               rel_path = os.path.relpath(file_name, base)
+                                               if inode not in inodes_dict:
+                                                       inodes_dict[inode] = rel_path
+                                               else:
+                                                       hardlinks_dict[rel_path] = inodes_dict[inode]
+
+                       return hardlinks_dict
+
+       class EntriesLists:
+               def __init__(self):
+                       self.added_files = []
+                       self.removed_files = []
+                       self.added_dirs = []
+                       self.removed_dirs = []
+                       self.changed_files = []
+                       self.unchanged_files = []
+                       self.renamed_files = []
+
+                       self.file_attributes = []
+                       self.sym_attributes = []
+
+       def __init__(self, PART_NAME, BASE_OLD, BASE_NEW, OUT_DIR, ATTR_FILE, ATTR_NEW):
+               self.operations_count = self.OperationsCount()
+               self.constant_strings = self.ConstantStrings(PART_NAME, BASE_OLD, BASE_NEW, OUT_DIR, ATTR_FILE, ATTR_NEW)
+               self.old_new_entries_data = self.OldNewEntriesData(BASE_OLD, BASE_NEW)
+               self.entries_lists = self.EntriesLists()
+
+       def update_attributes(self, requested_path, file_type):
+               # Full File Path should MATCH
+               file_path = '"' + requested_path + '"'
+               with open(self.constant_strings.ATTR_NEW, "r") as f:
+                       for line in f:
+                               if file_path in line:
+                                       if file_type == SYMLINK_TYPE:
+                                               self.entries_lists.sym_attributes.append(line)
+                                       else:
+                                               self.entries_lists.file_attributes.append(line)
+
+       def generate_entries_lists(self):
+               # Generate NEW List
+               for elt in self.old_new_entries_data.new_files:
+                       if elt not in self.old_new_entries_data.old_files:
+                               self.entries_lists.added_files.append(elt)
+                               logging.info('Added files %s' % elt)
+
+               # Generate Delete List
+               for elt in self.old_new_entries_data.old_files:
+                       if elt not in self.old_new_entries_data.new_files:
+                               self.entries_lists.removed_files.append(elt)
+                               logging.info('Removed files %s' % elt)
+
+               for elt in self.old_new_entries_data.old_dirs:
+                       # Delete END logic goes hand in hand with UPG; after diffs and moves, DEL END should be done.
+                       if elt not in self.old_new_entries_data.new_dirs:
+                               self.entries_lists.removed_dirs.append(elt)
+                               logging.info('Removed dirs %s' % elt + '/')
+
+               for elt in self.old_new_entries_data.new_dirs:
+                       if elt not in self.old_new_entries_data.old_dirs:
+                               self.entries_lists.added_dirs.append(elt)
+                               logging.info('Added dirs %s' % elt + '/')
+
+       def generate_changed_files_lists(self):
+               for elt in self.old_new_entries_data.new_files:
+                       if elt in self.old_new_entries_data.old_files:
+                               src_file = os.path.join(self.constant_strings.BASE_OLD, elt)
+                               dst_file = os.path.join(self.constant_strings.BASE_NEW, elt)
+                               # Both are symbolic links and they differ
+                               if os.path.islink(src_file) and os.path.islink(dst_file):
+                                       if not (os.readlink(src_file) == os.readlink(dst_file)):
+                                               self.entries_lists.changed_files.append(elt)
+                                               logging.info('Symlinks changed - %s' % elt)
+                                       else:
+                                               self.entries_lists.unchanged_files.append(elt)
+                               # Both are hardlinks - we add them because we can't be sure whether the file they point to changed
+                               elif elt in self.old_new_entries_data.new_hardlinks and elt in self.old_new_entries_data.old_hardlinks:
+                                       self.entries_lists.changed_files.append(elt)
+                               # Both are normal files and they differ. (isfile() returns true in case of sym/hardlink also,
+                               # so we need additional checks to make sure both entries are the same type (normal))
+                               elif (not (os.path.islink(src_file) or os.path.islink(dst_file))) \
+                                       and (not (elt in self.old_new_entries_data.new_hardlinks or elt in self.old_new_entries_data.old_hardlinks)) \
+                                       and os.path.isfile(src_file) and os.path.isfile(dst_file):
+                                       if not filecmp.cmp(src_file, dst_file):
+                                               self.entries_lists.changed_files.append(elt)
+                                       else:
+                                               self.entries_lists.unchanged_files.append(elt)
+                               # Both are files of different types
+                               else:
+                                       logging.info('Files are of diff types but same names; Src - %s Des - %s' % (src_file, dst_file))
+                                       self.entries_lists.removed_files.append(elt)
+                                       self.entries_lists.added_files.append(elt)
+
+       def generate_renamed_files_with_version_lists(self):
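+               # Pair removed and added files whose names share the same leading non-digit part
+               # (e.g. versioned libraries) and record them as renames instead of delete + add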
+               tmp_add_dict = {}
+               tmp_del_dict = {}
+
+               for elt in self.entries_lists.removed_files:
+                       if os.path.isfile(os.path.join(self.constant_strings.BASE_OLD, elt)):
+                               FileName = path_leaf(elt)
+                               entries = re.split('[0-9]', FileName)
+                               # Gives the string part of the name; if the name starts with a version, the later part will be the string
+                               # If the version starts at the beginning of the string, should we have an additional check for such cases?
+                               if len(entries[0]) > 0:
+                                       tmp_del_dict.update({entries[0]: elt})
+
+               for elt in self.entries_lists.added_files:
+                       if os.path.isfile(os.path.join(self.constant_strings.BASE_NEW, elt)):
                                FileName = path_leaf(elt)
                                entries = re.split('[0-9]', FileName)
-                               #print('Entires under NEWfiles list after split  - %s %s - %s' % (FileName, entries[0], elt))
                                if len(entries[0]) > 0:
-                                       files_New_List.update({entries[0]: elt})
+                                       tmp_add_dict.update({entries[0]: elt})
 
-               for key, value in files_Del_List.iteritems():
-                       #print('Key value pair -%s -%s' % (key, value))
-                       if key in files_New_List:
+               for key, value in tmp_del_dict.items():
+                       if key in tmp_add_dict:
                                # this file is the same name in both!
-                               src_file = BASE_OLD + '/' + value
-                               dst_file = BASE_NEW + '/' + files_New_List[key]
+                               src_file = os.path.join(self.constant_strings.BASE_OLD, value)
+                               dst_file = os.path.join(self.constant_strings.BASE_NEW, tmp_add_dict[key])
                                # we don't want to move hardlinks
                                if ishardlink(src_file) or ishardlink(dst_file):
                                        logging.debug('Cannot diff as one of them is a hardlink')
                                elif os.path.islink(src_file) or os.path.islink(dst_file):
-                                       logging.debug('Cannot diff as one of them is Symlink')
+                                       logging.debug('Cannot diff as one of them is a symlink')
                                elif os.path.isdir(src_file) or os.path.isdir(dst_file):
-                                       logging.debug('Cannot diff as one of them is dir')
+                                       logging.debug('Cannot diff as one of them is a dir')
                                else:
                                        #Pick the best diff of same type and diff names
-                                       files_renamed.append([files_New_List[key], value])
-                                       files_removed.remove(value)
-                                       files_new.remove(files_New_List[key])
-
-       '''
-       Patch Section
-               Partition.txt contains Protocol for UPI
-               Types Supported: DIFFS, MOVES, NEWS, DELETES, SYMDIFFS, SYMNEWS.
-       '''
-       Sym_Diff_Cnt = 0
-       Sym_New_Cnt = 0
-       Hard_Diff_Cnt = 0
-       Hard_New_Cnt = 0
-       Del_Cnt = 0
-       New_Cnt = 0
-       Diff_Cnt = 0
-       Move_Cnt = 0
-       SymLinkDoc = OUT_DIR + '/' + PART_NAME + SYMLINK_DOC_NAME
-       HardLinkDoc = OUT_DIR + '/' + PART_NAME + HARDLINK_DOC_NAME
-       Partition_Doc = open(OUT_DIR + '/' + PART_NAME + '.txt', 'w')
-       Partition_Doc_SymLinks = open(SymLinkDoc, 'w')
-       Partition_Doc_HardLinks = open(HardLinkDoc, "w")
-
-       print("writing diff'ed changed files...")
-       for elt in files_changed:
-               dst_file = BASE_NEW + '/' + elt
-               src_file = BASE_OLD + '/' + elt
-               # Both files are symbolic links and they differ
-               if os.path.islink(dst_file) and os.path.islink(src_file):
-                       # Both are symlinks and they differ
-                       logging.debug(' File Changed is Link %s ' % dst_file)
-                       patch = os.readlink(dst_file)
-                       Sym_Diff_Cnt = Sym_Diff_Cnt + 1
-                       Partition_Doc_SymLinks.write('SYM:DIFF:%s:%s:%s\n' % (elt, elt, patch))
-                       Update_Attr(elt, "SYM", File_Attributes, Sym_Attributes)
-               # Both are hardlinks and they differ (point to something different, new/changed file)
-               if elt in Old_hardlinks and elt in New_hardlinks:
-                       if Old_hardlinks[elt] != New_hardlinks[elt] or New_hardlinks[elt] in files_changed or New_hardlinks[elt] in files_new:
-                               logging.debug('Hardlinks changed %s %s' % (src_file, dst_file))
-                               patch = New_hardlinks[elt]
-                               Hard_Diff_Cnt += 1
-                               Partition_Doc_HardLinks.write('HARD:DIFF:%s:%s:%s\n' % (elt, elt, patch))
-               # Both are NORMAL files and they differ
-               elif (not (os.path.islink(src_file) or os.path.islink(dst_file))) \
-                       and (not (elt in Old_hardlinks or elt in New_hardlinks)) \
-                       and os.path.isfile(dst_file) and os.path.isfile(src_file):
-                       # Both are files and they differ
-                       Diff_Cnt = Diff_Cnt + 1
-                       patchName = (DIFF_PREFIX + '%d_%s_' + PART_NAME + DIFF_SUFFIX) % (Diff_Cnt, path_leaf(elt))
-                       patchLoc = '%s/%s' % (OUT_DIR, patchName)
-                       logging.debug(' File Differ %s %s' % (src_file, dst_file))
-                       SS_UpdateSize(src_file, dst_file)
-
-                       FORMAT = "REG"
-                       ret = subprocess.call([DIFF_UTIL, src_file, dst_file, patchLoc])
-                       if ret is not 0:
-                               logging.debug('Failed to create diff %d %s %s\n' % (ret, src_file, dst_file))
-                               files_new.append(elt)
-                               Diff_Cnt = Diff_Cnt - 1
-                       else:
-                               Partition_Doc.write('DIFF:REG:%s:%s:%s:%s:%s\n' % (elt, elt, hash_file(src_file), hash_file(dst_file), patchName))
-
-                       Update_Attr(elt, "FILE", File_Attributes, Sym_Attributes)
-               # Both differ but they are of diff types
-               else:
-                       # Processing and updating partition txt file will be done under REMOVED case and NEW files case accordingly, we just make an entry here
-                       files_removed.append(elt)
-                       files_new.append(elt)
-
-       fdupes = find_dupes_list(BASE_OLD, BASE_NEW, files_removed, files_new, Old_hardlinks, New_hardlinks)
-       for oldpath, newpath in fdupes.iteritems():
-               logging.info('Dupes %s -> %s' % (oldpath, newpath))
-       for elt in files_removed:
-               src_file = BASE_OLD + '/' + elt
-               # If parent directory is deleted.. & del end not possible. (==> Moves should be done before deletes in ENGINE)
-               if src_file in fdupes.keys():
-                       dst_file = BASE_NEW + '/' + fdupes[src_file]
-                       logging.debug(' File Moved %s ==> %s' % (src_file, dst_file))
-                       Move_Cnt = Move_Cnt + 1
-                       Partition_Doc.write('MOVE:REG:%s:%s:%s\n' % (elt, fdupes[src_file], hash_file(src_file)))
-                       files_removed.remove(elt)
-                       files_new.remove(fdupes[src_file])
-       # Should be placed after removing duplicates, else they will be filtered here.
-       # loop shd b for all NEW files, rather than for all delete files (Current understanding)
-       # First Step: Sort & Filter out unwanted files
-       # Minimum condition used is,
-       #       1. File name should match 70%
-       #       2. Extensions should be same
-       #       3. File name length shd b greater than 3 char
-       #       4. As we are using sorting on file names, once file name does not match and R_Flag is set to true, we nee not check remaining files. So, will execute break.
-       #       5. Should consider editdistance for RENAME LOGIC ==> TBD
-       Base_DelList = files_removed[:]
-       Base_NewList = files_new[:]
-       DelList = sorted(Base_DelList, key=path_leaf)
-       NewList = sorted(Base_NewList, key=path_leaf)
-       logging.debug('Rename Logic before filter: Delcount -%d NewCount -%d' % (len(DelList), len(NewList)))
-
-       Filter1 = []
-       Filter2 = []
-       # Remove unwanted items which we cant make diff with for rename logic
-       for file in DelList:
-               if os.path.islink(BASE_OLD + '/' + file):
-                       continue
-               elif ishardlink(BASE_OLD + '/' + file):
-                       continue
-               elif os.path.isdir(BASE_OLD + '/' + file):
-                       continue
-               else:
-                       Filter1.append(file)
-                       #logging.debug('Sorted del list - %s' % (file))
-
-       DelList = Filter1
+                                       self.entries_lists.renamed_files.append([tmp_add_dict[key], value])
+                                       self.entries_lists.removed_files.remove(value)
+                                       self.entries_lists.added_files.remove(tmp_add_dict[key])
+
+       def process_changed_entries(self):
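+               # Record DIFF entries (SYM:DIFF, HARD:DIFF, DIFF:REG) for each changed entry
+               # in the corresponding partition document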
+               with open(self.constant_strings.REG_DOC, 'a') as reg_doc_obj, \
+                       open(self.constant_strings.SYMLINK_DOC, 'a') as symlink_doc_obj, \
+                       open(self.constant_strings.HARDLINK_DOC, "a") as hardlink_doc_obj:
+                       for elt in self.entries_lists.changed_files:
+                               dst_file = os.path.join(self.constant_strings.BASE_NEW, elt)
+                               src_file = os.path.join(self.constant_strings.BASE_OLD, elt)
+                               # Both files are symbolic links and they differ
+                               if os.path.islink(dst_file) and os.path.islink(src_file):
+                                       logging.debug('File changed is a symlink %s ' % dst_file)
+                                       patch = os.readlink(dst_file)
+                                       self.operations_count.sym_diff_cnt += 1
+                                       symlink_doc_obj.write('SYM:DIFF:%s:%s:%s\n' % (elt, elt, patch))
+                                       self.update_attributes(elt, "SYM")
+                               # Both are hardlinks and they differ (pointing to something different, new/changed file)
+                               if elt in self.old_new_entries_data.old_hardlinks and elt in self.old_new_entries_data.new_hardlinks:
+                                       if self.old_new_entries_data.old_hardlinks[elt] != self.old_new_entries_data.new_hardlinks[elt] \
+                                               or self.old_new_entries_data.new_hardlinks[elt] in self.entries_lists.changed_files \
+                                               or self.old_new_entries_data.new_hardlinks[elt] in self.entries_lists.added_files:
+                                               logging.debug('File changed is a hardlink %s %s' % (src_file, dst_file))
+                                               patch = self.old_new_entries_data.new_hardlinks[elt]
+                                               self.operations_count.hard_diff_cnt += 1
+                                               hardlink_doc_obj.write('HARD:DIFF:%s:%s:%s\n' % (elt, elt, patch))
+                               # Both are NORMAL files and they differ
+                               elif (not (os.path.islink(src_file) or os.path.islink(dst_file))) \
+                                       and (not (elt in self.old_new_entries_data.old_hardlinks or elt in self.old_new_entries_data.new_hardlinks)) \
+                                       and os.path.isfile(dst_file) and os.path.isfile(src_file):
+                                       self.operations_count.diff_cnt += 1
+                                       patchName = (DIFF_PREFIX + '%d_%s_' + self.constant_strings.PART_NAME + DIFF_SUFFIX) % (self.operations_count.diff_cnt, path_leaf(elt))
+                                       patchLoc = '%s/%s' % (self.constant_strings.OUT_DIR, patchName)
+                                       logging.debug('Regular file change %s %s' % (src_file, dst_file))
+                                       update_size(src_file, dst_file)
 
-       for file in NewList:
-               if os.path.islink(BASE_NEW + '/' + file):
-                       continue
-               elif ishardlink(BASE_NEW + '/' + file):
-                       continue
-               elif os.path.isdir(BASE_NEW + '/' + file):
-                       continue
-               elif len(path_leaf(file)) <= 3:
-                       logging.debug('Ignored for best picks -%s ' % (BASE_NEW + '/' + file))
-                       continue
-               else:
-                       Filter2.append(file)
-       NewList = Filter2
-       logging.debug('Rename Logic After filter: Delcount -%d NewCount -%d' % (len(DelList), len(NewList)))
-
-       for new_file in NewList:
-               R_Flag = 'FALSE'
-               FileNameNew = path_leaf(new_file)
-               DiffSize = 0
-               winning_patch_sz = os.path.getsize(BASE_NEW + '/' + new_file)
-               New_fs = winning_patch_sz
-               winning_file = ''
-
-               for del_file in DelList:
-                       FileNameOld = path_leaf(del_file)
-                       if (FileNameOld.startswith(FileNameNew[:len(FileNameNew) * 7 / 10]) and (os.path.splitext(FileNameNew)[1] == os.path.splitext(del_file)[1])):
-                               #winning_patch_sz = 0.9 * os.path.getsize(BASE_NEW+'/'+new_file)
-                               # Percentage difference between two file sizes is within 30%, then we consider for diff generation
-                               Del_fs = os.path.getsize(BASE_OLD + '/' + del_file)
-                               v1 = abs(New_fs - Del_fs)
-                               v2 = (New_fs + Del_fs) / 2
-                               if(v2 <= 0 or ((v1 / v2) * 100) > 30):
-                                       logging.debug('Ignore diff generation New_fs - %d Del_Fs - %d' % (New_fs, Del_fs))
-                                       continue
-                               logging.debug('I can compute diff between %s %s Del_Fs - %d New_Fs - %d' % (del_file, new_file, Del_fs, New_fs))
-                               R_Flag = 'TRUE'
-                               DiffSize = measure_two_filediffs(BASE_OLD + '/' + del_file, BASE_NEW + '/' + new_file)
-                               if (DiffSize < 0.8 * winning_patch_sz):
-                                       winning_patch_sz = DiffSize
-                                       winning_file = del_file
-                       elif (not FileNameOld.startswith(FileNameNew[:len(FileNameNew) * 7 / 10]) and R_Flag == 'TRUE'):
-                               logging.debug('Because nex set of files will not have matching name - break @@ %s %s' % (del_file, new_file))
-                               break
-               if len(winning_file) > 0:
-                       logging.debug('Best Pick -%s ==> %s [%d]' % (winning_file, new_file, DiffSize))
-                       files_renamed.append([new_file, winning_file])
-                       DelList.remove(winning_file)
-                       files_removed.remove(winning_file)
-                       files_new.remove(new_file)
-
-       #********************** Files should NOT be deleted for any such renames ***********************
-
-       if SUPPORT_RENAME == "TRUE":
-               for elt in files_renamed:
-                       src_file = BASE_OLD + '/' + elt[1]
-                       dst_file = BASE_NEW + '/' + elt[0]
-                       Diff_Cnt = Diff_Cnt + 1
-                       patchName = (DIFF_PREFIX + '%d_%s_' + PART_NAME + DIFF_SUFFIX) % (Diff_Cnt, path_leaf(elt[1]))
-                       #patchName = (DIFF_PREFIX+'_%s'+DIFF_SUFFIX) % (path_leaf(elt[0]))
-                       patchLoc = '%s/%s' % (OUT_DIR, patchName)
-                       logging.debug(' File Renamed %s ==> %s' % (src_file, dst_file))
-                       # Should be careful of renaming files??
-                       # Should we consider measure_two_filediffs ?? so that patch size is NOT greater than actual file?
-                       # What if folder path has numerics??
-
-                       if os.path.isdir(src_file) or os.path.isdir(dst_file):
-                               # This case never occurs??
-                               Partition_Doc.write('"%s" and "%s" renamed 0 0\n' % (elt[0], elt[1]))
-                               Update_Attr(elt[0], "FILE", File_Attributes, Sym_Attributes)
-            # Make sure these files are PROPER and they shd NOT be symlinks
-                       elif not (os.path.islink(src_file) or os.path.islink(dst_file)) \
-                and not (elt[0] in New_hardlinks or elt[1] in Old_hardlinks) \
-                       and (os.path.isfile(src_file) and os.path.isfile(dst_file)):
-                               if filecmp.cmp(src_file, dst_file):
-                                       Move_Cnt = Move_Cnt + 1
-                                       Diff_Cnt = Diff_Cnt - 1
-                                       Partition_Doc.write('MOVE:REG:%s:%s:%s\n' % (elt[1], elt[0], hash_file(src_file)))
-                               else:
-                                       FORMAT = "REG"
                                        ret = subprocess.call([DIFF_UTIL, src_file, dst_file, patchLoc])
-                                       if ret is not 0:
+                                       if ret != 0:
                                                logging.debug('Failed to create diff %d %s %s\n' % (ret, src_file, dst_file))
-                                               files_new.append(elt)
-                                               Diff_Cnt = Diff_Cnt - 1
+                                               self.entries_lists.added_files.append(elt)
+                                               self.operations_count.diff_cnt -= 1
                                        else:
-                                               Partition_Doc.write('DIFF:REG:%s:%s:%s:%s:%s\n' % (elt[1], elt[0], hash_file(src_file), hash_file(dst_file), patchName))
-
-                               SS_UpdateSize(src_file, dst_file)
-                               Update_Attr(elt[0], "FILE", File_Attributes, Sym_Attributes)
-
-       for elt in files_removed:
-               # if files are part of patches after renaming, we shd remove them as part of removed.
-               src_file = BASE_OLD + '/' + elt
-               if os.path.islink(src_file):
-                       Partition_Doc.write('DEL:SYM:%s\n' % (elt))
-               elif elt in Old_hardlinks:
-                       Partition_Doc.write('DEL:HARD:%s\n' % (elt))
-               elif os.path.isdir(src_file):
-                       # If we change to DIR TYPE, then the same token should be modified on UA also and SHA should be accordingly passed.
-                       Partition_Doc.write('DEL:REG:%s:NA\n' % (elt))
-               else:
-                       Partition_Doc.write('DEL:REG:%s:%s\n' % (elt, hash_file(src_file)))
-               logging.debug(' File Deleted %s' % src_file)
-               Del_Cnt = Del_Cnt + 1
-
-       Dir_removed.sort(reverse=True)
-       for elt in Dir_removed:
-               # if Dir is empty, add it to the removed list.
-               src_file = BASE_OLD + '/' + elt
-               # Irrespective of weather files are MOVED or DIFF'ed, we can delete the folders. This action can be performed at the end.
-               # It covers symlinks also, as NEW symlinks cannot point to NON existant folders of TARGET (NEW binary)
-               if os.path.isdir(src_file):
-                       Partition_Doc.write('DEL:END:%s\n' % (elt))
-                       Del_Cnt = Del_Cnt + 1
-                       logging.debug(' Dir Deleted- %s' % src_file)
+                                               reg_doc_obj.write('DIFF:REG:%s:%s:%s:%s:%s\n' % (elt, elt, hash_file(src_file), hash_file(dst_file), patchName))
 
-       try:
-               ensure_dir_exists(NEW_FILES_PATH)
-       except FileExistsError as exc:
-               logging.error('Directory %s used by this script is already an existing file' % NEW_FILES_PATH)
-               raise exc
-
-       for elt in files_new:
-               dst_file = os.path.join(BASE_NEW, elt)
-               destpath = os.path.join(NEW_FILES_PATH, elt)
-
-               if os.path.islink(dst_file):
-                       patch = os.readlink(dst_file)
-                       logging.debug(' File New Links %s' % elt)
-                       Partition_Doc_SymLinks.write('SYM:NEW:%s:%s\n' % (elt, patch))
-                       # What if this is only a new sym link and folder already exists??? Should recheck
-                       if not os.path.exists(os.path.dirname(destpath)):
-                               os.makedirs(os.path.dirname(destpath))
-                               logging.info('New SymLink - Adding missing Dir')
-                       Update_Attr(elt, "SYM", File_Attributes, Sym_Attributes)
-                       Sym_New_Cnt = Sym_New_Cnt + 1
-               elif elt in New_hardlinks:
-                       patch = New_hardlinks[elt]
-                       logging.debug('File new hardlink %s' % elt)
-                       Partition_Doc_HardLinks.write('HARD:NEW:%s:%s\n' %(elt, patch))
-                       if not os.path.exists(os.path.dirname(destpath)):
-                               os.makedirs(os.path.dirname(destpath))
-                               logging.info('New hardlink - Adding missing Dir')
-                       Hard_New_Cnt += 1
-               elif os.path.isdir(dst_file):  # We create just empty directory here
+                                       self.update_attributes(elt, "FILE")
+                               # Both differ but they are of different types
+                               else:
+                                       # Processing and updating the partition txt file is handled by the REMOVED and NEW file cases; we just make an entry here
+                                       self.entries_lists.removed_files.append(elt)
+                                       self.entries_lists.added_files.append(elt)
+
+       def process_moved_duplicates(self):
+               found_duplicates = find_dupes_list(self.constant_strings.BASE_OLD, self.constant_strings.BASE_NEW, \
+                       self.entries_lists.removed_files, self.entries_lists.added_files)
+               for old_path, new_path in found_duplicates.items():
+                       logging.info('Dupes %s -> %s' % (old_path, new_path))
+
+               with open(self.constant_strings.REG_DOC, "a") as reg_doc_obj:
+                       # Iterate over a copy, since matched entries are removed from the lists inside the loop
+                       for elt in list(self.entries_lists.removed_files):
+                               src_file = os.path.join(self.constant_strings.BASE_OLD, elt)
+                               # If the parent directory is deleted, DEL:END is not possible (==> moves should be done before deletes in the ENGINE)
+                               if src_file in found_duplicates:
+                                       dst_file = os.path.join(self.constant_strings.BASE_NEW, found_duplicates[src_file])
+                                       logging.debug('File moved %s ==> %s' % (src_file, dst_file))
+                                       self.operations_count.move_cnt += 1
+                                       reg_doc_obj.write('MOVE:REG:%s:%s:%s\n' % (elt, found_duplicates[src_file], hash_file(src_file)))
+                                       self.entries_lists.removed_files.remove(elt)
+                                       self.entries_lists.added_files.remove(found_duplicates[src_file])
+
+       def find_remaining_renamed_files(self):
+               # Should be placed after removing duplicates, else they would be filtered out here.
+               # The loop should be over all NEW files, rather than over all deleted files (current understanding).
+               # First step: sort & filter out unwanted files.
+               # Minimum conditions used are:
+               #       1. File names should match on at least 70% of the new name's prefix
+               #       2. Extensions should be the same
+               #       3. File name length should be greater than 3 characters
+               #       4. As file names are sorted, once a name stops matching and R_Flag is set to true, the remaining files need not be checked, so we break.
+               #       5. Should consider edit distance for the RENAME LOGIC ==> TBD
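+               # Worked example (hypothetical names, for illustration only): for an added file
+               # "usr/lib/libexample_2.so" (leaf length 15, 70% prefix = "libexample"), a removed file
+               # "usr/lib/libexample_1.so" qualifies: its leaf starts with "libexample" and both share the
+               # ".so" extension, so the pair becomes a rename/diff candidate below.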
+
+               logging.debug('Rename logic before filter: removed count - %d added count - %d' % (len(self.entries_lists.removed_files), len(self.entries_lists.added_files)))
+
+               removed_files_filtered = []
+               added_files_filtered = []
+               # Remove unwanted items which we can't diff against for the rename logic
+               for file in sorted(self.entries_lists.removed_files, key=path_leaf):
+                       file_to_filter = os.path.join(self.constant_strings.BASE_OLD, file)
+                       if os.path.islink(file_to_filter):
+                               continue
+                       elif ishardlink(file_to_filter):
+                               continue
+                       elif os.path.isdir(file_to_filter):
+                               continue
+                       else:
+                               removed_files_filtered.append(file)
+
+               for file in sorted(self.entries_lists.added_files, key=path_leaf):
+                       file_to_filter = os.path.join(self.constant_strings.BASE_NEW, file)
+                       if os.path.islink(file_to_filter):
+                               continue
+                       elif ishardlink(file_to_filter):
+                               continue
+                       elif os.path.isdir(file_to_filter):
+                               continue
+                       elif len(path_leaf(file)) <= 3:
+                               logging.debug('Ignored for best picks -%s ' % (file_to_filter))
+                               continue
+                       else:
+                               added_files_filtered.append(file)
+
+               logging.debug('Rename logic after filter: removed count - %d added count - %d' % (len(removed_files_filtered), len(added_files_filtered)))
+
+               for new_file in added_files_filtered:
+                       R_Flag = 'FALSE'
+                       FileNameNew = path_leaf(new_file)
+                       DiffSize = 0
+                       base_new_file = os.path.join(self.constant_strings.BASE_NEW, new_file)
+                       winning_patch_sz = os.path.getsize(base_new_file)
+                       New_fs = winning_patch_sz
+                       winning_file = EMPTY
+
+                       for del_file in removed_files_filtered:
+                               base_del_file = os.path.join(self.constant_strings.BASE_OLD, del_file)
+                               FileNameOld = path_leaf(del_file)
+                               if (FileNameOld.startswith(FileNameNew[:int(len(FileNameNew) * (7 / 10))]) and (os.path.splitext(FileNameNew)[1] == os.path.splitext(del_file)[1])):
+                                       # If the percentage difference between the two file sizes is within 30%, we consider the pair for diff generation
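+                                       # e.g. (illustrative numbers): New_fs = 100 and Del_fs = 80 gives |100 - 80| / ((100 + 80) / 2) * 100 ~= 22% <= 30%, so a diff is attempted;
+                                       # New_fs = 100 and Del_fs = 50 gives ~67%, so the pair is skipped.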
+                                       Del_fs = os.path.getsize(base_del_file)
+                                       v1 = abs(New_fs - Del_fs)
+                                       v2 = (New_fs + Del_fs) / 2
+                                       if v2 <= 0 or ((v1 / v2) * 100) > 30:
+                                               logging.debug('Ignore diff generation New_fs - %d Del_Fs - %d' % (New_fs, Del_fs))
+                                               continue
+                                       logging.debug('I can compute diff between %s %s Del_Fs - %d New_Fs - %d' % (del_file, new_file, Del_fs, New_fs))
+                                       R_Flag = 'TRUE'
+                                       DiffSize = measure_two_filediffs(base_del_file, base_new_file)
+                                       if (DiffSize < 0.8 * winning_patch_sz):
+                                               winning_patch_sz = DiffSize
+                                               winning_file = del_file
+                               elif (not FileNameOld.startswith(FileNameNew[:int(len(FileNameNew) * (7 / 10))]) and R_Flag == 'TRUE'):
+                                       logging.debug('Because next set of files will not have matching name - break @@ %s %s' % (del_file, new_file))
+                                       break
+                       if len(winning_file) > 0:
+                               logging.debug('Best pick -%s ==> %s [%d]' % (winning_file, new_file, DiffSize))
+                               self.entries_lists.renamed_files.append([new_file, winning_file])
+                               removed_files_filtered.remove(winning_file)
+                               self.entries_lists.removed_files.remove(winning_file)
+                               self.entries_lists.added_files.remove(new_file)
+
+       def process_renamed_files(self):
+               with open(self.constant_strings.REG_DOC, "a") as reg_doc_obj:
+                       for elt in self.entries_lists.renamed_files:
+                               src_file = os.path.join(self.constant_strings.BASE_OLD, elt[1])
+                               dst_file = os.path.join(self.constant_strings.BASE_NEW, elt[0])
+                               self.operations_count.diff_cnt += 1
+                               patch_name = (DIFF_PREFIX + '%d_%s_' + self.constant_strings.PART_NAME + DIFF_SUFFIX) % (self.operations_count.diff_cnt, path_leaf(elt[1]))
+                               patch_loc = '%s/%s' % (self.constant_strings.OUT_DIR, patch_name)
+                               logging.debug('File renamed %s ==> %s' % (src_file, dst_file))
+                               # Should be careful of renaming files??
+                               # Should we consider measure_two_filediffs ?? so that patch size is NOT greater than actual file?
+                               # What if folder path has numerics??
+
+                               if os.path.isdir(src_file) or os.path.isdir(dst_file):
+                                       # This case never occurs??
+                                       reg_doc_obj.write('"%s" and "%s" renamed 0 0\n' % (elt[0], elt[1]))
+                                       self.update_attributes(elt[0], "FILE")
+                               # Make sure these files are normal and they should NOT be hard/symlinks
+                               elif not (os.path.islink(src_file) or os.path.islink(dst_file)) \
+                                       and not (elt[0] in self.old_new_entries_data.new_hardlinks or elt[1] in self.old_new_entries_data.old_hardlinks) \
+                                       and (os.path.isfile(src_file) and os.path.isfile(dst_file)):
+                                       if filecmp.cmp(src_file, dst_file):
+                                               self.operations_count.move_cnt += 1
+                                               self.operations_count.diff_cnt -= 1
+                                               reg_doc_obj.write('MOVE:REG:%s:%s:%s\n' % (elt[1], elt[0], hash_file(src_file)))
+                                       else:
+                                               ret = subprocess.call([DIFF_UTIL, src_file, dst_file, patch_loc])
+                                               if ret != 0:
+                                                       logging.debug('Failed to create diff %d %s %s' % (ret, src_file, dst_file))
+                                                       # Diff failed, so ship the new file as a plain NEW entry instead
+                                                       self.entries_lists.added_files.append(elt[0])
+                                                       self.operations_count.diff_cnt -= 1
+                                               else:
+                                                       reg_doc_obj.write('DIFF:REG:%s:%s:%s:%s:%s\n' % (elt[1], elt[0], hash_file(src_file), hash_file(dst_file), patch_name))
+
+                                       update_size(src_file, dst_file)
+                                       self.update_attributes(elt[0], "FILE")
+
+       def process_removed_entries(self):
+               with open(self.constant_strings.REG_DOC, "a") as reg_doc_obj:
+                       for elt in self.entries_lists.removed_files:
+                               # if files are part of patches after renaming, we shd remove them as part of removed.
+                               src_file = os.path.join(self.constant_strings.BASE_OLD, elt)
+                               if os.path.islink(src_file):
+                                       reg_doc_obj.write('DEL:SYM:%s\n' % (elt))
+                               elif elt in self.old_new_entries_data.old_hardlinks:
+                                       reg_doc_obj.write('DEL:HARD:%s\n' % (elt))
+                               elif os.path.isdir(src_file):
+                                       reg_doc_obj.write('DEL:REG:%s:NA\n' % (elt))
+                               else:
+                                       reg_doc_obj.write('DEL:REG:%s:%s\n' % (elt, hash_file(src_file)))
+                               logging.debug('File deleted %s' % src_file)
+                               self.operations_count.del_cnt += 1
+
+                       self.entries_lists.removed_dirs.sort(reverse=True)
+                       for elt in self.entries_lists.removed_dirs:
+                               # if Dir is empty, add it to the removed list.
+                               src_file = os.path.join(self.constant_strings.BASE_OLD, elt)
+                               # Irrespective of whether files are MOVED or DIFF'ed, we can delete the folders. This action can be performed at the end.
+                               # It covers symlinks also, as NEW symlinks cannot point to NON existent folders of TARGET (NEW binary)
+                               if os.path.isdir(src_file):
+                                       reg_doc_obj.write('DEL:END:%s\n' % (elt))
+                                       self.operations_count.del_cnt += 1
+                                       logging.debug('Dir deleted- %s' % src_file)
+
+       def process_added_entries(self):
+               with open(self.constant_strings.SYMLINK_DOC, "a") as symlink_doc_obj, \
+                       open(self.constant_strings.HARDLINK_DOC, "a") as hardlink_doc_obj:
+                       for elt in self.entries_lists.added_files:
+                               dst_file = os.path.join(self.constant_strings.BASE_NEW, elt)
+                               destpath = os.path.join(NEW_FILES_PATH, elt)
+
+                               if os.path.islink(dst_file):
+                                       patch = os.readlink(dst_file)
+                                       logging.debug('File is an added symlink %s' % elt)
+                                       symlink_doc_obj.write('SYM:NEW:%s:%s\n' % (elt, patch))
+                                       # What if this is only a new symlink and the folder already exists? Should recheck.
+                                       if not os.path.exists(os.path.dirname(destpath)):
+                                               os.makedirs(os.path.dirname(destpath))
+                                               logging.info('Added symlink - adding missing dir')
+                                       self.update_attributes(elt, "SYM")
+                                       self.operations_count.sym_new_cnt += 1
+                               elif elt in self.old_new_entries_data.new_hardlinks:
+                                       patch = self.old_new_entries_data.new_hardlinks[elt]
+                                       logging.debug('File is an added hardlink %s' % elt)
+                                       hardlink_doc_obj.write('HARD:NEW:%s:%s\n' % (elt, patch))
+                                       if not os.path.exists(os.path.dirname(destpath)):
+                                               os.makedirs(os.path.dirname(destpath))
+                                               logging.info('Added hardlink - adding missing dir')
+                                       self.operations_count.hard_new_cnt += 1
+                               elif os.path.isdir(dst_file):  # We just create an empty directory here
+                                       if not os.path.exists(destpath):
+                                               os.makedirs(destpath)
+                                               logging.debug('New dir created %s' % destpath)
+                                               self.operations_count.new_cnt += 1
+                               else:
+                                       self.operations_count.new_cnt += 1
+                                       destdir = os.path.dirname(destpath)
+                                       logging.debug('New files - %s ==> %s' % (dst_file, destdir))
+
+                                       if not os.path.isdir(destdir):
+                                               try:
+                                                       os.makedirs(destdir)
+                                               except Exception as exc:
+                                                       logging.critical('Error in NEW files DIR entry -%s' % destdir)
+                                                       raise exc
+
+                                       try:
+                                               if not stat.S_ISFIFO(os.stat(dst_file).st_mode):
+                                                       shutil.copy2(dst_file, destpath)
+                                                       logging.debug('New files copied from- %s to- %s' % (dst_file, destpath))
+                                       except Exception as exc:
+                                               logging.critical('Error in NEW files entry -%s -%s' % (dst_file, destpath))
+                                               raise exc
+                                       self.update_attributes(elt, "FILE")
+
+               for elt in self.entries_lists.added_dirs:
+                       destpath = os.path.join(NEW_FILES_PATH, elt)
                        if not os.path.exists(destpath):
                                os.makedirs(destpath)
-                               logging.debug(' File New Dir %s' % destpath)
-                               New_Cnt = New_Cnt + 1
-               else:
-                       New_Cnt = New_Cnt + 1
-                       destdir = os.path.dirname(destpath)
-                       logging.debug('New files - %s ==> %s' % (dst_file, destdir))
-
-                       if not os.path.isdir(destdir):
-                               try:
-                                       os.makedirs(destdir)
-                               except Exception as exc:
-                                       logging.critical('Error in NEW files DIR entry -%s' % destdir)
-                                       raise exc
-
-                       try:
-                               if not stat.S_ISFIFO(os.stat(dst_file).st_mode):
-                                       shutil.copy2(dst_file, destpath)
-                                       logging.debug('New files copied from- %s to- %s' % (dst_file, destpath))
-                       except Exception as exc:
-                               logging.critical('Error in NEW files entry -%s -%s' % (dst_file, destpath))
-                               raise exc
-                       Update_Attr(elt, "FILE", File_Attributes, Sym_Attributes)
-
-       for elt in Dir_Added:
-               destpath = os.path.join(NEW_FILES_PATH, elt)
-               if not os.path.exists(destpath):
-                       os.makedirs(destpath)
-                       logging.debug(' DirList New Dir %s' % destpath)
-                       New_Cnt = New_Cnt + 1
-
-       # Base directory should be system
-       print 'Compressing New files'
-       if (New_Cnt > 0 or Sym_New_Cnt > 0):
-               WorkingDir = os.getcwd()
-               os.chdir(os.path.join(os.getcwd(), NEW_FILES_PATH))
-               logging.info('Curr Working Dir - %s' % os.getcwd())
-               log_path = os.path.join(WorkingDir, LOGFILE)
-               os.system(ZIPUTIL + NEW_FILES_ZIP_NAME + " . " + " >> " + log_path)
-               shutil.move(NEW_FILES_ZIP_NAME, WorkingDir + "/" + OUT_DIR)
-               # New file size?? cos, we extract system.7z from delta.tar and then proceed with decompression
-               SS_UpdateSize(WorkingDir + "/" + OUT_DIR + "/" + NEW_FILES_ZIP_NAME, WorkingDir + "/" + OUT_DIR + "/" + NEW_FILES_ZIP_NAME)
-               os.chdir(WorkingDir)
-               shutil.rmtree(NEW_FILES_PATH)
-               # use 7z a system.7z ./*
-
-       #logging.info('%d Dir to be removed' % len(Dir_removed))
-       logging.info('%d files unchanged' % len(files_unchanged))
-       logging.info('%d files files_renamed' % len(files_renamed))
-       logging.info('%d files NEW' % len(files_new))
-       logging.info('%d File attr' % len(File_Attributes))
-       logging.info('%d Sym attr' % len(Sym_Attributes))
-       logging.info('PaTcHCoUnT:Diffs-%d Moves-%d News-%d Delets-%d SymDiffs-%d SymNews-%d HardDiffs-%d HardNews-%d\n' % \
-               (Diff_Cnt, Move_Cnt, New_Cnt, Del_Cnt, Sym_Diff_Cnt, Sym_New_Cnt, Hard_Diff_Cnt, Hard_New_Cnt))
-       print('PaTcHCoUnT:Diffs-%d Moves-%d News-%d Delets-%d SymDiffs-%d SymNews-%d HardDiffs-%d HardNews-%d\n' % \
-               (Diff_Cnt, Move_Cnt, New_Cnt, Del_Cnt, Sym_Diff_Cnt, Sym_New_Cnt, Hard_Diff_Cnt, Hard_New_Cnt))
-
-       # There could be duplicates, TODO, can check before adding..
-       ATTR_FILE_D = open(ATTR_FILE, 'a+')
-       for elt in File_Attributes:
-               ATTR_FILE_D.write(elt)
-       for elt in Sym_Attributes:
-               ATTR_FILE_D.write(elt)
-
-       ATTR_FILE_D.close()
-
-       Partition_Doc_SymLinks.close()
-       Partition_Doc_HardLinks.close()
-       Partition_Read_SymLinks = open(SymLinkDoc, 'r+')
-       Partition_Read_HardLinks = open(HardLinkDoc, 'r+')
-       Partition_Doc.write(Partition_Read_SymLinks.read())
-       for line in reversed(Partition_Read_HardLinks.readlines()):
-               Partition_Doc.write(line)
-       Partition_Doc.write('PaTcHCoUnT:%d %d %d %d %d %d %d %d\n' % \
-               (Diff_Cnt, Move_Cnt, New_Cnt, Del_Cnt, Sym_Diff_Cnt, Sym_New_Cnt, Hard_Diff_Cnt, Hard_New_Cnt))
-       Partition_Read_SymLinks.close()
-       Partition_Read_HardLinks.close()
-       Partition_Doc.close()
-       os.remove(SymLinkDoc)
-       os.remove(HardLinkDoc)
-
-       if Diff_Cnt + Move_Cnt + New_Cnt + Del_Cnt + Sym_Diff_Cnt + Sym_New_Cnt + Hard_Diff_Cnt + \
-               Hard_New_Cnt + os.path.getsize(ATTR_FILE) == 0:
-               print('No Delta Generated for %s - %s' % (PART_NAME, OUT_DIR))
-               logging.info('No Delta Generated for %s' % PART_NAME)
-               shutil.rmtree(OUT_DIR)
-
-
-def IsSymlink(info):
-       return (info.external_attr >> 16) == 0120777
-
-
-def NewFiles(src, dest):
-       print src, dest
-       subprocess.call(['cp', '-rp', src, dest])
-       #try:
-               #shutil.copytree(src, dest)
-       #except OSError as e:
-               # If the error was caused because the source wasn't a directory
-               #if e.errno == errno.ENOTDIR:
-                       #shutil.copy2(src, dest)
-               #else:
-                       #print('Directory not copied. Error: %s' % e)
+                               logging.debug('New dir created %s' % destpath)
+                               self.operations_count.new_cnt += 1
+
+       def compress_added_entries(self):
+               # Base directory should be system
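+               # ZIPUTIL is assumed here to be a 7z-style command prefix; if it were e.g. "7z a -y ", the
+               # os.system() call below would expand to roughly "7z a -y system.7z .  >> <log_path>"
+               # (illustrative only; the actual ZIPUTIL string is defined elsewhere in this script).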
+               if (self.operations_count.new_cnt > 0 or self.operations_count.sym_new_cnt > 0):
+                       WorkingDir = os.getcwd()
+                       os.chdir(os.path.join(os.getcwd(), NEW_FILES_PATH))
+                       logging.info('Current working dir - %s' % os.getcwd())
+                       log_path = os.path.join(WorkingDir, LOGFILE)
+                       os.system(ZIPUTIL + NEW_FILES_ZIP_NAME + " . " + " >> " + log_path)
+                       zipped_dir = os.path.join(WorkingDir, self.constant_strings.OUT_DIR)
+                       shutil.move(NEW_FILES_ZIP_NAME, zipped_dir)
+                       update_size(os.path.join(zipped_dir, NEW_FILES_ZIP_NAME), os.path.join(zipped_dir, NEW_FILES_ZIP_NAME))
+                       os.chdir(WorkingDir)
+                       shutil.rmtree(NEW_FILES_PATH)
+
+       def write_doc_and_cleanup(self):
+               # There could be duplicates, TODO, can check before adding..
+               with open(self.constant_strings.ATTR_FILE, 'a+') as attr_file_obj:
+                       for elt in self.entries_lists.file_attributes:
+                               attr_file_obj.write(elt)
+                       for elt in self.entries_lists.sym_attributes:
+                               attr_file_obj.write(elt)
+
+               with open(self.constant_strings.REG_DOC, "a") as reg_doc_obj, \
+                       open(self.constant_strings.SYMLINK_DOC, 'r+') as symlink_doc_obj, \
+                       open(self.constant_strings.HARDLINK_DOC, 'r+') as hardlink_doc_obj:
+                       reg_doc_obj.write(symlink_doc_obj.read())
+                       for line in reversed(hardlink_doc_obj.readlines()):
+                               reg_doc_obj.write(line)
+                       reg_doc_obj.write('PaTcHCoUnT:%d %d %d %d %d %d %d %d\n' % \
+                               (self.operations_count.diff_cnt, self.operations_count.move_cnt, \
+                                       self.operations_count.new_cnt, self.operations_count.del_cnt, \
+                                       self.operations_count.sym_diff_cnt, self.operations_count.sym_new_cnt, \
+                                       self.operations_count.hard_diff_cnt, self.operations_count.hard_new_cnt))
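+                       # e.g. the trailer written above ends up looking like "PaTcHCoUnT:12 3 45 6 0 2 0 1" (illustrative counts)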
+
+               os.remove(self.constant_strings.SYMLINK_DOC)
+               os.remove(self.constant_strings.HARDLINK_DOC)
+
+       def generate_delta_fs(self):
+               tmp_str = 'Going from %d files to %d files' % (len(self.old_new_entries_data.old_files), len(self.old_new_entries_data.new_files))
+               print(tmp_str)
+               logging.info(tmp_str)
 
+               try:
+                       ensure_dir_exists(NEW_FILES_PATH)
+               except FileExistsError as exc:
+                       logging.error('Directory %s used by this script is already an existing file' % NEW_FILES_PATH)
+                       raise exc
 
-def measure_two_filediffs(src, dst):
-       patchLoc = 'temp.patch'
-       # TODO ensure this is excepts an error
-       subprocess.call([DIFF_UTIL, src, dst, patchLoc])
-       result_size = os.path.getsize(patchLoc)
-       os.remove(patchLoc)
-       return result_size
+               # Generate lists of entries
+               self.generate_entries_lists()
+               # What files have changed contents but not name/path?
+               self.generate_changed_files_lists()
 
+               # Currently, if a version or a number is the first character of the file name, we do NOT create any diffs.
+               if SUPPORT_RENAME == "TRUE":
+                       self.generate_renamed_files_with_version_lists()
 
-def ishardlink(path):
-       if os.stat(path).st_nlink > 1:
-               return True
-       return False
+               '''
+               Patch Section
+                       partition.txt contains the protocol for the binaries responsible for upgrading
+                       Types supported: DIFFS, MOVES, NEWS, DELETES, SYMDIFFS, SYMNEWS.
+               '''
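+               # Illustrative entries written to partition.txt by the methods above (paths and hashes would be
+               # real values; the field layout follows the format strings used in this class):
+               #   DIFF:REG:<old_path>:<new_path>:<old_sha>:<new_sha>:<patch_name>
+               #   MOVE:REG:<old_path>:<new_path>:<old_sha>
+               #   DEL:REG:<path>:<sha>
+               #   SYM:NEW:<path>:<link_target>
+               #   HARD:NEW:<path>:<link_target>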
 
+               print("writing diff'ed changed files...")
+               self.process_changed_entries()
 
-def get_inode(path):
-       return os.stat(path).st_ino
+               self.process_moved_duplicates()
 
 
-def get_hardlinks(base):
-       hardlinks_dict = {}
-       inodes_dict = {}
+               if SUPPORT_RENAME == "TRUE":
+                       self.find_remaining_renamed_files()
+                       self.process_renamed_files()
 
-       for root, direcotories, files in os.walk(base, topdown=True, followlinks=False):
-               for file in sorted(files):
-                       file_name = os.path.join(root, file)
-                       if not os.path.islink(file_name) and ishardlink(file_name):
-                               inode = get_inode(file_name)
-                               rel_path = os.path.relpath(file_name, base)
-                               if inode not in inodes_dict:
-                                       inodes_dict[inode] = rel_path
-                               else:
-                                       hardlinks_dict[rel_path] = inodes_dict[inode]
-
-       return hardlinks_dict
-
-
-def Get_Files(path):
-       all_files = []
-       all_dirs = []
-
-       for root, directories, filenames in os.walk(path, topdown=False, followlinks=False):
-               for directory in directories:
-                       #DirName = os.path.join(root+'/',directory)
-                       DirName = os.path.join(root, directory)
-                       if os.path.islink(DirName):
-                               logging.debug('This is symlink pointing to dir -%s' % DirName)
-                               all_files.append(os.path.relpath(DirName, path))
-                       elif not os.listdir(DirName):
-                               #print('*****Empty Directory******* -%s', DirName)
-                               # This should NOT be appended ??? Empty dir shd b considered
-                               all_dirs.append(os.path.relpath(DirName, path))
+               self.process_removed_entries()
+
+               self.process_added_entries()
+
+               print('Compressing added files')
+               self.compress_added_entries()
+
+               logging.info('%d files unchanged' % len(self.entries_lists.unchanged_files))
+               logging.info('%d files renamed' % len(self.entries_lists.renamed_files))
+               logging.info('%d files added' % len(self.entries_lists.added_files))
+               logging.info('%d regular files changed attributes' % len(self.entries_lists.file_attributes))
+               logging.info('%d symlinks changed attributes' % len(self.entries_lists.sym_attributes))
+
+               # <PaTcHCoUnT> is not a typo, it is parsed like this in other programs that use deltas
+               tmp_str = 'PaTcHCoUnT:Diffs-%d Moves-%d News-%d Delets-%d SymDiffs-%d SymNews-%d HardDiffs-%d HardNews-%d\n' % \
+                       (self.operations_count.diff_cnt, self.operations_count.move_cnt, self.operations_count.new_cnt, \
+                       self.operations_count.del_cnt, self.operations_count.sym_diff_cnt, self.operations_count.sym_new_cnt, \
+                       self.operations_count.hard_diff_cnt, self.operations_count.hard_new_cnt)
+               logging.info(tmp_str)
+               print(tmp_str)
+
+               self.write_doc_and_cleanup()
+
+               if self.operations_count.diff_cnt + self.operations_count.move_cnt + self.operations_count.new_cnt + \
+                       self.operations_count.del_cnt + self.operations_count.sym_diff_cnt + self.operations_count.sym_new_cnt + \
+                       self.operations_count.hard_diff_cnt + self.operations_count.hard_new_cnt + os.path.getsize(self.constant_strings.ATTR_FILE) == 0:
+                       print('No delta generated for %s - %s' % (self.constant_strings.PART_NAME, self.constant_strings.OUT_DIR))
+                       logging.info('No delta generated for %s' % self.constant_strings.PART_NAME)
+                       shutil.rmtree(self.constant_strings.OUT_DIR)
+
+
+def update_cfg_file(DELTA_BIN, UPDATE_CFG_PATH):
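+       # Sketch of the intent (field names below are illustrative, not taken from a real update.cfg):
+       # a config line whose first field equals DELTA_BIN, e.g. "rootfs rootfs.delta", gets the maximum
+       # memory required for the upgrade (MEM_REQ) appended as an extra tab-separated column.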
+       with open(UPDATE_CFG_PATH, 'r') as f:
+               lines = f.readlines()
+
+       with open(UPDATE_CFG_PATH, 'w') as f:
+               for line in lines:
+                       ConfigItems = line.split()
+                       if ConfigItems[0] == DELTA_BIN:
+                               logging.info('Updating %s config' % DELTA_BIN)
+                               # Append the maximum memory required for the upgrade as an extra column
+                               line = line.rstrip('\n') + '\t' + str(MEM_REQ) + '\n'
+                               f.write(line)
                        else:
-                               all_dirs.append(os.path.relpath(DirName, path))
-               for filename in filenames:
-                       FileName = os.path.join(root, filename)
-                       all_files.append(os.path.relpath(FileName, path))
+                               f.write(line)
+
+
+def generate_delta_image(BASE_OLD, BASE_NEW, OUT_DIR, DELTA_BIN, UPDATE_CFG_PATH, COMPRESSION_METHOD):
+       oldsize_d = os.path.getsize(BASE_OLD)
+       newsize_d = os.path.getsize(BASE_NEW)
+       SHA_BIN_DEST = hash_file(BASE_NEW)
+       SHA_BIN_BASE = hash_file(BASE_OLD)
+
+       DELTA = DELTA_BIN
+       update_size(BASE_OLD, BASE_NEW)
+       if UPDATE_CFG_PATH:
+               with open(UPDATE_CFG_PATH, 'r') as f:
+                       lines = f.readlines()
+
+               with open(UPDATE_CFG_PATH, 'w') as f:
+                       for line in lines:
+                               ConfigItems = line.split()
+                               if ConfigItems[0] == DELTA_BIN:
+                                       logging.info('Updating %s config' % DELTA_BIN)
+                                       DELTA = ConfigItems[1]
+                                       # Append old/new sizes and SHA sums to the matching config line
+                                       line = line.rstrip('\n') + '\t' + str(oldsize_d) + '\t\t' + str(newsize_d) + '\t\t' + str(SHA_BIN_BASE) + '\t\t' + str(SHA_BIN_DEST) + '\n'
+                               f.write(line)
+
+       patchLoc = '%s/%s' % (OUT_DIR, DELTA)
+       logging.info('Make Delta Image %s <--> %s ==> %s %s' % (BASE_OLD, BASE_NEW, DELTA_BIN, patchLoc))
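+       # The call below mirrors this argument order, e.g. (illustrative paths; assuming DIFF_UTIL resolves to
+       # the ss_bsdiff binary and COMPRESSION_METHOD is "brotli"): ss_bsdiff -c brotli old.img new.img out/rootfs.delta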
+       subprocess.call([DIFF_UTIL, "-c", COMPRESSION_METHOD, BASE_OLD, BASE_NEW, patchLoc])
+
+
+def generate_full_image(BASE_OLD, BASE_NEW, OUT_DIR, DELTA_BIN, UPDATE_CFG_PATH):
+       logging.info('Make Full Image %s <--> %s ==> %s' % (BASE_OLD, BASE_NEW, DELTA_BIN))
+       oldsize_d = os.path.getsize(BASE_OLD)
+       newsize_d = os.path.getsize(BASE_NEW)
+       SHA_BIN_DEST = hash_file(BASE_NEW)
+       SHA_BIN_BASE = hash_file(BASE_OLD)
+       update_size(BASE_OLD, BASE_NEW)
+
+       if UPDATE_CFG_PATH:
+               with open(UPDATE_CFG_PATH, 'r') as f:
+                       lines = f.readlines()
+
+               with open(UPDATE_CFG_PATH, 'w') as f:
+                       for line in lines:
+                               ConfigItems = line.split()
+                               if ConfigItems[0] == DELTA_BIN:
+                                       logging.info('Updating %s config' % DELTA_BIN)
+                                       # Append old/new sizes and SHA sums to the matching config line
+                                       line = line.rstrip('\n') + '\t' + str(oldsize_d) + '\t\t' + str(newsize_d) + '\t\t' + str(SHA_BIN_BASE) + '\t\t' + str(SHA_BIN_DEST) + '\n'
+                               f.write(line)
+
+
+def generate_delta_fs(PART_NAME, BASE_OLD, BASE_NEW, OUT_DIR, ATTR_FILE, ATTR_NEW):
+       delta_fs_generator = DeltaFsGenerator(PART_NAME, BASE_OLD, BASE_NEW, OUT_DIR, ATTR_FILE, ATTR_NEW)
+       delta_fs_generator.generate_delta_fs()
+
+
+def main():
+       logging.basicConfig(filename=LOGFILE, level=logging.DEBUG)
+
+       try:
+               if len(sys.argv) < 6:
+                       sys.exit('Usage: CreatePatch.py UPDATE_TYPE PARTITION_NAME OLD_BASE_DIR NEW_BASE_DIR OUTFOLDER')
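+               # Illustrative invocation shapes (names are placeholders; the literal UPDATE_TYPE strings are
+               # defined elsewhere in this script), matching how mk_part_delta.sh calls this script:
+               #   CreatePatch.py <DELTA_FS type> rootfs old_root/ new_root/ out/ v1.attr v2.attr update.cfg
+               #   CreatePatch.py <DELTA_IMAGE or FULL_IMAGE type> ramdisk rd.img.old rd.img.new out/ update.cfg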
+               UPDATE_TYPE = sys.argv[1]
+               UPDATE_TYPE_S = UPDATE_TYPE.split(":")[0]
+               # TODO make PART_NAME optional
+               PART_NAME = sys.argv[2]
+
+               BASE_OLD = sys.argv[3]
+               BASE_NEW = sys.argv[4]
+               OUT_DIR = sys.argv[5]
+               ATTR_OLD = EMPTY
+               ATTR_NEW = EMPTY
+               UPDATE_CFG_PATH = EMPTY
+
+               global DIFF_UTIL
+               global DIFFPATCH_UTIL
+               if UPDATE_TYPE_S == DELTA_FS:
+                       if len(sys.argv) == 9:
+                               ATTR_OLD = sys.argv[6]
+                               ATTR_NEW = sys.argv[7]
+                               UPDATE_CFG_PATH = os.path.join(PARENT_DIR, sys.argv[8])
 
-       all_files.sort()
-       all_dirs.sort()
-       return all_files, all_dirs
+               elif UPDATE_TYPE_S in [DELTA_IMAGE, FULL_IMAGE]:
+                       if len(sys.argv) == 7:
+                               UPDATE_CFG_PATH = os.path.join(PARENT_DIR, sys.argv[6])
 
+               if not (os.path.isfile(DIFF_UTIL) and os.access(DIFF_UTIL, os.X_OK)):
+                       DIFF_UTIL = os.path.join(COMMON_BIN_PATH, DIFF_UTIL)
+                       DIFFPATCH_UTIL = os.path.join(COMMON_BIN_PATH, DIFFPATCH_UTIL)
+                       if not (os.path.isfile(DIFF_UTIL) and os.access(DIFF_UTIL, os.X_OK)):
+                               print("Diff Util Does NOT exist -- ABORT", file=sys.stderr)
+                               logging.info('Diff Util Does NOT exist -- ABORT')
+                               sys.exit(1)
 
-USAGE_DOCSTRING = """
-       Generate Delta using BASEOLD AND BASE NEW
-       Attributes is optional
-       Usage: CreatePatch.py UPDATE_TYPE PARTNAME OLDBASE NEWBASE OUTFOLDER
-"""
+               start = datetime.datetime.now().time()
+               text = f'Started CreatePatch.py at {start}'
+               logging.info(f'{text:^70}')
+               print(f'{text:^70}')
+               logging.info('Arguments Passed: [UpdateType - %s][Part Name - %s] [BaseOld - %s]  [BaseNew - %s] \n [OUTPUTDir - %s] \
+                       [BASE ATTR - %s] [TARGET ATTR - %s]' % (UPDATE_TYPE, PART_NAME, BASE_OLD, BASE_NEW, OUT_DIR, ATTR_OLD, ATTR_NEW))
 
+               try:
+                       ensure_dir_exists(OUT_DIR)
+               except FileExistsError as exc:
+                       logging.error('Argument passed as OUT_DIR - %s is already an existing file' % OUT_DIR)
+                       raise exc
+               if UPDATE_TYPE_S == DELTA_FS:
+                       if not (os.path.isfile(ATTR_OLD) and os.path.isfile(ATTR_NEW)):
+                               print("Attributes missing -- ABORT", file=sys.stderr)
+                               sys.exit(1)
 
-def Usage(docstring):
-       print docstring.rstrip("\n")
-       print COMMON_DOCSTRING
+               # TODO verify if other linux distributions support APT library
+               cache = apt.Cache()
+               if cache['p7zip'].is_installed and cache['attr'].is_installed and cache['tar'].is_installed:
+                       logging.info('Basic utils installed')
+               else:
+                       print("Basic utils missing -- ABORT", file=sys.stderr)
+                       sys.exit(1)
+
+               if UPDATE_TYPE_S == FULL_IMAGE:
+                       generate_full_image(BASE_OLD, BASE_NEW, OUT_DIR, PART_NAME, UPDATE_CFG_PATH)
+               elif UPDATE_TYPE_S == DELTA_IMAGE:
+                       # Generating LZMA deltas is supported by the underlying software, but it would require some kind of
+                       # format autodetection mechanism, which is not currently available. Disabled for now.
+                       generate_delta_image(BASE_OLD, BASE_NEW, OUT_DIR, PART_NAME, UPDATE_CFG_PATH, COMPRESSION_BROTLI)
+               elif UPDATE_TYPE_S == DELTA_FS:
+                       ATTR_FILE = os.path.join(OUT_DIR, (PART_NAME + ATTR_DOC_EXT))
+                       diff_attr_files(ATTR_OLD, ATTR_NEW, ATTR_FILE)
+                       generate_delta_fs(PART_NAME, BASE_OLD, BASE_NEW, OUT_DIR, ATTR_FILE, ATTR_NEW)
+
+                       if UPDATE_CFG_PATH:
+                               update_cfg_file(PART_NAME, UPDATE_CFG_PATH)
+               else:
+                       print('UPDATE_TYPE ---- UNKNOWN FORMAT', file=sys.stderr)
+                       raise TypeError('Unknown UPDATE_TYPE: %s' % UPDATE_TYPE)
+
+               if UPDATE_TYPE_S == DELTA_FS:
+                       if os.path.exists(ATTR_OLD) and os.path.exists(ATTR_NEW):
+                               os.remove(ATTR_OLD)
+                               os.remove(ATTR_NEW)
+
+               end = datetime.datetime.now().time()
+               logging.info('Max memory required to upgrade [%s] is [%d] for file [%s]' % (PART_NAME, MEM_REQ, MEM_FILE))
+               text = f'Done with CreatePatch.py at {end}'
+               logging.info(f'{text:^70}')
+               print(f'{text:^70}')
+               logging.info('Time start [%s] - Time end [%s]' % (start, end))
+               print('Done with [%s][%d]---- Time start [%s] - Time end [%s]' % (PART_NAME, MEM_REQ, start, end))
+
+       except Exception as exc:
+               logging.error('Usage: {} <Update_Type> <Part_Name> <OLD_Base> <NEW_Base> <OUT_DIR>'.format(os.path.basename(sys.argv[0])))
+               raise exc
 
 
 if __name__ == '__main__':
        main()
index 3f1f49e..1315db7 100755 (executable)
@@ -160,7 +160,7 @@ fn_mk_full_img()
                        sudo rm -rf ${PART_IMG_NEW}
                        return 0
                fi
-               sudo python ${COMMON_BINDIR}/CreatePatch.py ${UPDATE_TYPE} ${PART_NAME} ${PART_IMG_OLD} ${PART_IMG_NEW} ${OUTPUT_DIR} ${UPDATE_CFG_PATH}
+               sudo python3 ${COMMON_BINDIR}/CreatePatch.py ${UPDATE_TYPE} ${PART_NAME} ${PART_IMG_OLD} ${PART_IMG_NEW} ${OUTPUT_DIR} ${UPDATE_CFG_PATH}
                PythonRet=$?
                sudo rm ${PART_IMG_OLD}
 
@@ -216,7 +216,7 @@ fn_mk_delta_img_core()
 
        #---- make delta file ----
        echo "make ${DELTA}"
-       sudo python ${COMMON_BINDIR}/CreatePatch.py ${UPDATE_TYPE} ${PART_NAME} ${PART_IMG_OLD} ${PART_IMG_NEW} ${OUTPUT_DIR} ${UPDATE_CFG_PATH}
+       sudo python3 ${COMMON_BINDIR}/CreatePatch.py ${UPDATE_TYPE} ${PART_NAME} ${PART_IMG_OLD} ${PART_IMG_NEW} ${OUTPUT_DIR} ${UPDATE_CFG_PATH}
        PythonRet=$?
        #sudo xdelta delta ${PART_IMG_OLD} ${PART_IMG_NEW} ./${OUTPUT_DIR}/${PART_NAME}".delta"
 
@@ -348,7 +348,7 @@ fn_mk_delta_fs_core()
        fn_mk_attribute ${PART_NAME} ${BASE_OLD} ${BASE_NEW}
 
        #PART_IMG_ORG parameter should match with DELTA name that is part of default config file. Which will be used to update MAX size file that is present in respective partition.
-       sudo python ${COMMON_BINDIR}/CreatePatch.py ${UPDATE_TYPE} ${PART_NAME} ${BASE_OLD} ${BASE_NEW} ${OUTPUT_DIR} ${V1_ATTR_FILE} ${V2_ATTR_FILE} ${UPDATE_CFG_PATH}
+       sudo python3 ${COMMON_BINDIR}/CreatePatch.py ${UPDATE_TYPE} ${PART_NAME} ${BASE_OLD} ${BASE_NEW} ${OUTPUT_DIR} ${V1_ATTR_FILE} ${V2_ATTR_FILE} ${UPDATE_CFG_PATH}
        PythonRet=$?
 
        #---- unmount partition image ----