# Interpreter version guard: 0x02040000 is the hexversion of Python 2.4.
# The message goes to stderr using the Python 2 ``print >>`` statement form.
# NOTE(review): the statement that aborts the script after the message is not
# visible in this excerpt.
21 if sys.hexversion < 0x02040000:
22 print >> sys.stderr, "Python 2.4 or newer is required."
26 Diff two folders and create delta using SS_BSDIFF
27 Will maintain same format of script that will be generated when we use diffutil
29 1. Create a list of the files in each base folder.
30 2. Each file will fall into one of the categories below:
31 1) Only in OLD - Should be deleted
32 2) Only in NEW - Should be added or renamed accordingly
33 3) File exists in both directories but contents are different - Create Diff.
34 4) File name is same but TYPE can change (File to Folder, Folder to Link etc.)
35 5) Duplicates in the list of Deletes and News
36 6) Close matching diffs even though name changes across directories. (for matching extension)
37 7) Clearing empty directories after Moves or diffs under Rename.
38 8) Supporting Verbatim - Any entry under Verbatim_list.txt will be treated as NEW files instead of patch.
41 1. Given two folders, from list of REMOVED and NEW files find if there
42 is version change and create diff between them
45 Want to extend the same script for entire DIFF generation and replace TOTAlib.sh file
46 Catching errors at all stages. SHOULD exit & return error in case of failure
# Shared settings. Names are declared ``global`` and then assigned so that the
# other functions in this file can read them as module globals.
# NOTE(review): the enclosing ``def`` line and several other declarations are
# not visible in this excerpt.
54 global NEW_FILES_ZIP_NAME
57 global SYMLINK_DOC_NAME
63 global SUPPORT_CONTAINERS
68 global COMMON_BIN_PATH
# Fallback directory that is prefixed to the tool paths when the installed
# tools are not executable (see the check in the entry logic).
75 COMMON_BIN_PATH = "../../common/bin/"
# Binary diff / patch tools.
76 DIFF_UTIL = "/usr/local/bin/ss_bsdiff"
77 DIFFPATCH_UTIL = "/usr/local/bin/ss_bspatch"
# Shell command prefix used to archive new files; the archive name here must
# stay in sync with NEW_FILES_ZIP_NAME below.
79 ZIPUTIL = "7z -mf=off a system.7z "
# Staging directory (relative to the working directory) for brand-new files.
80 NEW_FILES_PATH = "run/upgrade-sysroot"
81 NEW_FILES_ZIP_NAME = "system.7z"
# Suffixes appended to the partition name to build per-partition documents.
83 ATTR_DOC_EXT = "_attr.txt"
84 SYMLINK_DOC_NAME = "_sym.txt"
85 HARDLINK_DOC_NAME = "_hard.txt"
# Suffix of generated patch files.
88 DIFF_SUFFIX = ".delta"
# Update-type tags compared against the first ':'-separated field of argv[1].
90 FULL_IMAGE = "FULL_IMAGE"
91 DELTA_IMAGE = "DELTA_IMAGE"
# File (looked up relative to the working directory) listing paths that must
# be shipped whole instead of as patches.
95 VERBATIM_LIST = "Verbatim_List.txt"
# Compression method names passed to the diff tool's "-c" option.
99 COMPRESSION_LZMA = "lzma"
100 COMPRESSION_BROTLI = "brotli"
# Feature switches. These are the strings "TRUE"/"FALSE", not booleans, and
# are compared with == "TRUE" elsewhere in the file.
102 SUPPORT_RENAME = "TRUE" # Use appropriate name
103 SUPPORT_CONTAINERS = "FALSE"
104 SUPPORT_VERBATIM = "TRUE"
# Entry logic: configure logging, parse argv, locate the diff tools, validate
# inputs, then dispatch on the update type.
# NOTE(review): the enclosing ``def`` line and a number of ``try:`` / ``else:``
# / exit statements are not visible in this excerpt, so exact nesting below
# cannot be confirmed from here.
110 logging.basicConfig(filename=LOGFILE, level=logging.DEBUG)
112 global GenerateDiffAttr
# NOTE(review): OUT_DIR is read from sys.argv[5] below, which requires
# len(sys.argv) >= 6. This guard only rejects fewer than 5 entries, so an
# invocation with exactly four arguments passes the check and then fails on
# the sys.argv[5] lookup.
115 if len(sys.argv) < 5:
116 sys.exit('Usage: CreatePatch.py UPDATE_TYPE PARTNAME OLDBASE NEWBASE OUTFOLDER')
117 UPDATE_TYPE = sys.argv[1]
# The update type may carry ':'-separated fields; only field 0 is used in
# the comparisons that reference UPDATE_TYPE_S.
118 UPDATE_TYPE_S = UPDATE_TYPE.split(":")
119 PART_NAME = sys.argv[2] # lets make this also optional
121 BASE_OLD = sys.argv[3]
122 BASE_NEW = sys.argv[4]
123 OUT_DIR = sys.argv[5]
126 UPDATE_CFG_PATH = EMPTY
127 GenerateDiffAttr = "FALSE"
# Optional trailing arguments. For the DELTA_FS type, exactly 9 argv entries
# supply the old/new attribute files and the update-config path; for image
# types, exactly 7 entries supply only the update-config path. Any other
# count leaves the defaults in place.
128 if UPDATE_TYPE_S[0] == DELTA_FS:
129 #instead of arguments check it in outdirectory ?
130 if len(sys.argv) == 9:
131 ATTR_OLD = sys.argv[6]
132 ATTR_NEW = sys.argv[7]
133 UPDATE_CFG_PATH = '../' + sys.argv[8]
134 GenerateDiffAttr = "TRUE"
136 elif UPDATE_TYPE_S[0] in [DELTA_IMAGE, FULL_IMAGE]:
137 if len(sys.argv) == 7:
138 #Use path in better way
139 UPDATE_CFG_PATH = '../' + sys.argv[6]
# Tool lookup: if the configured diff tool is not an executable file, retry
# with COMMON_BIN_PATH prefixed to both tool paths.
# NOTE(review): the tool paths assigned in the settings block are absolute
# ("/usr/local/bin/..."), so this plain concatenation yields a path such as
# "../../common/bin//usr/local/bin/ss_bsdiff" - confirm that is intended.
142 global DIFFPATCH_UTIL
143 if not (os.path.isfile(DIFF_UTIL) and os.access(DIFF_UTIL, os.X_OK)):
144 DIFF_UTIL = COMMON_BIN_PATH + DIFF_UTIL
145 DIFFPATCH_UTIL = COMMON_BIN_PATH + DIFFPATCH_UTIL
146 if not (os.path.isfile(DIFF_UTIL) and os.access(DIFF_UTIL, os.X_OK)):
147 print >> sys.stderr, "Diff Util Does NOT exist -- ABORT"
148 logging.info('Diff Util Does NOT exist -- ABORT')
# Only the time of day is captured (no date), for the final timing log line.
151 start = datetime.datetime.now().time()
152 logging.info('*************** ENTERED PYTHON SCRIPT *****************')
153 logging.info('Arguments Passed: [UpdateType - %s][Part Name - %s] [BaseOld - %s] [BaseNew - %s] \n [OUTPUTDir - %s] [BASE ATTR - %s] [TARGET ATTR - %s]' % (UPDATE_TYPE, PART_NAME, BASE_OLD, BASE_NEW, OUT_DIR, ATTR_OLD, ATTR_NEW))
# ensure_dir_exists raises FileExistsError when OUT_DIR names a regular file.
156 ensure_dir_exists(OUT_DIR)
157 except FileExistsError as exc:
158 logging.error('Argument passed as OUT_DIR - %s is already an existing file' % OUT_DIR)
# When attribute diffing was requested, both attribute files must exist.
160 if GenerateDiffAttr == "TRUE":
161 if not (os.path.isfile(ATTR_OLD) and os.path.isfile(ATTR_NEW)):
162 print >> sys.stderr, "Attributes missing -- ABORT"
165 # Should check if APT is supported on other linux flavours
# NOTE(review): ``cache`` is created on a line not visible here; presumably an
# APT package cache used to check that p7zip, attr and tar are installed.
167 if cache['p7zip'].is_installed and cache['attr'].is_installed and cache['tar'].is_installed:
168 logging.info('Basic utils installed')
170 print >> sys.stderr, "Basic utils missing -- ABORT"
# Dispatch on update type.
173 if UPDATE_TYPE_S[0] == FULL_IMAGE:
174 SS_mk_full_img(BASE_OLD, BASE_NEW, OUT_DIR, PART_NAME, UPDATE_CFG_PATH)
175 # #### currently does not support LZMA ####
176 # elif UPDATE_TYPE == DELTA_IMAGE:
177 # SS_mk_delta_img(BASE_OLD, BASE_NEW, OUT_DIR, PART_NAME, UPDATE_CFG_PATH, COMPRESSION_LZMA)
178 elif UPDATE_TYPE_S[0] == DELTA_IMAGE:
179 SS_mk_delta_img(BASE_OLD, BASE_NEW, OUT_DIR, PART_NAME, UPDATE_CFG_PATH, COMPRESSION_BROTLI)
# NOTE(review): this branch and the EXTRA branch compare the full UPDATE_TYPE
# string, whereas the branches above (and the argument parsing) compare
# UPDATE_TYPE_S[0]. A type with a ':' suffix would be parsed as DELTA_FS
# earlier but would not match here - confirm which is intended.
180 elif UPDATE_TYPE == DELTA_FS:
181 AttributeFile = ATTR_NEW
182 ATTR_FILE = OUT_DIR + '/' + PART_NAME + ATTR_DOC_EXT
183 Diff_AttrFiles(ATTR_OLD, ATTR_NEW, ATTR_FILE)
184 Old_files, Old_dirs = Get_Files(BASE_OLD)
185 New_files, New_dirs = Get_Files(BASE_NEW)
186 SS_Generate_Delta(PART_NAME, BASE_OLD, Old_files, Old_dirs, BASE_NEW, New_files, New_dirs, OUT_DIR, ATTR_FILE)
188 if not UPDATE_CFG_PATH == EMPTY:
189 SS_update_cfg(PART_NAME, UPDATE_CFG_PATH)
191 elif UPDATE_TYPE == EXTRA:
192 print('UPDATE_TYPE ---- EXTRA')
194 print('UPDATE_TYPE ---- UNKNOWN FORMAT')
# NOTE(review): the action taken when both attribute files exist is on a line
# not visible in this excerpt.
196 if GenerateDiffAttr == "TRUE":
197 if os.path.exists(ATTR_OLD) and os.path.exists(ATTR_NEW):
200 end = datetime.datetime.now().time()
202 logging.info('Max Memory requried to upgrade [%s] is [%d] for File[%s]' % (PART_NAME, MEM_REQ, MEM_FILE))
203 logging.info('*************** DONE WITH PYTHON SCRIPT ***************')
204 logging.info('Time start [%s] - Time end [%s]' % (start, end))
205 print('Done with [%s][%d]---- Time start [%s] - Time end [%s]' % (PART_NAME, MEM_REQ, start, end))
# NOTE(review): this catch-all handler logs only a usage line; the caught
# exception itself is not included in the visible log call, which makes
# failures unrelated to argument parsing hard to diagnose.
207 except Exception as exc:
208 logging.error('Usage: {} <Update_Type> <Part_Name> <OLD_Base> <NEW_Base> <OUT_DIR>'.format(os.path.basename(sys.argv[0])))
# Rewrite the update-config file, appending a tab-separated value to the entry
# whose first whitespace-separated token equals DELTA_BIN.
#   DELTA_BIN       - name matched against the first token of each config line.
#   UPDATE_CFG_PATH - path of the config file; read fully, then reopened for
#                     writing (which truncates it).
# NOTE(review): the loop header, the write calls and the close calls are not
# visible in this excerpt. ``Value`` is also assigned on a line not shown;
# presumably it is the recorded memory requirement - confirm.
# NOTE(review): ``ConfigItems[0]`` raises IndexError for a blank config line.
212 def SS_update_cfg(DELTA_BIN, UPDATE_CFG_PATH):
213 f = open(UPDATE_CFG_PATH, 'r')
214 lines = f.readlines()
216 f = open(UPDATE_CFG_PATH, 'w')
218 ConfigItems = line.split()
219 if ConfigItems[0] == DELTA_BIN:
220 DELTA = ConfigItems[1]
221 logging.info('Updating %s config' % DELTA_BIN)
222 line = line.rstrip('\n')
# Replacing the whole line with itself plus a suffix is just an append of
# "\t<Value>\n" to the stripped line.
224 line = line.replace(line, line + '\t' + str(Value) + '\n')
# Create a binary delta between two image files and record size/hash metadata
# in the update-config file.
#   BASE_OLD / BASE_NEW - paths of the old and new image files.
#   OUT_DIR             - directory that receives the patch file.
#   DELTA_BIN           - config entry name (first token of a config line).
#   UPDATE_CFG_PATH     - config file to update, or EMPTY to skip the update.
#   COMPRESSION_METHOD  - value passed to the diff tool's "-c" option.
231 def SS_mk_delta_img(BASE_OLD, BASE_NEW, OUT_DIR, DELTA_BIN, UPDATE_CFG_PATH, COMPRESSION_METHOD):
234 oldsize_d = os.path.getsize(BASE_OLD)
235 newsize_d = os.path.getsize(BASE_NEW)
236 SHA_BIN_DEST = hash_file(BASE_NEW)
237 SHA_BIN_BASE = hash_file(BASE_OLD)
# NOTE(review): the statement belonging to the next comment is not visible;
# presumably it gives DELTA a default for the case where no config line
# matches. Otherwise DELTA would be unbound when patchLoc is built below.
239 #incase UPDATE CFG is empty
241 SS_UpdateSize(BASE_OLD, BASE_NEW)
242 #Should throw error if PART NAME NOT found??
243 if not UPDATE_CFG_PATH == EMPTY:
# Read the whole config, then reopen it for writing (truncating it).
244 f = open(UPDATE_CFG_PATH, 'r')
245 lines = f.readlines()
247 f = open(UPDATE_CFG_PATH, 'w')
249 ConfigItems = line.split()
250 if ConfigItems[0] == DELTA_BIN:
251 logging.info('Updating %s config' % DELTA_BIN)
# The second token of the matching line becomes the patch file name.
252 DELTA = ConfigItems[1]
253 line = line.rstrip('\n')
# Append old size, new size, old SHA and new SHA to the matching line.
254 line = line.replace(line, line + '\t' + str(oldsize_d) + '\t\t' + str(newsize_d) + '\t\t' + str(SHA_BIN_BASE) + '\t\t' + str(SHA_BIN_DEST) + '\n')
260 patchLoc = '%s/%s' % (OUT_DIR, DELTA)
261 logging.info('Make Delta Image %s <--> %s ==> %s %s' % (BASE_OLD, BASE_NEW, DELTA_BIN, patchLoc))
# NOTE(review): the exit status of the diff tool is not checked here.
262 subprocess.call([DIFF_UTIL, "-c", COMPRESSION_METHOD, BASE_OLD, BASE_NEW, patchLoc])
# Record size/hash metadata for a full-image update in the update-config file.
# No diff tool is invoked in the visible lines.
#   BASE_OLD / BASE_NEW - paths of the old and new image files.
#   OUT_DIR             - accepted but not used in the visible lines.
#   DELTA_BIN           - config entry name (first token of a config line).
#   UPDATE_CFG_PATH     - config file to update, or EMPTY to skip the update.
# NOTE(review): the loop header, write calls and close calls of the config
# rewrite are not visible in this excerpt.
265 def SS_mk_full_img(BASE_OLD, BASE_NEW, OUT_DIR, DELTA_BIN, UPDATE_CFG_PATH):
266 logging.info('Make Full Image %s <--> %s ==> %s' % (BASE_OLD, BASE_NEW, DELTA_BIN))
267 oldsize_d = os.path.getsize(BASE_OLD)
268 newsize_d = os.path.getsize(BASE_NEW)
269 SHA_BIN_DEST = hash_file(BASE_NEW)
270 SHA_BIN_BASE = hash_file(BASE_OLD)
271 #echo -e "\t${oldsize_d}\t\t${newsize_d}\t\t${SHA_BIN_BASE}\t\t${SHA_BIN_DEST}" >> ${DATA_DIR}/update_new.cfg
272 SS_UpdateSize(BASE_OLD, BASE_NEW)
274 if not UPDATE_CFG_PATH == EMPTY:
# Read the whole config, then reopen it for writing (truncating it).
275 f = open(UPDATE_CFG_PATH, 'r')
276 lines = f.readlines()
278 f = open(UPDATE_CFG_PATH, 'w')
280 ConfigItems = line.split()
281 if ConfigItems[0] == DELTA_BIN:
282 logging.info('Updating %s config' % DELTA_BIN)
# DELTA is assigned but not used again in the visible lines.
283 DELTA = ConfigItems[1]
284 line = line.rstrip('\n')
# Append old size, new size, old SHA and new SHA to the matching line.
285 line = line.replace(line, line + '\t' + str(oldsize_d) + '\t\t' + str(newsize_d) + '\t\t' + str(SHA_BIN_BASE) + '\t\t' + str(SHA_BIN_DEST) + '\n')
def zipdir(path, zip):
    """Add every regular entry found under *path* to an open archive.

    Args:
        path: Directory to walk recursively.
        zip: Archive object exposing ``write(filename)`` (for example an open
            ``zipfile.ZipFile``). The parameter name shadows the builtin but
            is kept so keyword callers keep working.

    Only names reported as files by ``os.walk`` are written, so empty
    directories do not get an archive entry.
    """
    for root, _dirs, files in os.walk(path):
        # The visible text had no loop binding ``file``; iterate the file
        # names of each visited directory explicitly.
        for file_name in files:
            zip.write(os.path.join(root, file_name))
def ensure_dir_exists(path):
    """Make sure *path* exists as a directory.

    Args:
        path: Directory path; callers pass nested relative paths such as
            ``'run/upgrade-sysroot/'``, so intermediate directories are
            created as needed.

    Raises:
        FileExistsError: If *path* already exists as a regular file.

    An existing directory is left untouched.
    """
    if not os.path.exists(path):
        # The visible text had an empty branch here; create the directory
        # tree, which is what every caller relies on afterwards.
        os.makedirs(path)
    elif os.path.isfile(path):
        raise FileExistsError
308 head, tail = ntpath.split(path) # This is for windows?? Recheck
313 head, tail = ntpath.split(path)
# Create the attribute diff between the OLD and NEW attribute files.
def Diff_AttrFiles(ATTR_OLD, ATTR_NEW, ATTR_FILE):
    """Write the lines that occur in ATTR_NEW but not in ATTR_OLD to ATTR_FILE.

    Args:
        ATTR_OLD: Path of the attribute listing for the old tree.
        ATTR_NEW: Path of the attribute listing for the new tree.
        ATTR_FILE: Output path; truncated and rewritten.

    Does nothing when the module-level ``GenerateDiffAttr`` flag is the
    string ``"FALSE"``. Lines are compared as whole strings, duplicates
    collapse, and the output order is the iteration order of a set (not the
    order of the input file).
    """
    if GenerateDiffAttr == "FALSE":
        # Attribute diffing was not requested; leave ATTR_FILE untouched.
        return

    with open(ATTR_OLD, 'r') as f_old:
        old_lines = set(f_old.read().splitlines())

    with open(ATTR_NEW, 'r') as f_new:
        new_lines = set(f_new.read().splitlines())

    added = set.difference(new_lines, old_lines)
    with open(ATTR_FILE, 'w+') as file_out:
        for line in added:
            logging.info('Diff_AttrFiles - %s' % line)
            file_out.write(line + '\n')
# Collect attribute lines for one path from the attribute file.
#   RequestedPath   - path relative to the partition root; it is wrapped as
#                     "/<path>" including the double quotes to form FilePath.
#   Type            - compared against SYMLINK_TYPE to choose the output list.
#   File_Attributes - list that receives lines for non-symlink entries.
#   Sym_Attributes  - list that receives lines for symlink entries.
# Reads the module-level GenerateDiffAttr flag and AttributeFile path.
# NOTE(review): the statement under the "FALSE" check, the loop over the file
# and the condition that uses FilePath are not visible in this excerpt;
# presumably the function returns early and keeps only lines containing
# FilePath - confirm against the full file.
# NOTE(review): callers in this file pass the literal "SYM" / "FILE" for Type;
# SYMLINK_TYPE is defined outside this excerpt.
334 def Update_Attr(RequestedPath, Type, File_Attributes, Sym_Attributes):
335 # Full File Path should MATCH
336 if GenerateDiffAttr == "FALSE":
338 FilePath = '"/' + RequestedPath + '"'
339 #print ('FilePath - %s'% (FilePath))
# The attribute file is reopened on every call.
340 with open(AttributeFile) as f:
343 if Type == SYMLINK_TYPE:
344 Sym_Attributes.append(line)
346 File_Attributes.append(line)
def hash_file(filename):
    '''Return the SHA-1 hex digest of the file at *filename*.

    The file is opened in binary mode and consumed in 1 MiB chunks so that
    large images never have to be held in memory at once.

    Raises:
        IOError / OSError: If the file cannot be opened or read.
    '''
    # Local import keeps this block self-contained regardless of the file's
    # top-level import list.
    import hashlib

    digest = hashlib.sha1()
    with open(filename, 'rb') as stream:
        # read() returns b'' at end of file, which terminates the iterator.
        for chunk in iter(lambda: stream.read(1024 * 1024), b''):
            digest.update(chunk)
    # Hex representation of the digest.
    return digest.hexdigest()
# Find files whose content (by hash_file digest) appears both among the removed
# files of the old tree and among the new files of the new tree.
#   BASE_OLD / BASE_NEW - root directories of the old and new trees.
#   fileListB           - relative paths to examine under BASE_OLD.
#   fileListT           - relative paths to examine under BASE_NEW.
#   Old_hardlinks / New_hardlinks - accepted but not used in the visible lines.
# Result mapping ``fdupes``: key is the full old path (BASE_OLD + '/' + name),
# value is the relative new path.
# NOTE(review): the initialisation of ``dups`` / ``fdupes``, the statements
# under the two skip conditions and the return statement are not visible in
# this excerpt; callers use the result as a dict.
368 def find_dupes_list(BASE_OLD, BASE_NEW, fileListB, fileListT, Old_hardlinks, New_hardlinks):
371 print('Finding Duplicates in - %s %s' % (BASE_OLD, BASE_NEW))
# Pass 1: index old files by content hash. Symlinks, directories and files
# with more than one link are excluded by the condition below. When several
# old files share a hash, the last one seen overwrites the earlier entry.
373 for filename in fileListB:
374 Src_File = BASE_OLD + '/' + filename
375 if os.path.islink(Src_File) or os.path.isdir(Src_File) or ishardlink(Src_File):
378 file_hash = hash_file(Src_File)
379 dups[file_hash] = Src_File
# Pass 2: look each new file's hash up in the index.
381 for filename in fileListT:
382 Dest_File = BASE_NEW + '/' + filename
383 if os.path.islink(Dest_File) or os.path.isdir(Dest_File) or ishardlink(Dest_File):
386 file_hash = hash_file(Dest_File)
387 if file_hash in dups:
388 BaseStr = dups.get(file_hash)
# NOTE(review): find('/') returns the FIRST slash of the full old path, so
# BaseStr[Baseloc:] starts with '/' (and may still contain part of BASE_OLD),
# while ``filename`` is a relative path. The inequality below therefore looks
# like it is always true - confirm whether a same-path check was intended.
389 Baseloc = BaseStr.find('/')
390 if not BaseStr[Baseloc:] == filename:
391 #print('Dupes - %s ==> %s' % (BaseStr[Baseloc:], filename))
392 fdupes[BaseStr] = filename
393 logging.info('Total Duplicate files %d' % (len(fdupes)))
# Compare the on-disk sizes of a source and destination file.
#   src_file / dst_file - paths whose sizes are read with os.path.getsize.
# NOTE(review): the bodies of both branches are not visible in this excerpt.
# Presumably the larger size is used to update the module-level MEM_REQ /
# MEM_FILE values that the entry logic logs at the end - confirm against the
# full file.
397 def SS_UpdateSize(src_file, dst_file):
400 oldsize_d = os.path.getsize(src_file)
401 newsize_d = os.path.getsize(dst_file)
402 if oldsize_d >= newsize_d:
# Build the file-system delta for one partition.
#   PART_NAME            - partition name; used in output file names.
#   BASE_OLD / BASE_NEW  - root directories of the old and new trees.
#   Old_files / New_files - relative file paths in each tree.
#   Old_dirs / New_dirs   - relative directory paths in each tree.
#   OUT_DIR              - directory receiving <PART_NAME>.txt, patch files and
#                          the archive of new files.
#   ATTR_FILE            - attribute document that collected attribute lines
#                          are appended to.
# Output: <OUT_DIR>/<PART_NAME>.txt with DIFF / MOVE / DEL / SYM / HARD records
# followed by a PaTcHCoUnT summary line. New files are staged under
# 'run/upgrade-sysroot/' and archived with the ZIPUTIL command.
# NOTE(review): many lines of this function (counter and list initialisation,
# several loop headers, ``else:`` / ``try:`` lines) are not visible in this
# excerpt and indentation is lost, so nesting stated below is inferred from
# variable use and should be confirmed against the full file.
411 def SS_Generate_Delta(PART_NAME, BASE_OLD, Old_files, Old_dirs, BASE_NEW, New_files, New_dirs, OUT_DIR, ATTR_FILE):
412 print('Going from %d files to %d files' % (len(Old_files), len(New_files)))
413 logging.info('Going from %d files to %d files' % (len(Old_files), len(New_files)))
415 # First let's fill up these categories
429 # Get dictionaries used for hardlinks form both directories
430 New_hardlinks = get_hardlinks(BASE_NEW)
431 Old_hardlinks = get_hardlinks(BASE_OLD)
# --- Phase 1: classify entries ---------------------------------------------
# Files only in NEW. ``elt not in Old_files`` is a linear scan when Old_files
# is a list, making this loop quadratic in the number of files.
434 for elt in New_files:
435 if elt not in Old_files:
436 files_new.append(elt)
437 logging.info('New files %s' % elt)
439 # Generate Delete List
440 for elt in Old_files:
441 if elt not in New_files:
442 # Cant we just append it here only if this is NOT a directory???? so that we have list of removed files ONLY. including directories
443 files_removed.append(elt)
444 logging.info('Old files %s' % elt)
# Directories only in OLD (loop header not visible).
447 #print('List of Old Dirs %s' % elt)
448 # Delete END logic goes in hand with UPG, After Diffs and moves, DEL END should be done.
449 if elt not in New_dirs:
450 Dir_removed.append(elt)
451 logging.info('Old Dirs %s' % elt + '/')
# Directories only in NEW (loop header not visible).
454 if elt not in Old_dirs:
455 Dir_Added.append(elt)
456 #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
458 # What files have changed contents but not name/path?
# NOTE(review): a condition restricting this loop to paths present in both
# trees is presumably on the line not visible after the loop header.
459 for elt in New_files:
461 # Both are symbolic linkes and they differ
462 src_file = BASE_OLD + '/' + elt
463 dst_file = BASE_NEW + '/' + elt
464 #print('Files Changed - %s -%s' % (src_file,dst_file))
465 if os.path.islink(src_file) and os.path.islink(dst_file):
466 if not (os.readlink(src_file) == os.readlink(dst_file)):
467 files_changed.append(elt)
468 #print('%d Sym link files changed' % len(files_changed))
469 logging.info('Sym links Changed - %s' % elt)
471 files_unchanged.append(elt)
472 # Both are hardlinks - we add them because we can't be sure if file they point to changes
473 elif elt in New_hardlinks and elt in Old_hardlinks:
474 files_changed.append(elt)
475 # Both are Normal files and they differ. (Is file returns true in case of sym/hardlink also,
476 # so additional check to find either of the file is sym/hardlink)
477 elif (not (os.path.islink(src_file) or os.path.islink(dst_file))) \
478 and (not (elt in New_hardlinks or elt in Old_hardlinks)) \
479 and os.path.isfile(src_file) and os.path.isfile(dst_file):
# filecmp.cmp is called with its default (shallow) mode here.
480 if not filecmp.cmp(src_file, dst_file):
481 files_changed.append(elt)
482 #print('%d Normal files changed' % len(files_changed))
483 #print('Files Changed - %s' % elt)
485 files_unchanged.append(elt)
486 # File types differ between BASE and TARGET
488 logging.info('Files are of diff types but same names Src- %s Des- %s' % (src_file, dst_file))
489 # Both file types have changed and they differ
490 # Case 1: First Delete the OLD entry file type (Be it anything)
491 # Processing and updating partition txt file will be done under REMOVED case and NEW files case accordingly, we just make an entry here
492 files_removed.append(elt)
493 files_new.append(elt)
# --- Phase 2: verbatim list (first pass) -----------------------------------
# Entries named in VERBATIM_LIST are taken out of the changed/new lists so
# that no patch is produced for them; changed ones are queued for deletion.
495 # HANDLING VERBATIM - Remove from changed list and delete the entries on device first
496 # This script is called partition wise, So, how do u want to handle it? (specialy for delete case?)
498 print("Check for any verbatim under - %s" % VERBATIM_LIST)
499 if SUPPORT_VERBATIM == "TRUE" and os.path.exists(VERBATIM_LIST):
500 with open(VERBATIM_LIST, 'r') as F_News:
501 lines = set(F_News.read().splitlines())
503 if line in files_changed:
504 files_changed.remove(line)
505 files_removed.append(line)
506 if line in files_new:
507 files_new.remove(line)
# --- Phase 3: rename detection by name prefix ------------------------------
# The base name is split at every digit and the part before the first digit
# is used as the key. Later entries with the same key overwrite earlier ones
# because the key maps to a single path.
509 # Currently if Version or number is the first character of the file, then we are NOT making any diffs.
510 if SUPPORT_RENAME == "TRUE":
511 for elt in files_removed:
512 if os.path.isfile(BASE_OLD + '/' + elt):
513 FileName = path_leaf(elt)
514 entries = re.split('[0-9]', FileName)
515 # Gives the STRING part of NAME. if name starts with version then later part wil b string
516 #print('Entires under removed list after split - %s %s - %s' % (FileName, entries[0], elt))
517 # If version is starting at the begining of the string?? shd we hav additional check for such cases??
518 if len(entries[0]) > 0:
519 files_Del_List.update({entries[0]: elt})
521 for elt in files_new:
522 if os.path.isfile(BASE_NEW + '/' + elt):
523 FileName = path_leaf(elt)
524 entries = re.split('[0-9]', FileName)
525 #print('Entires under NEWfiles list after split - %s %s - %s' % (FileName, entries[0], elt))
526 if len(entries[0]) > 0:
527 files_New_List.update({entries[0]: elt})
# NOTE(review): dict.iteritems() exists only on Python 2.
529 for key, value in files_Del_List.iteritems():
530 #print('Key value pair -%s -%s' % (key, value))
531 if key in files_New_List:
532 # this file is the same name in both!
533 src_file = BASE_OLD + '/' + value
534 dst_file = BASE_NEW + '/' + files_New_List[key]
535 # we don't want to move hardlinks
536 if ishardlink(src_file) or ishardlink(dst_file):
537 logging.debug('Cannot diff as one of them is a hardlink')
538 elif os.path.islink(src_file) or os.path.islink(dst_file):
539 logging.debug('Cannot diff as one of them is Symlink')
540 elif os.path.isdir(src_file) or os.path.isdir(dst_file):
541 logging.debug('Cannot diff as one of them is dir')
# Renamed pairs are stored as [new_path, old_path].
543 #Pick the best diff of same type and diff names
544 files_renamed.append([files_New_List[key], value])
545 files_removed.remove(value)
546 files_new.remove(files_New_List[key])
550 Partition.txt contains Protocol for UPI
551 Types Supported: DIFFS, MOVES, NEWS, DELETES, SYMDIFFS, SYMNEWS.
# --- Phase 4: open output documents ----------------------------------------
# Symlink and hardlink records go to temporary side files that are merged
# into the partition document at the end and then removed.
562 SymLinkDoc = OUT_DIR + '/' + PART_NAME + SYMLINK_DOC_NAME
563 HardLinkDoc = OUT_DIR + '/' + PART_NAME + HARDLINK_DOC_NAME
564 Partition_Doc = open(OUT_DIR + '/' + PART_NAME + '.txt', 'w')
565 Partition_Doc_SymLinks = open(SymLinkDoc, 'w')
566 Partition_Doc_HardLinks = open(HardLinkDoc, "w")
# --- Phase 5: emit records for changed files -------------------------------
568 print("writing diff'ed changed files...")
569 for elt in files_changed:
570 dst_file = BASE_NEW + '/' + elt
571 src_file = BASE_OLD + '/' + elt
572 # Both files are symbolic links and they differ
573 if os.path.islink(dst_file) and os.path.islink(src_file):
574 # Both are symlinks and they differ
575 logging.debug(' File Changed is Link %s ' % dst_file)
576 patch = os.readlink(dst_file)
577 Sym_Diff_Cnt = Sym_Diff_Cnt + 1
578 Partition_Doc_SymLinks.write('SYM:DIFF:%s:%s:%s\n' % (elt, elt, patch))
579 Update_Attr(elt, "SYM", File_Attributes, Sym_Attributes)
580 # Both are hardlinks and they differ (point to something different, new/changed file)
# NOTE(review): this test starts with ``if`` rather than ``elif``. If it sits
# at the same level as the symlink test above, a changed symlink also flows
# through this chain and may reach the final fallback that queues it for
# delete + new - confirm the intended nesting.
581 if elt in Old_hardlinks and elt in New_hardlinks:
582 if Old_hardlinks[elt] != New_hardlinks[elt] or New_hardlinks[elt] in files_changed or New_hardlinks[elt] in files_new:
583 logging.debug('Hardlinks changed %s %s' % (src_file, dst_file))
584 patch = New_hardlinks[elt]
586 Partition_Doc_HardLinks.write('HARD:DIFF:%s:%s:%s\n' % (elt, elt, patch))
587 # Both are NORMAL files and they differ
588 elif (not (os.path.islink(src_file) or os.path.islink(dst_file))) \
589 and (not (elt in Old_hardlinks or elt in New_hardlinks)) \
590 and os.path.isfile(dst_file) and os.path.isfile(src_file):
591 # Both are files and they differ
592 Diff_Cnt = Diff_Cnt + 1
593 patchName = (DIFF_PREFIX + '%d_%s_' + PART_NAME + DIFF_SUFFIX) % (Diff_Cnt, path_leaf(elt))
594 patchLoc = '%s/%s' % (OUT_DIR, patchName)
595 logging.debug(' File Differ %s %s' % (src_file, dst_file))
596 SS_UpdateSize(src_file, dst_file)
# On a diff-tool failure the file is queued as a plain new file and the diff
# counter is rolled back (the test on ``ret`` is not visible here).
599 ret = subprocess.call([DIFF_UTIL, src_file, dst_file, patchLoc])
601 logging.debug('Failed to create diff %d %s %s\n' % (ret, src_file, dst_file))
602 files_new.append(elt)
603 Diff_Cnt = Diff_Cnt - 1
605 Partition_Doc.write('DIFF:REG:%s:%s:%s:%s:%s\n' % (elt, elt, hash_file(src_file), hash_file(dst_file), patchName))
607 Update_Attr(elt, "FILE", File_Attributes, Sym_Attributes)
608 # Both differ but they are of diff types
610 # Processing and updating partition txt file will be done under REMOVED case and NEW files case accordingly, we just make an entry here
611 files_removed.append(elt)
612 files_new.append(elt)
# --- Phase 6: identical content under a different path -> MOVE --------------
614 fdupes = find_dupes_list(BASE_OLD, BASE_NEW, files_removed, files_new, Old_hardlinks, New_hardlinks)
# NOTE(review): dict.iteritems() exists only on Python 2.
615 for oldpath, newpath in fdupes.iteritems():
616 logging.info('Dupes %s -> %s' % (oldpath, newpath))
# NOTE(review): files_removed appears to be modified (remove) while it is
# being iterated by this loop; removing the current element makes the list
# iterator skip the element that follows it. Iterating over a copy would
# avoid that - confirm.
617 for elt in files_removed:
618 src_file = BASE_OLD + '/' + elt
619 # If parent directory is deleted.. & del end not possible. (==> Moves should be done before deletes in ENGINE)
620 if src_file in fdupes.keys():
621 dst_file = BASE_NEW + '/' + fdupes[src_file]
622 logging.debug(' File Moved %s ==> %s' % (src_file, dst_file))
623 Move_Cnt = Move_Cnt + 1
624 Partition_Doc.write('MOVE:REG:%s:%s:%s\n' % (elt, fdupes[src_file], hash_file(src_file)))
625 files_removed.remove(elt)
626 files_new.remove(fdupes[src_file])
# --- Phase 7: best-match rename search -------------------------------------
627 # Should be placed after removing duplicates, else they will be filtered here.
628 # loop shd b for all NEW files, rather than for all delete files (Current understanding)
629 # First Step: Sort & Filter out unwanted files
630 # Minimum condition used is,
631 # 1. File name should match 70%
632 # 2. Extensions should be same
633 # 3. File name length shd b greater than 3 char
634 # 4. As we are using sorting on file names, once file name does not match and R_Flag is set to true, we nee not check remaining files. So, will execute break.
635 # 5. Should consider editdistance for RENAME LOGIC ==> TBD
# Work on copies sorted by base name so the originals can be edited later.
636 Base_DelList = files_removed[:]
637 Base_NewList = files_new[:]
638 DelList = sorted(Base_DelList, key=path_leaf)
639 NewList = sorted(Base_NewList, key=path_leaf)
640 logging.debug('Rename Logic before filter: Delcount -%d NewCount -%d' % (len(DelList), len(NewList)))
644 # Remove unwanted items which we cant make diff with for rename logic
# Filter for the delete list (loop header and branch bodies not visible).
646 if os.path.islink(BASE_OLD + '/' + file):
648 elif ishardlink(BASE_OLD + '/' + file):
650 elif os.path.isdir(BASE_OLD + '/' + file):
654 #logging.debug('Sorted del list - %s' % (file))
# Filter for the new list (loop header and branch bodies not visible).
659 if os.path.islink(BASE_NEW + '/' + file):
661 elif ishardlink(BASE_NEW + '/' + file):
663 elif os.path.isdir(BASE_NEW + '/' + file):
665 elif len(path_leaf(file)) <= 3:
666 logging.debug('Ignored for best picks -%s ' % (BASE_NEW + '/' + file))
671 logging.debug('Rename Logic After filter: Delcount -%d NewCount -%d' % (len(DelList), len(NewList)))
673 for new_file in NewList:
675 DirPathNew = path_head(new_file)
676 FileNameNew = path_leaf(new_file)
# The size of the new file is the baseline a candidate patch must beat.
678 winning_patch_sz = os.path.getsize(BASE_NEW + '/' + new_file)
679 New_fs = winning_patch_sz
682 for del_file in DelList:
683 FileNameOld = path_leaf(del_file)
# NOTE(review): ``len(...) * 7 / 10`` is used as a slice bound. Under Python 2
# this is integer division; under Python 3 ``/`` yields a float and the slice
# raises TypeError.
684 if (FileNameOld.startswith(FileNameNew[:len(FileNameNew) * 7 / 10]) and (os.path.splitext(FileNameNew)[1] == os.path.splitext(del_file)[1])):
685 #winning_patch_sz = 0.9 * os.path.getsize(BASE_NEW+'/'+new_file)
686 # Percentage difference between two file sizes is within 30%, then we consider for diff generation
687 Del_fs = os.path.getsize(BASE_OLD + '/' + del_file)
688 v1 = abs(New_fs - Del_fs)
689 v2 = (New_fs + Del_fs) / 2
# NOTE(review): with Python 2 integer operands ``v1 / v2`` truncates to an
# integer before the multiplication, so the result exceeds 30 only when
# v1 >= v2; the effective threshold is then 100%, not the 30% described in
# the comment above - confirm.
690 if(v2 <= 0 or ((v1 / v2) * 100) > 30):
691 logging.debug('Ignore diff generation New_fs - %d Del_Fs - %d' % (New_fs, Del_fs))
693 logging.debug('I can compute diff between %s %s Del_Fs - %d New_Fs - %d' % (del_file, new_file, Del_fs, New_fs))
# A trial patch is generated for every candidate pair just to measure it.
695 DiffSize = measure_two_filediffs(BASE_OLD + '/' + del_file, BASE_NEW + '/' + new_file)
696 if (DiffSize < 0.8 * winning_patch_sz):
697 winning_patch_sz = DiffSize
698 winning_file = del_file
699 elif (not FileNameOld.startswith(FileNameNew[:len(FileNameNew) * 7 / 10]) and R_Flag == 'TRUE'):
700 logging.debug('Because nex set of files will not have matching name - break @@ %s %s' % (del_file, new_file))
702 if len(winning_file) > 0:
# NOTE(review): DiffSize holds the most recently measured size, which is not
# necessarily the size of the winning candidate (winning_patch_sz).
703 logging.debug('Best Pick -%s ==> %s [%d]' % (winning_file, new_file, DiffSize))
704 files_renamed.append([new_file, winning_file])
705 DelList.remove(winning_file)
706 files_removed.remove(winning_file)
707 files_new.remove(new_file)
709 #********************** Files should NOT be deleted for any such renames ***********************
# --- Phase 8: emit records for renamed pairs -------------------------------
# Each entry is [new_path, old_path]: elt[0] is the new name, elt[1] the old.
711 if SUPPORT_RENAME == "TRUE":
712 for elt in files_renamed:
713 src_file = BASE_OLD + '/' + elt[1]
714 dst_file = BASE_NEW + '/' + elt[0]
715 Diff_Cnt = Diff_Cnt + 1
716 patchName = (DIFF_PREFIX + '%d_%s_' + PART_NAME + DIFF_SUFFIX) % (Diff_Cnt, path_leaf(elt[1]))
717 #patchName = (DIFF_PREFIX+'_%s'+DIFF_SUFFIX) % (path_leaf(elt[0]))
718 patchLoc = '%s/%s' % (OUT_DIR, patchName)
719 logging.debug(' File Renamed %s ==> %s' % (src_file, dst_file))
720 # Should be careful of renaming files??
721 # Should we consider measure_two_filediffs ?? so that patch size is NOT greater than actual file?
722 # What if folder path has numerics??
724 if os.path.isdir(src_file) or os.path.isdir(dst_file):
725 # This case never occurs??
726 Partition_Doc.write('"%s" and "%s" renamed 0 0\n' % (elt[0], elt[1]))
727 Update_Attr(elt[0], "FILE", File_Attributes, Sym_Attributes)
728 # Make sure these files are PROPER and they shd NOT be symlinks
729 elif not (os.path.islink(src_file) or os.path.islink(dst_file)) \
730 and not (elt[0] in New_hardlinks or elt[1] in Old_hardlinks) \
731 and (os.path.isfile(src_file) and os.path.isfile(dst_file)):
# Identical content under a new name is a MOVE; the diff counter that was
# incremented above is rolled back.
732 if filecmp.cmp(src_file, dst_file):
733 Move_Cnt = Move_Cnt + 1
734 Diff_Cnt = Diff_Cnt - 1
735 Partition_Doc.write('MOVE:REG:%s:%s:%s\n' % (elt[1], elt[0], hash_file(src_file)))
738 ret = subprocess.call([DIFF_UTIL, src_file, dst_file, patchLoc])
740 logging.debug('Failed to create diff %d %s %s\n' % (ret, src_file, dst_file))
# NOTE(review): ``elt`` here is the [new_path, old_path] pair, not a path
# string. Appending it to files_new means the later "new files" loop would
# evaluate BASE_NEW + '/' + <list>, which raises TypeError. Presumably elt[0]
# was intended - confirm.
741 files_new.append(elt)
742 Diff_Cnt = Diff_Cnt - 1
744 Partition_Doc.write('DIFF:REG:%s:%s:%s:%s:%s\n' % (elt[1], elt[0], hash_file(src_file), hash_file(dst_file), patchName))
746 SS_UpdateSize(src_file, dst_file)
747 Update_Attr(elt[0], "FILE", File_Attributes, Sym_Attributes)
# --- Phase 9: verbatim list (second pass) ----------------------------------
749 # HANDLING VERBATIM - We Process NEWs and DELETEs for Verbatim list ONLY after processing duplicates & rename functionality.
750 # So that, the rename functionality will NOT create PATCH instead of verbatims.
752 if SUPPORT_VERBATIM == "TRUE" and os.path.exists(VERBATIM_LIST):
753 with open(VERBATIM_LIST, 'r') as F_News:
754 lines = set(F_News.read().splitlines())
756 if line not in files_new:
757 if os.path.exists(BASE_NEW + '/' + line):
758 files_new.append(line)
759 Verbatim_Cnt = Verbatim_Cnt + 1
760 logging.debug("Added to list of verbatims -%s" % BASE_NEW + '/' + line)
# --- Phase 10: emit delete records -----------------------------------------
762 for elt in files_removed:
763 # if files are part of patches after renaming, we shd remove them as part of removed.
764 src_file = BASE_OLD + '/' + elt
765 if os.path.islink(src_file):
766 Partition_Doc.write('DEL:SYM:%s\n' % (elt))
767 elif elt in Old_hardlinks:
768 Partition_Doc.write('DEL:HARD:%s\n' % (elt))
769 elif os.path.isdir(src_file):
770 # If we change to DIR TYPE, then the same token should be modified on UA also and SHA should be accordingly passed.
771 Partition_Doc.write('DEL:REG:%s:NA\n' % (elt))
773 Partition_Doc.write('DEL:REG:%s:%s\n' % (elt, hash_file(src_file)))
774 logging.debug(' File Deleted %s' % src_file)
775 Del_Cnt = Del_Cnt + 1
# Reverse-sorted order puts a child path before its parent path, so nested
# directories are listed for deletion before the directories containing them.
777 Dir_removed.sort(reverse=True)
778 for elt in Dir_removed:
779 # if Dir is empty, add it to the removed list.
780 src_file = BASE_OLD + '/' + elt
781 # Irrespective of weather files are MOVED or DIFF'ed, we can delete the folders. This action can be performed at the end.
782 # It covers symlinks also, as NEW symlinks cannot point to NON existant folders of TARGET (NEW binary)
783 if os.path.isdir(src_file):
784 Partition_Doc.write('DEL:END:%s\n' % (elt))
785 Del_Cnt = Del_Cnt + 1
786 logging.debug(' Dir Deleted- %s' % src_file)
# --- Phase 11: stage new files ---------------------------------------------
# NOTE(review): the staging path is a literal here although NEW_FILES_PATH
# holds the same directory; the staging directory check is also repeated for
# every element of the loop.
788 for elt in files_new:
789 dst_file = BASE_NEW + '/' + elt
790 newfiles_dest_path = 'run/upgrade-sysroot/'
792 ensure_dir_exists(newfiles_dest_path)
793 except FileExistsError as exc:
794 logging.error('Directory %s used by this script is already an existing file' % newfiles_dest_path)
796 if os.path.islink(dst_file):
797 patch = os.readlink(dst_file)
798 logging.debug(' File New Links %s' % elt)
799 Partition_Doc_SymLinks.write('SYM:NEW:%s:%s\n' % (elt, patch))
800 # What if this is only a new sym link and folder already exists??? Should recheck
801 destpath = newfiles_dest_path + elt
802 if not os.path.exists(path_head(destpath)):
803 os.makedirs(path_head(destpath))
804 logging.info('New SymLink - Adding missing Dir')
805 Update_Attr(elt, "SYM", File_Attributes, Sym_Attributes)
806 Sym_New_Cnt = Sym_New_Cnt + 1
807 elif elt in New_hardlinks:
808 patch = New_hardlinks[elt]
809 logging.debug('File new hardlink %s' % elt)
810 Partition_Doc_HardLinks.write('HARD:NEW:%s:%s\n' %(elt, patch))
811 destpath = newfiles_dest_path + elt
812 if not os.path.exists(path_head(destpath)):
813 os.makedirs(path_head(destpath))
814 logging.info('New hardlink - Adding missing Dir')
816 elif os.path.isdir(dst_file): # We create just empty directory here
817 destpath = newfiles_dest_path + elt
818 if not os.path.exists(destpath):
819 os.makedirs(destpath)
820 logging.debug(' File New Dir %s' % destpath)
821 New_Cnt = New_Cnt + 1
# Regular new file: make sure its directory exists, then copy it with
# metadata (copy2) unless it is a FIFO.
823 New_Cnt = New_Cnt + 1
824 destpath = newfiles_dest_path + elt
825 destdir = os.path.dirname(destpath)
826 logging.debug('New files - %s ==> %s' % (dst_file, destdir))
828 if not os.path.isdir(destdir):
831 except Exception as exc:
832 logging.critical('Error in NEW files DIR entry -%s' % destdir)
836 if not stat.S_ISFIFO(os.stat(dst_file).st_mode):
837 shutil.copy2(dst_file, destpath)
838 logging.debug('New files copied from- %s to- %s' % (dst_file, destpath))
839 except Exception as exc:
840 logging.critical('Error in NEW files entry -%s -%s' % (dst_file, destpath))
842 Update_Attr(elt, "FILE", File_Attributes, Sym_Attributes)
# New directories are created in the staging tree as well.
844 for elt in Dir_Added:
845 newfiles_dest_path = 'run/upgrade-sysroot/'
847 ensure_dir_exists(newfiles_dest_path)
848 except FileExistsError as exc:
849 logging.error('Directory %s used by this script is already an existing file' % newfiles_dest_path)
851 destpath = newfiles_dest_path + elt
852 if not os.path.exists(destpath):
853 os.makedirs(destpath)
854 logging.debug(' DirList New Dir %s' % destpath)
855 New_Cnt = New_Cnt + 1
# --- Phase 12: archive the staged files ------------------------------------
857 # Base directory should be system
# NOTE(review): this is a Python 2 print statement (a syntax error on
# Python 3), unlike the print(...) calls used elsewhere in this function.
858 print 'Compressing New files'
859 if (New_Cnt > 0 or Sym_New_Cnt > 0):
860 WorkingDir = os.getcwd()
861 os.chdir(os.getcwd() + "/" + NEW_FILES_PATH)
862 logging.info('Curr Working Dir - %s' % os.getcwd())
# NOTE(review): the archive command is a concatenated shell string run via
# os.system, with output appended to LOGFILE through shell redirection; its
# exit status is not checked in the visible lines.
863 os.system(ZIPUTIL + NEW_FILES_PATH + " >> " + LOGFILE)
864 shutil.move(NEW_FILES_ZIP_NAME, WorkingDir + "/" + OUT_DIR)
865 # New file size?? cos, we extract system.7z from delta.tar and then proceed with decompression
# Both arguments are the same archive path, so the size is compared to itself.
866 SS_UpdateSize(WorkingDir + "/" + OUT_DIR + "/" + NEW_FILES_ZIP_NAME, WorkingDir + "/" + OUT_DIR + "/" + NEW_FILES_ZIP_NAME)
# NOTE(review): presumably the working directory is restored on the line not
# visible above, since this relative path is resolved against it.
868 shutil.rmtree(NEW_FILES_PATH)
869 # use 7z a system.7z ./*
# --- Phase 13: summary, attribute append, document merge --------------------
871 #logging.info('%d Dir to be removed' % len(Dir_removed))
872 logging.info('%d files unchanged' % len(files_unchanged))
873 logging.info('%d files files_renamed' % len(files_renamed))
874 logging.info('%d files NEW' % len(files_new))
875 logging.info('%d File attr' % len(File_Attributes))
876 logging.info('%d Sym attr' % len(Sym_Attributes))
877 logging.info('PaTcHCoUnT:Diffs-%d Moves-%d News-%d Delets-%d SymDiffs-%d SymNews-%d HardDiffs-%d HardNews-%d Verbatim -%d\n' % \
878 (Diff_Cnt, Move_Cnt, New_Cnt, Del_Cnt, Sym_Diff_Cnt, Sym_New_Cnt, Hard_Diff_Cnt, Hard_New_Cnt, Verbatim_Cnt))
879 print('PaTcHCoUnT:Diffs-%d Moves-%d News-%d Delets-%d SymDiffs-%d SymNews-%d HardDiffs-%d HardNews-%d Verbatim -%d\n' % \
880 (Diff_Cnt, Move_Cnt, New_Cnt, Del_Cnt, Sym_Diff_Cnt, Sym_New_Cnt, Hard_Diff_Cnt, Hard_New_Cnt, Verbatim_Cnt))
882 # There could be duplicates, TODO, can check before adding..
# Collected attribute lines are appended to the attribute document.
# NOTE(review): the close of ATTR_FILE_D is not visible in this excerpt; the
# size check at the end of the function relies on the data being flushed.
883 ATTR_FILE_D = open(ATTR_FILE, 'a+')
884 for elt in File_Attributes:
885 ATTR_FILE_D.write(elt)
886 for elt in Sym_Attributes:
887 ATTR_FILE_D.write(elt)
# Merge the side documents into the partition document: symlink records in
# written order, hardlink records in reverse order, then the count line.
891 Partition_Doc_SymLinks.close()
892 Partition_Doc_HardLinks.close()
893 Partition_Read_SymLinks = open(SymLinkDoc, 'r+')
894 Partition_Read_HardLinks = open(HardLinkDoc, 'r+')
895 Partition_Doc.write(Partition_Read_SymLinks.read())
896 for line in reversed(Partition_Read_HardLinks.readlines()):
897 Partition_Doc.write(line)
# The count line written to the document omits Verbatim_Cnt, unlike the
# logged summary above.
898 Partition_Doc.write('PaTcHCoUnT:%d %d %d %d %d %d %d %d\n' % \
899 (Diff_Cnt, Move_Cnt, New_Cnt, Del_Cnt, Sym_Diff_Cnt, Sym_New_Cnt, Hard_Diff_Cnt, Hard_New_Cnt))
900 Partition_Read_SymLinks.close()
901 Partition_Read_HardLinks.close()
902 Partition_Doc.close()
903 os.remove(SymLinkDoc)
904 os.remove(HardLinkDoc)
# If every counter is zero and the attribute document is empty, nothing was
# generated: the whole output directory is removed.
906 if Diff_Cnt + Move_Cnt + New_Cnt + Del_Cnt + Sym_Diff_Cnt + Sym_New_Cnt + Verbatim_Cnt + Hard_Diff_Cnt + \
907 Hard_New_Cnt + os.path.getsize(ATTR_FILE) == 0:
908 print('No Delta Generated for %s - %s' % (PART_NAME, OUT_DIR))
909 logging.info('No Delta Generated for %s' % PART_NAME)
910 shutil.rmtree(OUT_DIR)
914 return (info.external_attr >> 16) == 0120777
# Copy src to dest by running the external command ``cp -rp`` (recursive,
# preserving mode/ownership/timestamps).
#   src  - source path handed to cp.
#   dest - destination path handed to cp.
# NOTE(review): the exit status of cp is not captured in the visible line, and
# the surrounding try/except lines are not visible in this excerpt. The
# remaining lines are a commented-out shutil-based alternative.
917 def NewFiles(src, dest):
919 subprocess.call(['cp', '-rp', src, dest])
921 #shutil.copytree(src, dest)
922 #except OSError as e:
923 # If the error was caused because the source wasn't a directory
924 #if e.errno == errno.ENOTDIR:
925 #shutil.copy2(src, dest)
927 #print('Directory not copied. Error: %s' % e)
# Generate a trial patch between two files and measure its size in bytes.
#   src - path of the old file.
#   dst - path of the new file.
# The patch is written to the fixed name 'temp.patch' in the current working
# directory.
# NOTE(review): the diff tool's exit status is ignored; if the tool fails to
# write the patch, getsize raises (or measures a stale file from an earlier
# call). The cleanup and return statements are not visible in this excerpt -
# callers compare the result numerically, so presumably result_size is
# returned.
930 def measure_two_filediffs(src, dst):
931 patchLoc = 'temp.patch'
932 # TODO ensure this is excepts an error
933 subprocess.call([DIFF_UTIL, src, dst, patchLoc])
934 result_size = os.path.getsize(patchLoc)
def ishardlink(path):
    """Return True when *path* has more than one hard link.

    Args:
        path: File-system path; symlinks are followed because ``os.stat`` is
            used.

    Returns:
        bool: ``True`` if the link count reported by ``os.stat`` is greater
        than one, otherwise ``False``.

    Raises:
        OSError: If *path* cannot be stat'ed.

    Note that directories commonly report a link count above one, so callers
    that must exclude directories need to test for them first.
    """
    # The visible text had no return statements; return the comparison
    # result directly so both outcomes yield a real boolean.
    return os.stat(path).st_nlink > 1
946 return os.stat(path).st_ino
def get_hardlinks(base):
    """Map every additional hard-link path under *base* to its canonical path.

    The tree is walked top-down without following directory symlinks, and the
    file names of each directory are visited in sorted order. The first path
    seen for an inode becomes the canonical one; every later path sharing
    that inode is recorded as ``{later_path: canonical_path}``. Both are
    relative to *base*.

    Args:
        base: Root directory to scan.

    Returns:
        dict: Relative path of each extra link -> relative path of the first
        link seen for the same inode. Canonical paths themselves are not
        keys. Files with a single link and symlinks are ignored.
    """
    hardlinks_dict = {}
    inodes_dict = {}

    for root, _dirs, files in os.walk(base, topdown=True, followlinks=False):
        for name in sorted(files):
            file_name = os.path.join(root, name)
            if os.path.islink(file_name):
                continue
            # One stat call supplies both the link count and the inode.
            info = os.stat(file_name)
            if info.st_nlink <= 1:
                continue
            rel_path = os.path.relpath(file_name, base)
            if info.st_ino not in inodes_dict:
                inodes_dict[info.st_ino] = rel_path
            else:
                hardlinks_dict[rel_path] = inodes_dict[info.st_ino]

    return hardlinks_dict
# Body of the tree lister: collect relative paths of files and directories
# under ``path``.
# NOTE(review): the ``def`` line and the initialisation of all_files /
# all_dirs are not visible in this excerpt; the entry logic calls
# Get_Files(BASE) and unpacks a (files, dirs) pair, which matches the return
# below.
# The walk is bottom-up (topdown=False) and does not descend into symlinked
# directories.
971 for root, directories, filenames in os.walk(path, topdown=False, followlinks=False):
972 for directory in directories:
973 #DirName = os.path.join(root+'/',directory)
974 DirName = os.path.join(root, directory)
# A symlink that points at a directory is recorded as a FILE entry.
975 if os.path.islink(DirName):
976 logging.debug('This is symlink pointing to dir -%s' % DirName)
977 all_files.append(os.path.relpath(DirName, path))
# Both the empty-directory branch and the remaining branch append the
# directory to all_dirs in the visible lines, so the emptiness test does not
# change the outcome.
978 elif not os.listdir(DirName):
979 #print('*****Empty Directory******* -%s', DirName)
980 # This should NOT be appended ??? Empty dir shd b considered
981 all_dirs.append(os.path.relpath(DirName, path))
983 all_dirs.append(os.path.relpath(DirName, path))
# Every non-directory name reported by os.walk is recorded as a file.
984 for filename in filenames:
985 FileName = os.path.join(root, filename)
986 all_files.append(os.path.relpath(FileName, path))
990 return all_files, all_dirs
993 USAGE_DOCSTRING = """
994 Generate Delta using BASEOLD AND BASE NEW
995 Attributes is optional
996 Usage: CreatePatch.py UPDATE_TYPE PARTNAME OLDBASE NEWBASE OUTFOLDER
def Usage(docstring):
    """Print *docstring* (trailing newlines removed) followed by the common help text.

    Args:
        docstring: Usage text to print first.
    """
    # Single-argument print(...) produces the same output under Python 2 and
    # Python 3 and matches the call form used elsewhere in this file.
    print(docstring.rstrip("\n"))
    # NOTE(review): COMMON_DOCSTRING is not defined in the visible part of
    # this file (only USAGE_DOCSTRING is); confirm it exists at module level,
    # otherwise this line raises NameError.
    print(COMMON_DOCSTRING)
1005 if __name__ == '__main__':