From 8b38a2c0a55a9140115a91959326918d99bea435 Mon Sep 17 00:00:00 2001 From: Nikola Tesic Date: Wed, 28 Sep 2022 12:34:32 +0200 Subject: [PATCH] [Debugify][OriginalDIMode] Update script to handle large JSON reports This patch updates llvm/utils/llvm-original-di-preservation.py to create more compact HTML verify-debuginfo-preserve reports by: - removing duplicated debug info bugs, - introducing a -compress option to create a highly compressed report. Additionally, this patch makes the script able to process very large JSON inputs. That is done by reading & analyzing the JSON report in chunks. Differential Revision: https://reviews.llvm.org/D115617 --- .../Inputs/expected-compressed.html | 110 ++++++++++++ .../Inputs/expected-sample.html | 32 ---- .../tools/llvm-original-di-preservation/basic.test | 5 + llvm/utils/llvm-original-di-preservation.py | 192 ++++++++++++++------- 4 files changed, 242 insertions(+), 97 deletions(-) create mode 100644 llvm/test/tools/llvm-original-di-preservation/Inputs/expected-compressed.html diff --git a/llvm/test/tools/llvm-original-di-preservation/Inputs/expected-compressed.html b/llvm/test/tools/llvm-original-di-preservation/Inputs/expected-compressed.html new file mode 100644 index 0000000..43f9990 --- /dev/null +++ b/llvm/test/tools/llvm-original-di-preservation/Inputs/expected-compressed.html @@ -0,0 +1,110 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Location Bugs found by the Debugify
FileLLVM Pass NameLLVM IR InstructionFunction NameBasic Block NameAction
test.llno-nameextractvaluefnno-namenot-generate
test.llno-nameinsertvaluefnno-namenot-generate
+
+ + + + + + + + + + + +
Summary of Location Bugs
LLVM Pass NameNumber of bugs
no-name8
+
+
+ + + + + + + + + + + +
SP Bugs found by the Debugify
FileLLVM Pass NameFunction NameAction
No bugs found
+
+ + + + + + + + + + +
Summary of SP Bugs
LLVM Pass NameNumber of bugs
No bugs found
+
+
+ + + + + + + + + + + + +
Variable Location Bugs found by the Debugify
FileLLVM Pass NameVariableFunctionAction
No bugs found
+
+ + + + + + + + + + +
Summary of Variable Location Bugs
LLVM Pass NameNumber of bugs
No bugs found
+ + \ No newline at end of file diff --git a/llvm/test/tools/llvm-original-di-preservation/Inputs/expected-sample.html b/llvm/test/tools/llvm-original-di-preservation/Inputs/expected-sample.html index 6fc1b69..c861d3a 100644 --- a/llvm/test/tools/llvm-original-di-preservation/Inputs/expected-sample.html +++ b/llvm/test/tools/llvm-original-di-preservation/Inputs/expected-sample.html @@ -41,22 +41,6 @@ test.ll no-name - extractvalue - fn - no-name - not-generate - - - test.ll - no-name - insertvalue - fn1 - no-name - not-generate - - - test.ll - no-name insertvalue fn1 no-name @@ -65,22 +49,6 @@ test.ll no-name - insertvalue - fn - no-name - not-generate - - - test.ll - no-name - extractvalue - fn1 - no-name - not-generate - - - test.ll - no-name extractvalue fn1 no-name diff --git a/llvm/test/tools/llvm-original-di-preservation/basic.test b/llvm/test/tools/llvm-original-di-preservation/basic.test index 12292f2..81f987a 100644 --- a/llvm/test/tools/llvm-original-di-preservation/basic.test +++ b/llvm/test/tools/llvm-original-di-preservation/basic.test @@ -6,3 +6,8 @@ RUN: %llvm-original-di-preservation %p/Inputs/corrupted.json %t2.html | FileChec RUN: diff -w %p/Inputs/expected-skipped.html %t2.html CORRUPTED: Skipped lines: 3 CORRUPTED: Skipped bugs: 1 + +RUN: %llvm-original-di-preservation -compress %p/Inputs/sample.json %t3.html | FileCheck %s -check-prefix=COMPRESSED +RUN: diff -w %p/Inputs/expected-compressed.html %t3.html +COMPRESSED-NOT: Skipped lines: + diff --git a/llvm/utils/llvm-original-di-preservation.py b/llvm/utils/llvm-original-di-preservation.py index 73d7d4b..5b53e6a 100755 --- a/llvm/utils/llvm-original-di-preservation.py +++ b/llvm/utils/llvm-original-di-preservation.py @@ -17,17 +17,23 @@ class DILocBug: self.bb_name = bb_name self.fn_name = fn_name self.instr = instr + def __str__(self): + return self.action + self.bb_name + self.fn_name + self.instr class DISPBug: def __init__(self, action, fn_name): self.action = action self.fn_name = fn_name + 
def __str__(self): + return self.action + self.fn_name class DIVarBug: def __init__(self, action, name, fn_name): self.action = action self.name = name self.fn_name = fn_name + def __str__(self): + return self.action + self.name + self.fn_name # Report the bugs in form of html. def generate_html_report(di_location_bugs, di_subprogram_bugs, di_var_bugs, \ @@ -326,11 +332,12 @@ def generate_html_report(di_location_bugs, di_subprogram_bugs, di_var_bugs, \ print("The " + html_file + " generated.") -# Read the JSON file. -def get_json(file): +# Read the JSON file in chunks. +def get_json_chunk(file,start,size): json_parsed = None di_checker_data = [] skipped_lines = 0 + line = 0 # The file contains json object per line. # An example of the line (formatted json): @@ -354,6 +361,11 @@ def get_json(file): #} with open(file) as json_objects_file: for json_object_line in json_objects_file: + line += 1 + if line < start: + continue + if line >= start+size: + break try: json_object = loads(json_object_line) except: @@ -361,12 +373,13 @@ def get_json(file): else: di_checker_data.append(json_object) - return (di_checker_data, skipped_lines) + return (di_checker_data, skipped_lines, line) # Parse the program arguments. def parse_program_args(parser): parser.add_argument("file_name", type=str, help="json file to process") parser.add_argument("html_file", type=str, help="html file to output data") + parser.add_argument("-compress", action="store_true", help="create reduced html report") return parser.parse_args() @@ -378,8 +391,6 @@ def Main(): print ("error: The output file must be '.html'.") sys.exit(1) - (debug_info_bugs, skipped_lines) = get_json(opts.file_name) - # Use the defaultdict in order to make multidim dicts. di_location_bugs = defaultdict(lambda: defaultdict(dict)) di_subprogram_bugs = defaultdict(lambda: defaultdict(dict)) @@ -390,81 +401,132 @@ def Main(): di_sp_bugs_summary = OrderedDict() di_var_bugs_summary = OrderedDict() + # Compress similar bugs. 
+ # DILocBugs with same pass & instruction name. + di_loc_pass_instr_set = set() + # DISPBugs with same pass & function name. + di_sp_pass_fn_set = set() + # DIVarBugs with same pass & variable name. + di_var_pass_var_set = set() + + start_line = 0 + chunk_size = 1000000 + end_line = chunk_size - 1 + skipped_lines = 0 skipped_bugs = 0 - # Map the bugs into the file-pass pairs. - for bugs_per_pass in debug_info_bugs: - try: - bugs_file = bugs_per_pass["file"] - bugs_pass = bugs_per_pass["pass"] - bugs = bugs_per_pass["bugs"][0] - except: - skipped_lines += 1 - continue - - di_loc_bugs = [] - di_sp_bugs = [] - di_var_bugs = [] - - for bug in bugs: + # Process each chunk of 1 million JSON lines. + while True: + if start_line > end_line: + break + (debug_info_bugs, skipped, end_line) = get_json_chunk(opts.file_name,start_line,chunk_size) + start_line += chunk_size + skipped_lines += skipped + + # Map the bugs into the file-pass pairs. + for bugs_per_pass in debug_info_bugs: try: - bugs_metadata = bug["metadata"] + bugs_file = bugs_per_pass["file"] + bugs_pass = bugs_per_pass["pass"] + bugs = bugs_per_pass["bugs"][0] except: - skipped_bugs += 1 + skipped_lines += 1 continue - if bugs_metadata == "DILocation": - try: - action = bug["action"] - bb_name = bug["bb-name"] - fn_name = bug["fn-name"] - instr = bug["instr"] - except: - skipped_bugs += 1 - continue - di_loc_bugs.append(DILocBug(action, bb_name, fn_name, instr)) + di_loc_bugs = [] + di_sp_bugs = [] + di_var_bugs = [] - # Fill the summary dict. - if bugs_pass in di_location_bugs_summary: - di_location_bugs_summary[bugs_pass] += 1 - else: - di_location_bugs_summary[bugs_pass] = 1 - elif bugs_metadata == "DISubprogram": + # Omit duplicated bugs. + di_loc_set = set() + di_sp_set = set() + di_var_set = set() + for bug in bugs: try: - action = bug["action"] - name = bug["name"] + bugs_metadata = bug["metadata"] except: skipped_bugs += 1 continue - di_sp_bugs.append(DISPBug(action, name)) - # Fill the summary dict. 
- if bugs_pass in di_sp_bugs_summary: - di_sp_bugs_summary[bugs_pass] += 1 + if bugs_metadata == "DILocation": + try: + action = bug["action"] + bb_name = bug["bb-name"] + fn_name = bug["fn-name"] + instr = bug["instr"] + except: + skipped_bugs += 1 + continue + di_loc_bug = DILocBug(action, bb_name, fn_name, instr) + if not str(di_loc_bug) in di_loc_set: + di_loc_set.add(str(di_loc_bug)) + if opts.compress: + pass_instr = bugs_pass + instr + if not pass_instr in di_loc_pass_instr_set: + di_loc_pass_instr_set.add(pass_instr) + di_loc_bugs.append(di_loc_bug) + else: + di_loc_bugs.append(di_loc_bug) + + # Fill the summary dict. + if bugs_pass in di_location_bugs_summary: + di_location_bugs_summary[bugs_pass] += 1 + else: + di_location_bugs_summary[bugs_pass] = 1 + elif bugs_metadata == "DISubprogram": + try: + action = bug["action"] + name = bug["name"] + except: + skipped_bugs += 1 + continue + di_sp_bug = DISPBug(action, name) + if not str(di_sp_bug) in di_sp_set: + di_sp_set.add(str(di_sp_bug)) + if opts.compress: + pass_fn = bugs_pass + name + if not pass_fn in di_sp_pass_fn_set: + di_sp_pass_fn_set.add(pass_fn) + di_sp_bugs.append(di_sp_bug) + else: + di_sp_bugs.append(di_sp_bug) + + # Fill the summary dict. + if bugs_pass in di_sp_bugs_summary: + di_sp_bugs_summary[bugs_pass] += 1 + else: + di_sp_bugs_summary[bugs_pass] = 1 + elif bugs_metadata == "dbg-var-intrinsic": + try: + action = bug["action"] + fn_name = bug["fn-name"] + name = bug["name"] + except: + skipped_bugs += 1 + continue + di_var_bug = DIVarBug(action, name, fn_name) + if not str(di_var_bug) in di_var_set: + di_var_set.add(str(di_var_bug)) + if opts.compress: + pass_var = bugs_pass + name + if not pass_var in di_var_pass_var_set: + di_var_pass_var_set.add(pass_var) + di_var_bugs.append(di_var_bug) + else: + di_var_bugs.append(di_var_bug) + + # Fill the summary dict. 
+ if bugs_pass in di_var_bugs_summary: + di_var_bugs_summary[bugs_pass] += 1 + else: + di_var_bugs_summary[bugs_pass] = 1 else: - di_sp_bugs_summary[bugs_pass] = 1 - elif bugs_metadata == "dbg-var-intrinsic": - try: - action = bug["action"] - fn_name = bug["fn-name"] - name = bug["name"] - except: + # Unsupported metadata. skipped_bugs += 1 continue - di_var_bugs.append(DIVarBug(action, name, fn_name)) - - # Fill the summary dict. - if bugs_pass in di_var_bugs_summary: - di_var_bugs_summary[bugs_pass] += 1 - else: - di_var_bugs_summary[bugs_pass] = 1 - else: - # Unsupported metadata. - skipped_bugs += 1 - continue - di_location_bugs[bugs_file][bugs_pass] = di_loc_bugs - di_subprogram_bugs[bugs_file][bugs_pass] = di_sp_bugs - di_variable_bugs[bugs_file][bugs_pass] = di_var_bugs + di_location_bugs[bugs_file][bugs_pass] = di_loc_bugs + di_subprogram_bugs[bugs_file][bugs_pass] = di_sp_bugs + di_variable_bugs[bugs_file][bugs_pass] = di_var_bugs generate_html_report(di_location_bugs, di_subprogram_bugs, di_variable_bugs, \ di_location_bugs_summary, di_sp_bugs_summary, \ -- 2.7.4