[Debugify][OriginalDIMode] Update script to handle large JSON reports
author Nikola Tesic <nikola.tesic@syrmia.com>
Wed, 28 Sep 2022 10:34:32 +0000 (12:34 +0200)
committer Djordje Todorovic <djordje.todorovic@syrmia.com>
Thu, 29 Sep 2022 14:48:06 +0000 (16:48 +0200)
This patch updates llvm/utils/llvm-original-di-preservation.py to create more
compact HTML verify-debuginfo-preserve reports by:
- removing duplicated debug info bugs,
- introducing a -compress option to create a highly compressed report.
Additionally, this patch makes the script able to process very large JSON inputs.
That is done by reading and analyzing the JSON report in chunks.

Differential Revision: https://reviews.llvm.org/D115617

llvm/test/tools/llvm-original-di-preservation/Inputs/expected-compressed.html [new file with mode: 0644]
llvm/test/tools/llvm-original-di-preservation/Inputs/expected-sample.html
llvm/test/tools/llvm-original-di-preservation/basic.test
llvm/utils/llvm-original-di-preservation.py

diff --git a/llvm/test/tools/llvm-original-di-preservation/Inputs/expected-compressed.html b/llvm/test/tools/llvm-original-di-preservation/Inputs/expected-compressed.html
new file mode 100644 (file)
index 0000000..43f9990
--- /dev/null
@@ -0,0 +1,110 @@
+ <html>
+  <head>
+  <style>
+  table, th, td {
+    border: 1px solid black;
+  }
+  table.center {
+    margin-left: auto;
+    margin-right: auto;
+  }
+  </style>
+  </head>
+  <body>
+  <table>
+  <caption><b>Location Bugs found by the Debugify</b></caption>
+  <tr>
+      <th>File</th>
+    <th>LLVM Pass Name</th>
+    <th>LLVM IR Instruction</th>
+    <th>Function Name</th>
+    <th>Basic Block Name</th>
+    <th>Action</th>
+  </tr>
+  </tr>
+    <tr>
+    <td>test.ll</td>
+    <td>no-name</td>
+    <td>extractvalue</td>
+    <td>fn</td>
+    <td>no-name</td>
+    <td>not-generate</td>
+    </tr>
+    <tr>
+    <td>test.ll</td>
+    <td>no-name</td>
+    <td>insertvalue</td>
+    <td>fn</td>
+    <td>no-name</td>
+    <td>not-generate</td>
+    </tr>
+  <tr>
+</table>
+<br>
+<table>
+  <caption><b>Summary of Location Bugs</b></caption>
+  <tr>
+      <th>LLVM Pass Name</th>
+    <th>Number of bugs</th>
+  </tr>
+    <tr>
+    <td>no-name</td>
+    <td>8</td>
+    </tr>
+  <tr>
+</table>
+<br>
+<br>
+<table>
+  <caption><b>SP Bugs found by the Debugify</b></caption>
+  <tr>
+      <th>File</th>
+    <th>LLVM Pass Name</th>
+    <th>Function Name</th>
+    <th>Action</th>
+  </tr>
+<tr>
+        <td colspan='4'> No bugs found </td>
+      </tr>
+    </table>
+<br>
+<table>
+  <caption><b>Summary of SP Bugs</b></caption>
+  <tr>
+      <th>LLVM Pass Name</th>
+    <th>Number of bugs</th>
+  </tr>
+  <tr>
+<tr>
+        <td colspan='2'> No bugs found </td>
+      </tr>
+    </table>
+<br>
+<br>
+<table>
+  <caption><b>Variable Location Bugs found by the Debugify</b></caption>
+  <tr>
+      <th>File</th>
+    <th>LLVM Pass Name</th>
+    <th>Variable</th>
+    <th>Function</th>
+    <th>Action</th>
+  </tr>
+<tr>
+        <td colspan='4'> No bugs found </td>
+      </tr>
+    </table>
+<br>
+<table>
+  <caption><b>Summary of Variable Location Bugs</b></caption>
+  <tr>
+      <th>LLVM Pass Name</th>
+    <th>Number of bugs</th>
+  </tr>
+  <tr>
+<tr>
+        <td colspan='2'> No bugs found </td>
+      </tr>
+    </table>
+</body>
+  </html>
\ No newline at end of file
index 6fc1b69..c861d3a 100644 (file)
     <tr>
     <td>test.ll</td>
     <td>no-name</td>
-    <td>extractvalue</td>
-    <td>fn</td>
-    <td>no-name</td>
-    <td>not-generate</td>
-    </tr>
-    <tr>
-    <td>test.ll</td>
-    <td>no-name</td>
-    <td>insertvalue</td>
-    <td>fn1</td>
-    <td>no-name</td>
-    <td>not-generate</td>
-    </tr>
-    <tr>
-    <td>test.ll</td>
-    <td>no-name</td>
     <td>insertvalue</td>
     <td>fn1</td>
     <td>no-name</td>
     <tr>
     <td>test.ll</td>
     <td>no-name</td>
-    <td>insertvalue</td>
-    <td>fn</td>
-    <td>no-name</td>
-    <td>not-generate</td>
-    </tr>
-    <tr>
-    <td>test.ll</td>
-    <td>no-name</td>
-    <td>extractvalue</td>
-    <td>fn1</td>
-    <td>no-name</td>
-    <td>not-generate</td>
-    </tr>
-    <tr>
-    <td>test.ll</td>
-    <td>no-name</td>
     <td>extractvalue</td>
     <td>fn1</td>
     <td>no-name</td>
index 12292f2..81f987a 100644 (file)
@@ -6,3 +6,8 @@ RUN: %llvm-original-di-preservation %p/Inputs/corrupted.json %t2.html | FileChec
 RUN: diff -w %p/Inputs/expected-skipped.html %t2.html
 CORRUPTED: Skipped lines: 3
 CORRUPTED: Skipped bugs: 1
+
+RUN: %llvm-original-di-preservation -compress %p/Inputs/sample.json %t3.html | FileCheck %s -check-prefix=COMPRESSED
+RUN: diff -w %p/Inputs/expected-compressed.html %t3.html
+COMPRESSED-NOT: Skipped lines:
+
index 73d7d4b..5b53e6a 100755 (executable)
@@ -17,17 +17,23 @@ class DILocBug:
     self.bb_name = bb_name
     self.fn_name = fn_name
     self.instr = instr
+  def __str__(self):
+    return self.action + self.bb_name + self.fn_name + self.instr
 
 class DISPBug:
   def __init__(self, action, fn_name):
     self.action = action
     self.fn_name = fn_name
+  def __str__(self):
+    return self.action + self.fn_name
 
 class DIVarBug:
   def __init__(self, action, name, fn_name):
     self.action = action
     self.name = name
     self.fn_name = fn_name
+  def __str__(self):
+    return self.action + self.name + self.fn_name
 
 # Report the bugs in form of html.
 def generate_html_report(di_location_bugs, di_subprogram_bugs, di_var_bugs, \
@@ -326,11 +332,12 @@ def generate_html_report(di_location_bugs, di_subprogram_bugs, di_var_bugs, \
 
   print("The " + html_file + " generated.")
 
-# Read the JSON file.
-def get_json(file):
+# Read the JSON file in chunks.
+def get_json_chunk(file,start,size):
   json_parsed = None
   di_checker_data = []
   skipped_lines = 0
+  line = 0
 
   # The file contains json object per line.
   # An example of the line (formatted json):
@@ -354,6 +361,11 @@ def get_json(file):
   #}
   with open(file) as json_objects_file:
     for json_object_line in json_objects_file:
+      line += 1
+      if line < start:
+        continue
+      if line >= start+size:
+        break
       try:
         json_object = loads(json_object_line)
       except:
@@ -361,12 +373,13 @@ def get_json(file):
       else:
         di_checker_data.append(json_object)
 
-  return (di_checker_data, skipped_lines)
+  return (di_checker_data, skipped_lines, line)
 
 # Parse the program arguments.
 def parse_program_args(parser):
   parser.add_argument("file_name", type=str, help="json file to process")
   parser.add_argument("html_file", type=str, help="html file to output data")
+  parser.add_argument("-compress", action="store_true", help="create reduced html report")
 
   return parser.parse_args()
 
@@ -378,8 +391,6 @@ def Main():
     print ("error: The output file must be '.html'.")
     sys.exit(1)
 
-  (debug_info_bugs, skipped_lines) = get_json(opts.file_name)
-
   # Use the defaultdict in order to make multidim dicts.
   di_location_bugs = defaultdict(lambda: defaultdict(dict))
   di_subprogram_bugs = defaultdict(lambda: defaultdict(dict))
@@ -390,81 +401,132 @@ def Main():
   di_sp_bugs_summary = OrderedDict()
   di_var_bugs_summary = OrderedDict()
 
+  # Compress similar bugs.
+  # DILocBugs with same pass & instruction name.
+  di_loc_pass_instr_set = set()
+  # DISPBugs with same pass & function name.
+  di_sp_pass_fn_set = set()
+  # DIVarBugs with same pass & variable name.
+  di_var_pass_var_set = set()
+
+  start_line = 0
+  chunk_size = 1000000
+  end_line = chunk_size - 1
+  skipped_lines = 0
   skipped_bugs = 0
-  # Map the bugs into the file-pass pairs.
-  for bugs_per_pass in debug_info_bugs:
-    try:
-      bugs_file = bugs_per_pass["file"]
-      bugs_pass = bugs_per_pass["pass"]
-      bugs = bugs_per_pass["bugs"][0]
-    except:
-      skipped_lines += 1
-      continue
-
-    di_loc_bugs = []
-    di_sp_bugs = []
-    di_var_bugs = []
-
-    for bug in bugs:
+  # Process each chunk of 1 million JSON lines.
+  while True:
+    if start_line > end_line:
+      break
+    (debug_info_bugs, skipped, end_line) = get_json_chunk(opts.file_name,start_line,chunk_size)
+    start_line += chunk_size
+    skipped_lines += skipped
+
+    # Map the bugs into the file-pass pairs.
+    for bugs_per_pass in debug_info_bugs:
       try:
-        bugs_metadata = bug["metadata"]
+        bugs_file = bugs_per_pass["file"]
+        bugs_pass = bugs_per_pass["pass"]
+        bugs = bugs_per_pass["bugs"][0]
       except:
-        skipped_bugs += 1
+        skipped_lines += 1
         continue
 
-      if bugs_metadata == "DILocation":
-        try:
-          action = bug["action"]
-          bb_name = bug["bb-name"]
-          fn_name = bug["fn-name"]
-          instr = bug["instr"]
-        except:
-          skipped_bugs += 1
-          continue
-        di_loc_bugs.append(DILocBug(action, bb_name, fn_name, instr))
+      di_loc_bugs = []
+      di_sp_bugs = []
+      di_var_bugs = []
 
-        # Fill the summary dict.
-        if bugs_pass in di_location_bugs_summary:
-          di_location_bugs_summary[bugs_pass] += 1
-        else:
-          di_location_bugs_summary[bugs_pass] = 1
-      elif bugs_metadata == "DISubprogram":
+      # Omit duplicated bugs.
+      di_loc_set = set()
+      di_sp_set = set()
+      di_var_set = set()
+      for bug in bugs:
         try:
-          action = bug["action"]
-          name = bug["name"]
+          bugs_metadata = bug["metadata"]
         except:
           skipped_bugs += 1
           continue
-        di_sp_bugs.append(DISPBug(action, name))
 
-        # Fill the summary dict.
-        if bugs_pass in di_sp_bugs_summary:
-          di_sp_bugs_summary[bugs_pass] += 1
+        if bugs_metadata == "DILocation":
+          try:
+            action = bug["action"]
+            bb_name = bug["bb-name"]
+            fn_name = bug["fn-name"]
+            instr = bug["instr"]
+          except:
+            skipped_bugs += 1
+            continue
+          di_loc_bug = DILocBug(action, bb_name, fn_name, instr)
+          if not str(di_loc_bug) in di_loc_set:
+            di_loc_set.add(str(di_loc_bug))
+            if opts.compress:
+              pass_instr = bugs_pass + instr
+              if not pass_instr in di_loc_pass_instr_set:
+                di_loc_pass_instr_set.add(pass_instr)
+                di_loc_bugs.append(di_loc_bug)
+            else:
+              di_loc_bugs.append(di_loc_bug)
+
+          # Fill the summary dict.
+          if bugs_pass in di_location_bugs_summary:
+            di_location_bugs_summary[bugs_pass] += 1
+          else:
+            di_location_bugs_summary[bugs_pass] = 1
+        elif bugs_metadata == "DISubprogram":
+          try:
+            action = bug["action"]
+            name = bug["name"]
+          except:
+            skipped_bugs += 1
+            continue
+          di_sp_bug = DISPBug(action, name)
+          if not str(di_sp_bug) in di_sp_set:
+            di_sp_set.add(str(di_sp_bug))
+            if opts.compress:
+              pass_fn = bugs_pass + name
+              if not pass_fn in di_sp_pass_fn_set:
+                di_sp_pass_fn_set.add(pass_fn)
+                di_sp_bugs.append(di_sp_bug)
+            else:
+              di_sp_bugs.append(di_sp_bug)
+
+          # Fill the summary dict.
+          if bugs_pass in di_sp_bugs_summary:
+            di_sp_bugs_summary[bugs_pass] += 1
+          else:
+            di_sp_bugs_summary[bugs_pass] = 1
+        elif bugs_metadata == "dbg-var-intrinsic":
+          try:
+            action = bug["action"]
+            fn_name = bug["fn-name"]
+            name = bug["name"]
+          except:
+            skipped_bugs += 1
+            continue
+          di_var_bug = DIVarBug(action, name, fn_name)
+          if not str(di_var_bug) in di_var_set:
+            di_var_set.add(str(di_var_bug))
+            if opts.compress:
+              pass_var = bugs_pass + name
+              if not pass_var in di_var_pass_var_set:
+                di_var_pass_var_set.add(pass_var)
+                di_var_bugs.append(di_var_bug)
+            else:
+              di_var_bugs.append(di_var_bug)
+
+          # Fill the summary dict.
+          if bugs_pass in di_var_bugs_summary:
+            di_var_bugs_summary[bugs_pass] += 1
+          else:
+            di_var_bugs_summary[bugs_pass] = 1
         else:
-          di_sp_bugs_summary[bugs_pass] = 1
-      elif bugs_metadata == "dbg-var-intrinsic":
-        try:
-          action = bug["action"]
-          fn_name = bug["fn-name"]
-          name = bug["name"]
-        except:
+          # Unsupported metadata.
           skipped_bugs += 1
           continue
-        di_var_bugs.append(DIVarBug(action, name, fn_name))
-
-        # Fill the summary dict.
-        if bugs_pass in di_var_bugs_summary:
-          di_var_bugs_summary[bugs_pass] += 1
-        else:
-          di_var_bugs_summary[bugs_pass] = 1
-      else:
-        # Unsupported metadata.
-        skipped_bugs += 1
-        continue
 
-    di_location_bugs[bugs_file][bugs_pass] = di_loc_bugs
-    di_subprogram_bugs[bugs_file][bugs_pass] = di_sp_bugs
-    di_variable_bugs[bugs_file][bugs_pass] = di_var_bugs
+      di_location_bugs[bugs_file][bugs_pass] = di_loc_bugs
+      di_subprogram_bugs[bugs_file][bugs_pass] = di_sp_bugs
+      di_variable_bugs[bugs_file][bugs_pass] = di_var_bugs
 
   generate_html_report(di_location_bugs, di_subprogram_bugs, di_variable_bugs, \
                        di_location_bugs_summary, di_sp_bugs_summary, \