[lldb] Parallelize fetching symbol files in crashlog.py
author Jonas Devlieghere <jonas@devlieghere.com>
Fri, 13 May 2022 19:15:05 +0000 (12:15 -0700)
committer Jonas Devlieghere <jonas@devlieghere.com>
Fri, 13 May 2022 19:25:41 +0000 (12:25 -0700)
When using dsymForUUID, the majority of the time symbolicating a crashlog
with crashlog.py is spent waiting for it to complete. Currently, we're
calling dsymForUUID sequentially when iterating over the modules. We can
drastically cut down this time by calling dsymForUUID in parallel. This
patch uses Python's ThreadPoolExecutor (introduced in Python 3.2) to
parallelize this IO-bound operation.

The performance improvement is hard to benchmark, because even with an
empty local cache, consecutive calls to dsymForUUID for the same UUID
complete faster. With warm caches, I'm seeing a ~30% performance
improvement (~90s -> ~60s). I suspect the gains will be much bigger for
a cold cache.

dsymForUUID supports batching up multiple UUIDs. I considered going that
route, but that would require more intrusive changes. It would require
hoisting the logic out of locate_module_and_debug_symbols which we
explicitly document [1] as a feature of Symbolication.py to locate
symbol files.

[1] https://lldb.llvm.org/use/symbolication.html

Differential revision: https://reviews.llvm.org/D125107

lldb/examples/python/crashlog.py

index bc34bf7..0bcdcd0 100755 (executable)
@@ -26,8 +26,8 @@
 #   PYTHONPATH=/path/to/LLDB.framework/Resources/Python ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash
 #----------------------------------------------------------------------
 
-from __future__ import print_function
 import cmd
+import concurrent.futures
 import contextlib
 import datetime
 import glob
@@ -41,9 +41,13 @@ import shlex
 import string
 import subprocess
 import sys
+import threading
 import time
 import uuid
 
+
+print_lock = threading.RLock()
+
 try:
     # First try for LLDB in case PYTHONPATH is already correctly setup.
     import lldb
@@ -269,7 +273,8 @@ class CrashLog(symbolication.Symbolicator):
             self.resolved = True
             uuid_str = self.get_normalized_uuid_string()
             if self.show_symbol_progress():
-                print('Getting symbols for %s %s...\n' % (uuid_str, self.path), end=' ')
+                with print_lock:
+                    print('Getting symbols for %s %s...' % (uuid_str, self.path))
             if os.path.exists(self.dsymForUUIDBinary):
                 dsym_for_uuid_command = '%s %s' % (
                     self.dsymForUUIDBinary, uuid_str)
@@ -278,7 +283,8 @@ class CrashLog(symbolication.Symbolicator):
                     try:
                         plist_root = read_plist(s)
                     except:
-                        print(("Got exception: ", sys.exc_info()[1], " handling dsymForUUID output: \n", s))
+                        with print_lock:
+                            print(("Got exception: ", sys.exc_info()[1], " handling dsymForUUID output: \n", s))
                         raise
                     if plist_root:
                         plist = plist_root[uuid_str]
@@ -306,7 +312,8 @@ class CrashLog(symbolication.Symbolicator):
                         if not os.path.exists(dwarf_dir):
                             # Not a dSYM bundle, probably an Xcode archive.
                             continue
-                        print('falling back to binary inside "%s"' % dsym)
+                        with print_lock:
+                            print('falling back to binary inside "%s"' % dsym)
                         self.symfile = dsym
                         for filename in os.listdir(dwarf_dir):
                            self.path = os.path.join(dwarf_dir, filename)
@@ -319,7 +326,8 @@ class CrashLog(symbolication.Symbolicator):
                     pass
             if (self.resolved_path and os.path.exists(self.resolved_path)) or (
                     self.path and os.path.exists(self.path)):
-                print('Resolved symbols for %s %s...\n' % (uuid_str, self.path), end=' ')
+                with print_lock:
+                    print('Resolved symbols for %s %s...' % (uuid_str, self.path))
                 return True
             else:
                 self.unavailable = True
@@ -978,9 +986,16 @@ def SymbolicateCrashLog(crash_log, options):
                 else:
                     print('error: can\'t find image for identifier "%s"' % ident)
 
-    for image in images_to_load:
-        if image not in loaded_images:
-            err = image.add_module(target)
+    futures = []
+    with concurrent.futures.ThreadPoolExecutor() as executor:
+        def add_module(image, target):
+            return image, image.add_module(target)
+
+        for image in images_to_load:
+            futures.append(executor.submit(add_module, image=image, target=target))
+
+        for future in concurrent.futures.as_completed(futures):
+            image, err = future.result()
             if err:
                 print(err)
             else: