From a8abb695859ad4e7fe695b9ee238a2b0cd00af7c Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Fri, 13 May 2022 12:15:05 -0700 Subject: [PATCH] [lldb] Parallelize fetching symbol files in crashlog.py When using dsymForUUID, the majority of time symbolicating a crashlog with crashlog.py is spent waiting for it to complete. Currently, we're calling dsymForUUID sequentially when iterating over the modules. We can drastically cut down this time by calling dsymForUUID in parallel. This patch uses Python's ThreadPoolExecutor (introduced in Python 3.2) to parallelize this IO-bound operation. The performance improvement is hard to benchmark, because even with an empty local cache, consecutive calls to dsymForUUID for the same UUID complete faster. With warm caches, I'm seeing a ~30% performance improvement (~90s -> ~60s). I suspect the gains will be much bigger for a cold cache. dsymForUUID supports batching up multiple UUIDs. I considered going that route, but that would require more intrusive changes. It would require hoisting the logic out of locate_module_and_debug_symbols which we explicitly document [1] as a feature of Symbolication.py to locate symbol files. 
[1] https://lldb.llvm.org/use/symbolication.html Differential revision: https://reviews.llvm.org/D125107 --- lldb/examples/python/crashlog.py | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/lldb/examples/python/crashlog.py b/lldb/examples/python/crashlog.py index bc34bf7..0bcdcd0 100755 --- a/lldb/examples/python/crashlog.py +++ b/lldb/examples/python/crashlog.py @@ -26,8 +26,8 @@ # PYTHONPATH=/path/to/LLDB.framework/Resources/Python ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash #---------------------------------------------------------------------- -from __future__ import print_function import cmd +import concurrent.futures import contextlib import datetime import glob @@ -41,9 +41,13 @@ import shlex import string import subprocess import sys +import threading import time import uuid + +print_lock = threading.RLock() + try: # First try for LLDB in case PYTHONPATH is already correctly setup. import lldb @@ -269,7 +273,8 @@ class CrashLog(symbolication.Symbolicator): self.resolved = True uuid_str = self.get_normalized_uuid_string() if self.show_symbol_progress(): - print('Getting symbols for %s %s...\n' % (uuid_str, self.path), end=' ') + with print_lock: + print('Getting symbols for %s %s...' % (uuid_str, self.path)) if os.path.exists(self.dsymForUUIDBinary): dsym_for_uuid_command = '%s %s' % ( self.dsymForUUIDBinary, uuid_str) @@ -278,7 +283,8 @@ class CrashLog(symbolication.Symbolicator): try: plist_root = read_plist(s) except: - print(("Got exception: ", sys.exc_info()[1], " handling dsymForUUID output: \n", s)) + with print_lock: + print(("Got exception: ", sys.exc_info()[1], " handling dsymForUUID output: \n", s)) raise if plist_root: plist = plist_root[uuid_str] @@ -306,7 +312,8 @@ class CrashLog(symbolication.Symbolicator): if not os.path.exists(dwarf_dir): # Not a dSYM bundle, probably an Xcode archive. 
continue - print('falling back to binary inside "%s"' % dsym) + with print_lock: + print('falling back to binary inside "%s"' % dsym) self.symfile = dsym for filename in os.listdir(dwarf_dir): self.path = os.path.join(dwarf_dir, filename) @@ -319,7 +326,8 @@ class CrashLog(symbolication.Symbolicator): pass if (self.resolved_path and os.path.exists(self.resolved_path)) or ( self.path and os.path.exists(self.path)): - print('Resolved symbols for %s %s...\n' % (uuid_str, self.path), end=' ') + with print_lock: + print('Resolved symbols for %s %s...' % (uuid_str, self.path)) return True else: self.unavailable = True @@ -978,9 +986,16 @@ def SymbolicateCrashLog(crash_log, options): else: print('error: can\'t find image for identifier "%s"' % ident) - for image in images_to_load: - if image not in loaded_images: - err = image.add_module(target) + futures = [] + with concurrent.futures.ThreadPoolExecutor() as executor: + def add_module(image, target): + return image, image.add_module(target) + + for image in images_to_load: + futures.append(executor.submit(add_module, image=image, target=target)) + + for future in concurrent.futures.as_completed(futures): + image, err = future.result() if err: print(err) else: -- 2.7.4