From 27f27d15f6c90b026eca23b8ee238fdbf772fd80 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Thu, 13 Apr 2023 16:27:53 -0700 Subject: [PATCH] [lldb] Use ObjectFileJSON to create modules for interactive crashlogs Create an artificial module using a JSON object file when we can't locate the module and dSYM through dsymForUUID (or however locate_module_and_debug_symbols is implemented). By parsing the symbols from the crashlog and making them part of the JSON object file, LLDB can symbolicate frames it otherwise wouldn't be able to, as there is no module for it. For non-interactive crashlogs, that never was a problem because we could simply show the "pre-symbolicated" frame from the input. For interactive crashlogs, we need a way to pass the symbol information to LLDB so that it can symbolicate the frames, which is what motivated the JSON object file format. Differential revision: https://reviews.llvm.org/D148172 --- lldb/examples/python/crashlog.py | 57 +++++++++++----------- .../scripted_process/crashlog_scripted_process.py | 39 +++++++-------- lldb/examples/python/symbolication.py | 36 +++++++++++--- .../interactive_crashlog/multithread-test.ips | 9 ++++ .../skipped_status_interactive_crashlog.test | 3 ++ 5 files changed, 90 insertions(+), 54 deletions(-) diff --git a/lldb/examples/python/crashlog.py b/lldb/examples/python/crashlog.py index 68ead43..dadab3c 100755 --- a/lldb/examples/python/crashlog.py +++ b/lldb/examples/python/crashlog.py @@ -80,6 +80,7 @@ class CrashLog(symbolication.Symbolicator): def __init__(self, index, app_specific_backtrace): self.index = index self.id = index + self.images = list() self.frames = list() self.idents = list() self.registers = dict() @@ -456,6 +457,11 @@ class JSONCrashLogParser(CrashLogParser): except: return None + def __init__(self, debugger, path, verbose): + super().__init__(debugger, path, verbose) + # List of DarwinImages sorted by their index. 
+ self.images = list() + def parse(self): try: self.parse_process_info(self.data) @@ -506,7 +512,6 @@ class JSONCrashLogParser(CrashLogParser): exception_extra) def parse_images(self, json_images): - idx = 0 for json_image in json_images: img_uuid = uuid.UUID(json_image['uuid']) low = int(json_image['base']) @@ -518,8 +523,8 @@ class JSONCrashLogParser(CrashLogParser): darwin_image = self.crashlog.DarwinImage(low, high, name, version, img_uuid, path, self.verbose) + self.images.append(darwin_image) self.crashlog.images.append(darwin_image) - idx += 1 def parse_main_image(self, json_data): if 'procName' in json_data: @@ -539,6 +544,17 @@ class JSONCrashLogParser(CrashLogParser): frame_offset = int(json_frame['imageOffset']) image_addr = self.get_used_image(image_id)['base'] pc = image_addr + frame_offset + + if 'symbol' in json_frame: + symbol = json_frame['symbol'] + location = int(json_frame['symbolLocation']) + image = self.images[image_id] + image.symbols[symbol] = { + "name": symbol, + "type": "code", + "address": frame_offset - location + } + thread.frames.append(self.crashlog.Frame(idx, pc, frame_offset)) # on arm64 systems, if it jump through a null function pointer, @@ -1015,40 +1031,25 @@ def SymbolicateCrashLog(crash_log, options): target = crash_log.create_target() if not target: return - exe_module = target.GetModuleAtIndex(0) - images_to_load = list() - loaded_images = list() + + if options.load_all_images: - # --load-all option was specified, load everything up for image in crash_log.images: - images_to_load.append(image) - else: - # Only load the images found in stack frames for the crashed threads - if options.crashed_only: - for thread in crash_log.threads: - if thread.did_crash(): - for ident in thread.idents: - images = crash_log.find_images_with_identifier(ident) - if images: - for image in images: - images_to_load.append(image) - else: - print('error: can\'t find image for identifier "%s"' % ident) - else: - for ident in crash_log.idents: - 
images = crash_log.find_images_with_identifier(ident) - if images: - for image in images: - images_to_load.append(image) - else: - print('error: can\'t find image for identifier "%s"' % ident) + image.resolve = True + elif options.crashed_only: + for thread in crash_log.threads: + if thread.did_crash(): + for ident in thread.idents: + for image in crash_log.find_images_with_identifier(ident): + image.resolve = True futures = [] + loaded_images = [] with concurrent.futures.ThreadPoolExecutor() as executor: def add_module(image, target): return image, image.add_module(target) - for image in images_to_load: + for image in crash_log.images: futures.append(executor.submit(add_module, image=image, target=target)) for future in concurrent.futures.as_completed(futures): diff --git a/lldb/examples/python/scripted_process/crashlog_scripted_process.py b/lldb/examples/python/scripted_process/crashlog_scripted_process.py index 236853e..c913c86 100644 --- a/lldb/examples/python/scripted_process/crashlog_scripted_process.py +++ b/lldb/examples/python/scripted_process/crashlog_scripted_process.py @@ -22,27 +22,28 @@ class CrashLogScriptedProcess(ScriptedProcess): if hasattr(self.crashlog, 'asb'): self.extended_thread_info = self.crashlog.asb - def load_images(self, images): - #TODO: Add to self.loaded_images and load images in lldb - if images: - for image in images: - if image not in self.loaded_images: - if image.uuid == uuid.UUID(int=0): - continue - err = image.add_module(self.target) - if err: - # Append to SBCommandReturnObject - print(err) - else: - self.loaded_images.append(image) + if self.load_all_images: + for image in self.crashlog.images: + image.resolve = True + else: + for thread in self.crashlog.threads: + if thread.did_crash(): + for ident in thread.idents: + for image in self.crashlog.find_images_with_identifier(ident): + image.resolve = True + + for image in self.crashlog.images: + if image not in self.loaded_images: + if image.uuid == uuid.UUID(int=0): + 
continue + err = image.add_module(self.target) + if err: + # Append to SBCommandReturnObject + print(err) + else: + self.loaded_images.append(image) for thread in self.crashlog.threads: - if self.load_all_images: - load_images(self, self.crashlog.images) - elif thread.did_crash(): - for ident in thread.idents: - load_images(self, self.crashlog.find_images_with_identifier(ident)) - if hasattr(thread, 'app_specific_backtrace') and thread.app_specific_backtrace: # We don't want to include the Application Specific Backtrace # Thread into the Scripted Process' Thread list. diff --git a/lldb/examples/python/symbolication.py b/lldb/examples/python/symbolication.py index c722b73..9784c28 100755 --- a/lldb/examples/python/symbolication.py +++ b/lldb/examples/python/symbolication.py @@ -35,6 +35,8 @@ import shlex import sys import time import uuid +import json +import tempfile class Address: @@ -230,6 +232,7 @@ class Image: def __init__(self, path, uuid=None): self.path = path self.resolved_path = None + self.resolve = False self.resolved = False self.unavailable = False self.uuid = uuid @@ -240,6 +243,7 @@ class Image: self.module = None self.symfile = None self.slide = None + self.symbols = dict() @classmethod def InitWithSBTargetAndSBModule(cls, target, module): @@ -372,14 +376,32 @@ class Image: uuid_str = self.get_normalized_uuid_string() if uuid_str: self.module = target.AddModule(None, None, uuid_str) - if not self.module: + if not self.module and self.resolve: self.locate_module_and_debug_symbols() - if self.unavailable: - return None - resolved_path = self.get_resolved_path() - self.module = target.AddModule( - resolved_path, None, uuid_str, self.symfile) - if not self.module: + if not self.unavailable: + resolved_path = self.get_resolved_path() + self.module = target.AddModule( + resolved_path, None, uuid_str, self.symfile) + if not self.module and self.section_infos: + name = os.path.basename(self.path) + with tempfile.NamedTemporaryFile(suffix='.' 
+ name) as tf: + data = { + 'triple': target.triple, + 'uuid': uuid_str, + 'type': 'sharedlibrary', + 'sections': list(), + 'symbols': list() + } + for section in self.section_infos: + data['sections'].append({ + 'name' : section.name, + 'size': section.end_addr - section.start_addr + }) + data['symbols'] = list(self.symbols.values()) + with open(tf.name, 'w') as f: + f.write(json.dumps(data, indent=4)) + self.module = target.AddModule(tf.name, None, uuid_str) + if not self.module and not self.unavailable: return 'error: unable to get module for (%s) "%s"' % ( self.arch, self.get_resolved_path()) if self.has_section_load_info(): diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/interactive_crashlog/multithread-test.ips b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/interactive_crashlog/multithread-test.ips index 33153c8..23ce9d0 100644 --- a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/interactive_crashlog/multithread-test.ips +++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/interactive_crashlog/multithread-test.ips @@ -478,6 +478,15 @@ "source" : "A", "base" : 0, "uuid" : "00000000-0000-0000-0000-000000000000" + }, + { + "arch": "arm64", + "base": 12345, + "name": "bogus.dylib", + "path": "/usr/lib/system/bogus.dylib", + "size": 1000, + "source": "P", + "uuid": "11111111-2222-3333-4444-555555555555" } ], "userID": 501, diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/skipped_status_interactive_crashlog.test b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/skipped_status_interactive_crashlog.test index b120b96..81e0686 100644 --- a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/skipped_status_interactive_crashlog.test +++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/skipped_status_interactive_crashlog.test @@ -41,3 +41,6 @@ bt all # CHECK-NEXT: frame #2: 0x0000000100ec5a87 multithread-test`compute_pow{{.*}} [artificial] # CHECK: frame #{{[0-9]+}}: 0x000000019cc7e06b{{.*}} 
[artificial] # CHECK: frame #{{[0-9]+}}: 0x000000019cc78e2b{{.*}} [artificial] + +image list +# CHECK: 11111111-2222-3333-4444-555555555555 {{.*}}bogus.dylib -- 2.7.4