From f665e80c023ec52557f55d7eeaf34471e4c6fa0d Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Tue, 12 May 2020 17:14:20 +0200 Subject: [PATCH] [lldb] Don't disassemble large functions by default Summary: If we have a binary without symbol information (and without LC_FUNCTION_STARTS, if on a Mac), then we have to resort to using heuristics to determine the function boundaries. However, these don't always work, and so we can easily end up thinking we have functions which are several megabytes in size. Attempting to (accidentally) disassemble these can take a very long time and spam the terminal with thousands of lines of disassembly. This patch works around that problem by adding a sanity check to the disassemble command. If we are about to disassemble a function which is larger than a certain threshold, we will refuse to disassemble such a function unless the user explicitly specifies the number of instructions to disassemble, uses start/stop addresses for disassembly, or passes the (new) --force argument. The threshold is currently fairly aggressive (4000 bytes ~ 1000 instructions). If needed, we can increase it, or even make it configurable. 
Differential Revision: https://reviews.llvm.org/D79789 --- lldb/source/Commands/CommandObjectDisassemble.cpp | 58 +++++++++++++---- lldb/source/Commands/CommandObjectDisassemble.h | 9 ++- lldb/source/Commands/Options.td | 4 +- .../Commands/Inputs/command-disassemble.lldbinit | 10 ++- .../Commands/command-disassemble-process.yaml | 11 +++- lldb/test/Shell/Commands/command-disassemble.s | 72 ++++++++++++++++++++-- 6 files changed, 142 insertions(+), 22 deletions(-) diff --git a/lldb/source/Commands/CommandObjectDisassemble.cpp b/lldb/source/Commands/CommandObjectDisassemble.cpp index 511cd69..d522d63 100644 --- a/lldb/source/Commands/CommandObjectDisassemble.cpp +++ b/lldb/source/Commands/CommandObjectDisassemble.cpp @@ -21,8 +21,9 @@ #include "lldb/Target/StackFrame.h" #include "lldb/Target/Target.h" -#define DEFAULT_DISASM_BYTE_SIZE 32 -#define DEFAULT_DISASM_NUM_INS 4 +static constexpr unsigned default_disasm_byte_size = 32; +static constexpr unsigned default_disasm_num_ins = 4; +static constexpr unsigned large_function_threshold = 4000; using namespace lldb; using namespace lldb_private; @@ -143,6 +144,10 @@ Status CommandObjectDisassemble::CommandOptions::SetOptionValue( } } break; + case '\x01': + force = true; + break; + default: llvm_unreachable("Unimplemented option"); } @@ -186,6 +191,7 @@ void CommandObjectDisassemble::CommandOptions::OptionParsingStarting( arch.Clear(); some_location_specified = false; + force = false; } Status CommandObjectDisassemble::CommandOptions::OptionParsingFinished( @@ -214,6 +220,21 @@ CommandObjectDisassemble::CommandObjectDisassemble( CommandObjectDisassemble::~CommandObjectDisassemble() = default; +llvm::Error CommandObjectDisassemble::CheckRangeSize(const AddressRange &range, + llvm::StringRef what) { + if (m_options.num_instructions > 0 || m_options.force || + range.GetByteSize() < large_function_threshold) + return llvm::Error::success(); + StreamString msg; + msg << "Not disassembling " << what << " because it is very 
large "; + range.Dump(&msg, &GetSelectedTarget(), Address::DumpStyleLoadAddress, + Address::DumpStyleFileAddress); + msg << ". To disassemble specify an instruction count limit, start/stop " + "addresses or use the --force option."; + return llvm::createStringError(llvm::inconvertibleErrorCode(), + msg.GetString()); +} + llvm::Expected> CommandObjectDisassemble::GetContainingAddressRanges() { std::vector ranges; @@ -254,6 +275,9 @@ CommandObjectDisassemble::GetContainingAddressRanges() { "Could not find function bounds for address 0x%" PRIx64, m_options.symbol_containing_addr); } + + if (llvm::Error err = CheckRangeSize(ranges[0], "the function")) + return std::move(err); return ranges; } @@ -273,8 +297,10 @@ CommandObjectDisassemble::GetCurrentFunctionRanges() { else if (sc.symbol && sc.symbol->ValueIsAddress()) { range = {sc.symbol->GetAddress(), sc.symbol->GetByteSize()}; } else - range = {frame->GetFrameCodeAddress(), DEFAULT_DISASM_BYTE_SIZE}; + range = {frame->GetFrameCodeAddress(), default_disasm_byte_size}; + if (llvm::Error err = CheckRangeSize(range, "the current function")) + return std::move(err); return std::vector{range}; } @@ -298,7 +324,7 @@ CommandObjectDisassemble::GetCurrentLineRanges() { } llvm::Expected> -CommandObjectDisassemble::GetNameRanges() { +CommandObjectDisassemble::GetNameRanges(CommandReturnObject &result) { ConstString name(m_options.func_name.c_str()); const bool include_symbols = true; const bool include_inlines = true; @@ -309,6 +335,7 @@ CommandObjectDisassemble::GetNameRanges() { name, eFunctionNameTypeAuto, include_symbols, include_inlines, sc_list); std::vector ranges; + llvm::Error range_errs = llvm::Error::success(); AddressRange range; const uint32_t scope = eSymbolContextBlock | eSymbolContextFunction | eSymbolContextSymbol; @@ -317,14 +344,21 @@ CommandObjectDisassemble::GetNameRanges() { for (uint32_t range_idx = 0; sc.GetAddressRange(scope, range_idx, use_inline_block_range, range); ++range_idx) { - 
ranges.push_back(range); + if (llvm::Error err = CheckRangeSize(range, "a range")) + range_errs = joinErrors(std::move(range_errs), std::move(err)); + else + ranges.push_back(range); } } if (ranges.empty()) { + if (range_errs) + return std::move(range_errs); return llvm::createStringError(llvm::inconvertibleErrorCode(), "Unable to find symbol with name '%s'.\n", name.GetCString()); } + if (range_errs) + result.AppendWarning(toString(std::move(range_errs))); return ranges; } @@ -340,7 +374,7 @@ CommandObjectDisassemble::GetPCRanges() { if (m_options.num_instructions == 0) { // Disassembling at the PC always disassembles some number of // instructions (not the whole function). - m_options.num_instructions = DEFAULT_DISASM_NUM_INS; + m_options.num_instructions = default_disasm_num_ins; } return std::vector{{frame->GetFrameCodeAddress(), 0}}; } @@ -359,7 +393,8 @@ CommandObjectDisassemble::GetStartEndAddressRanges() { } llvm::Expected> -CommandObjectDisassemble::GetRangesForSelectedMode() { +CommandObjectDisassemble::GetRangesForSelectedMode( + CommandReturnObject &result) { if (m_options.symbol_containing_addr != LLDB_INVALID_ADDRESS) return CommandObjectDisassemble::GetContainingAddressRanges(); if (m_options.current_function) @@ -367,7 +402,7 @@ CommandObjectDisassemble::GetRangesForSelectedMode() { if (m_options.frame_line) return CommandObjectDisassemble::GetCurrentLineRanges(); if (!m_options.func_name.empty()) - return CommandObjectDisassemble::GetNameRanges(); + return CommandObjectDisassemble::GetNameRanges(result); if (m_options.start_addr != LLDB_INVALID_ADDRESS) return CommandObjectDisassemble::GetStartEndAddressRanges(); return CommandObjectDisassemble::GetPCRanges(); @@ -440,7 +475,8 @@ bool CommandObjectDisassemble::DoExecute(Args &command, if (m_options.raw) options |= Disassembler::eOptionRawOuput; - llvm::Expected> ranges = GetRangesForSelectedMode(); + llvm::Expected> ranges = + GetRangesForSelectedMode(result); if (!ranges) { 
result.AppendError(toString(ranges.takeError())); result.SetStatus(eReturnStatusFailed); @@ -453,7 +489,7 @@ bool CommandObjectDisassemble::DoExecute(Args &command, if (m_options.num_instructions == 0) { limit = {Disassembler::Limit::Bytes, cur_range.GetByteSize()}; if (limit.value == 0) - limit.value = DEFAULT_DISASM_BYTE_SIZE; + limit.value = default_disasm_byte_size; } else { limit = {Disassembler::Limit::Instructions, m_options.num_instructions}; } @@ -476,7 +512,7 @@ bool CommandObjectDisassemble::DoExecute(Args &command, result.SetStatus(eReturnStatusFailed); } if (print_sc_header) - result.AppendMessage("\n"); + result.GetOutputStream() << "\n"; } return result.Succeeded(); diff --git a/lldb/source/Commands/CommandObjectDisassemble.h b/lldb/source/Commands/CommandObjectDisassemble.h index bdcb9a1..340bf64 100644 --- a/lldb/source/Commands/CommandObjectDisassemble.h +++ b/lldb/source/Commands/CommandObjectDisassemble.h @@ -62,6 +62,7 @@ public: // "at_pc". This should be set // in SetOptionValue if anything the selects a location is set. 
lldb::addr_t symbol_containing_addr; + bool force = false; }; CommandObjectDisassemble(CommandInterpreter &interpreter); @@ -73,15 +74,19 @@ public: protected: bool DoExecute(Args &command, CommandReturnObject &result) override; - llvm::Expected> GetRangesForSelectedMode(); + llvm::Expected> + GetRangesForSelectedMode(CommandReturnObject &result); llvm::Expected> GetContainingAddressRanges(); llvm::Expected> GetCurrentFunctionRanges(); llvm::Expected> GetCurrentLineRanges(); - llvm::Expected> GetNameRanges(); + llvm::Expected> + GetNameRanges(CommandReturnObject &result); llvm::Expected> GetPCRanges(); llvm::Expected> GetStartEndAddressRanges(); + llvm::Error CheckRangeSize(const AddressRange &range, llvm::StringRef what); + CommandOptions m_options; }; diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td index 05c7b32..addfec5 100644 --- a/lldb/source/Commands/Options.td +++ b/lldb/source/Commands/Options.td @@ -311,7 +311,7 @@ let Command = "disassemble" in { Desc<"Address at which to start disassembling.">; def disassemble_options_end_address : Option<"end-address", "e">, Group<1>, Arg<"AddressOrExpression">, Desc<"Address at which to end disassembling.">; - def disassemble_options_count : Option<"count", "c">, Groups<[2,3,4,5]>, + def disassemble_options_count : Option<"count", "c">, Groups<[2,3,4,5,7]>, Arg<"NumLines">, Desc<"Number of instructions to display.">; def disassemble_options_name : Option<"name", "n">, Group<3>, Arg<"FunctionName">, Completion<"Symbol">, @@ -326,6 +326,8 @@ let Command = "disassemble" in { def disassemble_options_address : Option<"address", "a">, Group<7>, Arg<"AddressOrExpression">, Desc<"Disassemble function containing this address.">; + def disassemble_options_force : Option<"force", "\\x01">, Groups<[2,3,4,5,7]>, + Desc<"Force dissasembly of large functions.">; } let Command = "expression" in { diff --git a/lldb/test/Shell/Commands/Inputs/command-disassemble.lldbinit 
b/lldb/test/Shell/Commands/Inputs/command-disassemble.lldbinit index 6a328ff..9174b34 100644 --- a/lldb/test/Shell/Commands/Inputs/command-disassemble.lldbinit +++ b/lldb/test/Shell/Commands/Inputs/command-disassemble.lldbinit @@ -6,7 +6,13 @@ disassemble --start-address 0x0 disassemble --start-address 0x4 --end-address 0x8 disassemble --start-address 0x8 --end-address 0x4 disassemble --address 0x0 -disassemble --address 0xdead +disassemble --address 0xdeadb +disassemble --address 0x100 +disassemble --address 0x100 --count 3 +disassemble --address 0x100 --force disassemble --start-address 0x0 --count 7 disassemble --start-address 0x0 --end-address 0x20 --count 7 -disassemble --address 0x0 --count 7 +disassemble --name case1 +disassemble --name case2 +disassemble --name case3 +disassemble --name case3 --count 3 diff --git a/lldb/test/Shell/Commands/command-disassemble-process.yaml b/lldb/test/Shell/Commands/command-disassemble-process.yaml index 7dc1451..e9db8e8 100644 --- a/lldb/test/Shell/Commands/command-disassemble-process.yaml +++ b/lldb/test/Shell/Commands/command-disassemble-process.yaml @@ -1,6 +1,7 @@ # REQUIRES: x86 -# RUN: yaml2obj --docnum=1 %s > %T/command-disassemble-process.exe +# RUN: yaml2obj --docnum=1 -DMAIN_SIZE=8 %s > %T/command-disassemble-process.exe +# RUN: yaml2obj --docnum=1 -DMAIN_SIZE=4000 %s > %T/command-disassemble-process.big.exe # RUN: yaml2obj --docnum=2 %s > %t # RUN: %lldb -c %t %T/command-disassemble-process.exe \ @@ -8,6 +9,9 @@ # RUN: -s %S/Inputs/command-disassemble-process.lldbinit -o exit 2>&1 \ # RUN: | FileCheck %s +# RUN: %lldb -c %t %T/command-disassemble-process.big.exe \ +# RUN: -o disassemble -o exit 2>&1 | FileCheck %s --check-prefix=BIG + # CHECK: (lldb) disassemble # CHECK-NEXT: command-disassemble-process.exe`main: # CHECK-NEXT: 0x4002 <+0>: addb %al, (%rcx) @@ -59,6 +63,8 @@ # CHECK-NEXT: 0x400e: addb %cl, (%rcx) # CHECK-NEXT: 0x4010: addb %cl, (%rdx) +# BIG: error: Not disassembling the current function because 
it is very large [0x0000000000004002-0x0000000000004fa2). To disassemble specify an instruction count limit, start/stop addresses or use the --force option. + --- !ELF FileHeader: Class: ELFCLASS64 @@ -72,6 +78,7 @@ Sections: Address: 0x0000000000004000 AddressAlign: 0x0000000000001000 Content: 00000001000200030006000700080009000A000B000E000F00100011001200130016001700180019001A001B001E001F00200021002200230026002700280029002A002B002E002F + Size: 0x10000 - Name: .note.gnu.build-id Type: SHT_NOTE Flags: [ SHF_ALLOC ] @@ -83,7 +90,7 @@ Symbols: Type: STT_FUNC Section: .text Value: 0x0000000000004002 - Size: 0x0000000000000008 + Size: [[MAIN_SIZE]] ProgramHeaders: - Type: PT_LOAD Flags: [ PF_X, PF_R ] diff --git a/lldb/test/Shell/Commands/command-disassemble.s b/lldb/test/Shell/Commands/command-disassemble.s index aa47131..e43c4e3 100644 --- a/lldb/test/Shell/Commands/command-disassemble.s +++ b/lldb/test/Shell/Commands/command-disassemble.s @@ -51,8 +51,19 @@ # CHECK-NEXT: command-disassemble.s.tmp[0x8] <+8>: int $0x14 # CHECK-NEXT: command-disassemble.s.tmp[0xa] <+10>: int $0x15 # CHECK-NEXT: command-disassemble.s.tmp[0xc] <+12>: int $0x16 -# CHECK-NEXT: (lldb) disassemble --address 0xdead -# CHECK-NEXT: error: Could not find function bounds for address 0xdead +# CHECK-NEXT: (lldb) disassemble --address 0xdeadb +# CHECK-NEXT: error: Could not find function bounds for address 0xdeadb +# CHECK-NEXT: (lldb) disassemble --address 0x100 +# CHECK-NEXT: error: Not disassembling the function because it is very large [0x0000000000000040-0x0000000000002040). To disassemble specify an instruction count limit, start/stop addresses or use the --force option. 
+# CHECK-NEXT: (lldb) disassemble --address 0x100 --count 3 +# CHECK-NEXT: command-disassemble.s.tmp`very_long: +# CHECK-NEXT: command-disassemble.s.tmp[0x40] <+0>: int $0x2a +# CHECK-NEXT: command-disassemble.s.tmp[0x42] <+2>: int $0x2a +# CHECK-NEXT: command-disassemble.s.tmp[0x44] <+4>: int $0x2a +# CHECK-NEXT: (lldb) disassemble --address 0x100 --force +# CHECK-NEXT: command-disassemble.s.tmp`very_long: +# CHECK-NEXT: command-disassemble.s.tmp[0x40] <+0>: int $0x2a +# CHECK: command-disassemble.s.tmp[0x203e] <+8190>: int $0x2a # CHECK-NEXT: (lldb) disassemble --start-address 0x0 --count 7 # CHECK-NEXT: command-disassemble.s.tmp`foo: # CHECK-NEXT: command-disassemble.s.tmp[0x0] <+0>: int $0x10 @@ -64,8 +75,32 @@ # CHECK-NEXT: command-disassemble.s.tmp[0xc] <+12>: int $0x16 # CHECK-NEXT: (lldb) disassemble --start-address 0x0 --end-address 0x20 --count 7 # CHECK-NEXT: error: invalid combination of options for the given command -# CHECK-NEXT: (lldb) disassemble --address 0x0 --count 7 -# CHECK-NEXT: error: invalid combination of options for the given command +# CHECK-NEXT: (lldb) disassemble --name case1 +# CHECK-NEXT: command-disassemble.s.tmp`n1::case1: +# CHECK-NEXT: command-disassemble.s.tmp[0x2040] <+0>: int $0x30 +# CHECK-EMPTY: +# CHECK-NEXT: command-disassemble.s.tmp`n2::case1: +# CHECK-NEXT: command-disassemble.s.tmp[0x2042] <+0>: int $0x31 +# CHECK-EMPTY: +# CHECK-NEXT: (lldb) disassemble --name case2 +# CHECK-NEXT: command-disassemble.s.tmp`n1::case2: +# CHECK-NEXT: command-disassemble.s.tmp[0x2044] <+0>: int $0x32 +# CHECK-NEXT: warning: Not disassembling a range because it is very large [0x0000000000002046-0x0000000000004046). To disassemble specify an instruction count limit, start/stop addresses or use the --force option. +# CHECK-NEXT: (lldb) disassemble --name case3 +# CHECK-NEXT: error: Not disassembling a range because it is very large [0x0000000000004046-0x0000000000006046). 
To disassemble specify an instruction count limit, start/stop addresses or use the --force option. +# CHECK-NEXT: Not disassembling a range because it is very large [0x0000000000006046-0x0000000000008046). To disassemble specify an instruction count limit, start/stop addresses or use the --force option. +# CHECK-NEXT: (lldb) disassemble --name case3 --count 3 +# CHECK-NEXT: command-disassemble.s.tmp`n1::case3: +# CHECK-NEXT: command-disassemble.s.tmp[0x4046] <+0>: int $0x2a +# CHECK-NEXT: command-disassemble.s.tmp[0x4048] <+2>: int $0x2a +# CHECK-NEXT: command-disassemble.s.tmp[0x404a] <+4>: int $0x2a +# CHECK-EMPTY: +# CHECK-NEXT: command-disassemble.s.tmp`n2::case3: +# CHECK-NEXT: command-disassemble.s.tmp[0x6046] <+0>: int $0x2a +# CHECK-NEXT: command-disassemble.s.tmp[0x6048] <+2>: int $0x2a +# CHECK-NEXT: command-disassemble.s.tmp[0x604a] <+4>: int $0x2a +# CHECK-EMPTY: + .text foo: @@ -102,3 +137,32 @@ bar: int $0x2d int $0x2e int $0x2f + +very_long: + .rept 0x1000 + int $42 + .endr + +_ZN2n15case1Ev: + int $0x30 + +_ZN2n25case1Ev: + int $0x31 + +_ZN2n15case2Ev: + int $0x32 + +_ZN2n25case2Ev: + .rept 0x1000 + int $42 + .endr + +_ZN2n15case3Ev: + .rept 0x1000 + int $42 + .endr + +_ZN2n25case3Ev: + .rept 0x1000 + int $42 + .endr -- 2.7.4