From ad6690afa3e6e9bae6d8338016c4931e084ff380 Mon Sep 17 00:00:00 2001 From: Antonio Afonso Date: Tue, 8 Oct 2019 23:44:49 +0000 Subject: [PATCH] Explicitly set entry point arch when it's thumb [Second Try] Summary: This is a redo of D68069 because I reverted it due to some concerns that were now addressed along with the new comments that @labath added. I found a case where the main android binary (app_process32) had thumb code at its entry point but no entry in the symbol table indicating this. This made lldb set a 4 byte breakpoint at that address (we default to arm code) instead of a 2 byte one (like we should for thumb). The big deal with this is that the expression evaluator uses the entry point as a way to know when a JITed expression has finished executing by putting a breakpoint there. Because of this, evaluating expressions on certain android devices (Google Pixel something) made the process crash. This was fixed by checking this specific situation when we parse the symbol table and add an artificial symbol for this 2 byte range and indicating that it's arm thumb. I created 2 unit tests for this, one to check that now we know that the entry point is arm thumb, and the other to make sure we didn't change the behaviour for arm code. I also run the following on the command line with the `app_process32` where I found the issue: **Before:** ``` (lldb) dis -s 0x1640 -e 0x1644 app_process32[0x1640]: .long 0xf0004668 ; unknown opcode ``` **After:** ``` (lldb) dis -s 0x1640 -e 0x1644 app_process32`: app_process32[0x1640] <+0>: mov r0, sp app_process32[0x1642]: andeq r0, r0, r0 ``` Reviewers: clayborg, labath, wallace, espindola Reviewed By: labath Subscribers: labath, lldb-commits, MaskRay, kristof.beyls, arichardson, emaste, srhines Tags: #lldb Differential Revision: https://reviews.llvm.org/D68533 llvm-svn: 374132 --- lldb/lit/SymbolFile/Breakpad/Inputs/basic-elf.yaml | 2 +- lldb/lit/SymbolFile/Breakpad/symtab.test | 11 +- lldb/lit/SymbolFile/dissassemble-entry-point.s | 13 +++ .../Plugins/ObjectFile/ELF/ObjectFileELF.cpp | 45 ++++++++ .../unittests/ObjectFile/ELF/TestObjectFileELF.cpp | 126 +++++++++++++++++++++ 5 files changed, 191 insertions(+), 6 deletions(-) create mode 100644 lldb/lit/SymbolFile/dissassemble-entry-point.s diff --git a/lldb/lit/SymbolFile/Breakpad/Inputs/basic-elf.yaml b/lldb/lit/SymbolFile/Breakpad/Inputs/basic-elf.yaml index 4d6a1e1..905dc25 100644 --- a/lldb/lit/SymbolFile/Breakpad/Inputs/basic-elf.yaml +++ b/lldb/lit/SymbolFile/Breakpad/Inputs/basic-elf.yaml @@ -6,7 +6,7 @@ FileHeader: Data: ELFDATA2LSB Type: ET_EXEC Machine: EM_X86_64 - Entry: 0x00000000004000D0 + Entry: 0x0000000000400000 Sections: - Name: .text1 Type: SHT_PROGBITS diff --git a/lldb/lit/SymbolFile/Breakpad/symtab.test b/lldb/lit/SymbolFile/Breakpad/symtab.test index ea766ec..caeaa87 100644 --- a/lldb/lit/SymbolFile/Breakpad/symtab.test +++ b/lldb/lit/SymbolFile/Breakpad/symtab.test @@ -3,12 +3,13 @@ # RUN: -s %s | FileCheck %s # CHECK-LABEL: (lldb) image dump symtab symtab.out -# CHECK: Symtab, file = {{.*}}symtab.out, num_symbols = 4: +# CHECK: Symtab, file = {{.*}}symtab.out, num_symbols = 5: # CHECK: Index UserID DSX Type File Address/Value Load Address Size Flags Name -# CHECK: [ 0] 0 X Code 0x00000000004000c0 0x0000000000000010 0x00000000 f2 -# CHECK: [ 1] 0 X Code 0x00000000004000d0 0x0000000000000022 0x00000000 _start -# CHECK: [ 2] 0 X Code 0x00000000004000a0 0x000000000000000d 0x00000000 func_only -# CHECK: [ 3] 0 X Code 0x00000000004000b0 0x000000000000000c 0x00000000 f1_func +# CHECK: [ 0] 0 SX Code 0x0000000000400000 0x00000000000000b0 0x00000000 ___lldb_unnamed_symbol1$$symtab.out +# CHECK: [ 1] 0 X Code 0x00000000004000c0 0x0000000000000010 0x00000000 f2 +# CHECK: [ 2] 0 X Code 0x00000000004000d0 0x0000000000000022 0x00000000 _start +# CHECK: [ 3] 0 X Code 0x00000000004000a0 0x000000000000000d 0x00000000 func_only +# CHECK: [ 4] 0 X Code 0x00000000004000b0 0x000000000000000c 0x00000000 f1_func # CHECK-LABEL: (lldb) image lookup -a 0x4000b0 -v # CHECK: Address: symtab.out[0x00000000004000b0] (symtab.out.PT_LOAD[0]..text2 + 0) diff --git a/lldb/lit/SymbolFile/dissassemble-entry-point.s b/lldb/lit/SymbolFile/dissassemble-entry-point.s new file mode 100644 index 0000000..c805999 --- /dev/null +++ b/lldb/lit/SymbolFile/dissassemble-entry-point.s @@ -0,0 +1,13 @@ +# REQUIRES: lld, arm + +# RUN: llvm-mc -triple=thumbv7-eabi %s -filetype=obj -o %t.o +# RUN: ld.lld %t.o -o %t --section-start=.text=0x8074 -e 0x8075 -s +# RUN: %lldb -x -b -o 'dis -s 0x8074 -e 0x8080' -- %t | FileCheck %s +# CHECK: {{.*}}[0x8074] <+0>: movs r0, #0x2a +# CHECK-NEXT: {{.*}}[0x8076] <+2>: movs r7, #0x1 +# CHECK-NEXT: {{.*}}[0x8078] <+4>: svc #0x0 + +_start: + movs r0, #0x2a + movs r7, #0x1 + svc #0x0 \ No newline at end of file diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp index 64e32e5..c3a99cf 100644 --- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp +++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp @@ -2778,6 +2778,51 @@ Symtab *ObjectFileELF::GetSymtab() { if (m_symtab_up == nullptr) m_symtab_up.reset(new Symtab(this)); + // In the event that there's no symbol entry for the entry point we'll + // artifically create one. We delegate to the symtab object the figuring + // out of the proper size, this will usually make it span til the next + // symbol it finds in the section. This means that if there are missing + // symbols the entry point might span beyond its function definition. + // We're fine with this as it doesn't make it worse than not having a + // symbol entry at all. + if (CalculateType() == eTypeExecutable) { + ArchSpec arch = GetArchitecture(); + auto entry_point_addr = GetEntryPointAddress(); + bool is_valid_entry_point = + entry_point_addr.IsValid() && entry_point_addr.IsSectionOffset(); + addr_t entry_point_file_addr = entry_point_addr.GetFileAddress(); + if (is_valid_entry_point && !m_symtab_up->FindSymbolContainingFileAddress( + entry_point_file_addr)) { + uint64_t symbol_id = m_symtab_up->GetNumSymbols(); + Symbol symbol(symbol_id, + GetNextSyntheticSymbolName().GetCString(), // Symbol name. + false, // Is the symbol name mangled? + eSymbolTypeCode, // Type of this symbol. + true, // Is this globally visible? + false, // Is this symbol debug info? + false, // Is this symbol a trampoline? + true, // Is this symbol artificial? + entry_point_addr.GetSection(), // Section where this + // symbol is defined. + 0, // Offset in section or symbol value. + 0, // Size. + false, // Size is valid. + false, // Contains linker annotations? + 0); // Symbol flags. + m_symtab_up->AddSymbol(symbol); + // When the entry point is arm thumb we need to explicitly set its + // class address to reflect that. This is important because expression + // evaluation relies on correctly setting a breakpoint at this + // address. + if (arch.GetMachine() == llvm::Triple::arm && + (entry_point_file_addr & 1)) + m_address_class_map[entry_point_file_addr ^ 1] = + AddressClass::eCodeAlternateISA; + else + m_address_class_map[entry_point_file_addr] = AddressClass::eCode; + } + } + m_symtab_up->CalculateSymbolSizes(); } diff --git a/lldb/unittests/ObjectFile/ELF/TestObjectFileELF.cpp b/lldb/unittests/ObjectFile/ELF/TestObjectFileELF.cpp index 42a1334..6bf6418 100644 --- a/lldb/unittests/ObjectFile/ELF/TestObjectFileELF.cpp +++ b/lldb/unittests/ObjectFile/ELF/TestObjectFileELF.cpp @@ -172,3 +172,129 @@ TEST_F(ObjectFileELFTest, GetModuleSpecifications_EarlySectionHeaders) { Uuid.SetFromStringRef("1b8a73ac238390e32a7ff4ac8ebe4d6a41ecf5c9", 20); EXPECT_EQ(Spec.GetUUID(), Uuid); } + +TEST_F(ObjectFileELFTest, GetSymtab_NoSymEntryPointArmThumbAddressClass) { + /* + // nosym-entrypoint-arm-thumb.s + .thumb_func + _start: + mov r0, #42 + mov r7, #1 + svc #0 + // arm-linux-androideabi-as nosym-entrypoint-arm-thumb.s + // -o nosym-entrypoint-arm-thumb.o + // arm-linux-androideabi-ld nosym-entrypoint-arm-thumb.o + // -o nosym-entrypoint-arm-thumb -e 0x8075 -s + */ + auto ExpectedFile = TestFile::fromYaml(R"( +--- !ELF +FileHeader: + Class: ELFCLASS32 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_ARM + Flags: [ EF_ARM_SOFT_FLOAT, EF_ARM_EABI_VER5 ] + Entry: 0x0000000000008075 +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Address: 0x0000000000008074 + AddressAlign: 0x0000000000000002 + Content: 2A20012700DF + - Name: .data + Type: SHT_PROGBITS + Flags: [ SHF_WRITE, SHF_ALLOC ] + Address: 0x0000000000009000 + AddressAlign: 0x0000000000000001 + Content: '' + - Name: .bss + Type: SHT_NOBITS + Flags: [ SHF_WRITE, SHF_ALLOC ] + Address: 0x0000000000009000 + AddressAlign: 0x0000000000000001 + - Name: .note.gnu.gold-version + Type: SHT_NOTE + AddressAlign: 0x0000000000000004 + Content: 040000000900000004000000474E5500676F6C6420312E3131000000 + - Name: .ARM.attributes + Type: SHT_ARM_ATTRIBUTES + AddressAlign: 0x0000000000000001 + Content: '4113000000616561626900010900000006020901' +... +)"); + ASSERT_THAT_EXPECTED(ExpectedFile, llvm::Succeeded()); + + ModuleSpec spec{FileSpec(ExpectedFile->name())}; + spec.GetSymbolFileSpec().SetFile(ExpectedFile->name(), + FileSpec::Style::native); + auto module_sp = std::make_shared(spec); + + auto entry_point_addr = module_sp->GetObjectFile()->GetEntryPointAddress(); + ASSERT_TRUE(entry_point_addr.GetOffset() & 1); + // Decrease the offsite by 1 to make it into a breakable address since this + // is Thumb. + entry_point_addr.SetOffset(entry_point_addr.GetOffset() - 1); + ASSERT_EQ(entry_point_addr.GetAddressClass(), + AddressClass::eCodeAlternateISA); +} + +TEST_F(ObjectFileELFTest, GetSymtab_NoSymEntryPointArmAddressClass) { + /* + // nosym-entrypoint-arm.s + _start: + movs r0, #42 + movs r7, #1 + svc #0 + // arm-linux-androideabi-as nosym-entrypoint-arm.s + // -o nosym-entrypoint-arm.o + // arm-linux-androideabi-ld nosym-entrypoint-arm.o + // -o nosym-entrypoint-arm -e 0x8074 -s + */ + auto ExpectedFile = TestFile::fromYaml(R"( +--- !ELF +FileHeader: + Class: ELFCLASS32 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_ARM + Flags: [ EF_ARM_SOFT_FLOAT, EF_ARM_EABI_VER5 ] + Entry: 0x0000000000008074 +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Address: 0x0000000000008074 + AddressAlign: 0x0000000000000004 + Content: 2A00A0E30170A0E3000000EF + - Name: .data + Type: SHT_PROGBITS + Flags: [ SHF_WRITE, SHF_ALLOC ] + Address: 0x0000000000009000 + AddressAlign: 0x0000000000000001 + Content: '' + - Name: .bss + Type: SHT_NOBITS + Flags: [ SHF_WRITE, SHF_ALLOC ] + Address: 0x0000000000009000 + AddressAlign: 0x0000000000000001 + - Name: .note.gnu.gold-version + Type: SHT_NOTE + AddressAlign: 0x0000000000000004 + Content: 040000000900000004000000474E5500676F6C6420312E3131000000 + - Name: .ARM.attributes + Type: SHT_ARM_ATTRIBUTES + AddressAlign: 0x0000000000000001 + Content: '4113000000616561626900010900000006010801' +... +)"); + ASSERT_THAT_EXPECTED(ExpectedFile, llvm::Succeeded()); + + ModuleSpec spec{FileSpec(ExpectedFile->name())}; + spec.GetSymbolFileSpec().SetFile(ExpectedFile->name(), + FileSpec::Style::native); + auto module_sp = std::make_shared(spec); + + auto entry_point_addr = module_sp->GetObjectFile()->GetEntryPointAddress(); + ASSERT_EQ(entry_point_addr.GetAddressClass(), AddressClass::eCode); +} \ No newline at end of file -- 2.7.4