From 4bddca306a451f2577ec62dd73c30b5e91a8fb32 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 8 Oct 2019 00:13:59 +0000 Subject: [PATCH] [MachO] Fix symbol merging during symtab parsing. The symtab parser in ObjectFileMachO has logic to coalesce debug (STAB) and non-debug symbols, based on the address and the symbol name for static (STSYM) and global symbols (GSYM) respectively. It makes the assumption that the debug variant is always encountered first. Rather than creating a second entry in the symbol table for the non-debug symbol, the latter gets merged into the existing debug symbol. This breaks when the linker emits the non-debug symbol first. We'd end up with two entries in the symbol table, each containing part of the information LLDB relies on. Indeed, commenting out the merging logic breaks the test suite spectacularly. This patch solves that problem by always parsing the debug symbols first. This guarantees that the assumption for merging holds. I'm not particularly happy with adding a lambda, but after numerous attempts this is the best solution I could come up with. The symtab parsing logic is pretty complex in that it touches a lot of things. I've experienced firsthand that it's very easy to break things. I believe this approach strikes a balance between fixing the issue and limiting the risk of regressions. 
Differential revision: https://reviews.llvm.org/D68536 llvm-svn: 373994 --- lldb/lit/ObjectFile/MachO/symtab.yaml | 699 +++++++++++++++++++++ .../Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp | 78 ++- 2 files changed, 754 insertions(+), 23 deletions(-) create mode 100644 lldb/lit/ObjectFile/MachO/symtab.yaml diff --git a/lldb/lit/ObjectFile/MachO/symtab.yaml b/lldb/lit/ObjectFile/MachO/symtab.yaml new file mode 100644 index 0000000..e50ba6b --- /dev/null +++ b/lldb/lit/ObjectFile/MachO/symtab.yaml @@ -0,0 +1,699 @@ +# Tests that the symbol table properly merges the two entries for +# global_constant, even when the debug symbol comes last. +# RUN: mkdir -p %t +# RUN: yaml2obj %s -o %t/a.out +# RUN: %lldb %t/a.out -o "target modules dump symtab a.out" | FileCheck %s +# CHECK: global_constant +# CHECK-NOT: global_constant +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x01000007 + cpusubtype: 0x80000003 + filetype: 0x00000002 + ncmds: 20 + sizeofcmds: 1992 + flags: 0x00200085 + reserved: 0x00000000 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __PAGEZERO + vmaddr: 0 + vmsize: 4294967296 + fileoff: 0 + filesize: 0 + maxprot: 0 + initprot: 0 + nsects: 0 + flags: 0 + - cmd: LC_SEGMENT_64 + cmdsize: 552 + segname: __TEXT + vmaddr: 4294967296 + vmsize: 4096 + fileoff: 0 + filesize: 4096 + maxprot: 5 + initprot: 5 + nsects: 6 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x0000000100000DE0 + size: 383 + offset: 0x00000DE0 + align: 4 + reloff: 0x00000000 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: 
554889E54883EC30488D054112000031C948C7052C1200005704000048C70529120000AE080000488B151A120000897DE44889C7488D45E8488975D84889C6B821000000488955D04889C2E830010000488B7DD0488D35F5110000E840000000488D7DE8E81D01000031C04883C4305DC3662E0F1F8400000000000F1F440000554889E548C745F000000000488975F848897DF05DC3662E0F1F840000000000554889E54883EC3048C745F80000000048C745F00000000048C745E80000000048897DF8488975F0488B06488945E848897DD8488945E0488D3D62110000E81D000000488D7DD84889C6E891FFFFFF4883C4305DC3662E0F1F840000000000904883EC284889F8488B0F4883F9000F9CC2F6C2014889CE48897C2420488944241848894C24104889742408750A488B4424084883C428C3488B44241048C1F82048F7D8488B4C241089CA4863F2488B7C24204801F731D289D6488934244889C6488B1424488B0C24E81F0000004889C1488B54241848890248894C2408EBAE + - sectname: __stubs + segname: __TEXT + addr: 0x0000000100000F60 + size: 18 + offset: 0x00000F60 + align: 1 + reloff: 0x00000000 + nreloc: 0 + flags: 0x80000408 + reserved1: 0x00000000 + reserved2: 0x00000006 + reserved3: 0x00000000 + content: FF259A100000FF259C100000FF259E100000 + - sectname: __stub_helper + segname: __TEXT + addr: 0x0000000100000F74 + size: 46 + offset: 0x00000F74 + align: 2 + reloff: 0x00000000 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: 4C8D1D9D1000004153FF257D000000906800000000E9E6FFFFFF6819000000E9DCFFFFFF6830000000E9D2FFFFFF + - sectname: __swift5_typeref + segname: __TEXT + addr: 0x0000000100000FA2 + size: 7 + offset: 0x00000FA2 + align: 1 + reloff: 0x00000000 + nreloc: 0 + flags: 0x00000000 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: 53695F53697400 + - sectname: __const + segname: __TEXT + addr: 0x0000000100000FAA + size: 2 + offset: 0x00000FAA + align: 1 + reloff: 0x00000000 + nreloc: 0 + flags: 0x00000000 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: '0300' + - sectname: __unwind_info + segname: __TEXT + addr: 0x0000000100000FAC + size: 80 
+ offset: 0x00000FAC + align: 2 + reloff: 0x00000000 + nreloc: 0 + flags: 0x00000000 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: 010000001C000000000000001C000000000000001C00000002000000E00D00003400000034000000600F00000000000034000000030000000C0002001400020000000001000100000000060200000001 + - cmd: LC_SEGMENT_64 + cmdsize: 232 + segname: __DATA_CONST + vmaddr: 4294971392 + vmsize: 4096 + fileoff: 4096 + filesize: 4096 + maxprot: 3 + initprot: 3 + nsects: 2 + flags: 16 + Sections: + - sectname: __got + segname: __DATA_CONST + addr: 0x0000000100001000 + size: 8 + offset: 0x00001000 + align: 3 + reloff: 0x00000000 + nreloc: 0 + flags: 0x00000006 + reserved1: 0x00000003 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: '0000000000000000' + - sectname: __objc_imageinfo + segname: __DATA_CONST + addr: 0x0000000100001008 + size: 8 + offset: 0x00001008 + align: 2 + reloff: 0x00000000 + nreloc: 0 + flags: 0x00000000 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: '0000000040070105' + - cmd: LC_SEGMENT_64 + cmdsize: 312 + segname: __DATA + vmaddr: 4294975488 + vmsize: 4096 + fileoff: 8192 + filesize: 4096 + maxprot: 3 + initprot: 3 + nsects: 3 + flags: 0 + Sections: + - sectname: __la_symbol_ptr + segname: __DATA + addr: 0x0000000100002000 + size: 24 + offset: 0x00002000 + align: 3 + reloff: 0x00000000 + nreloc: 0 + flags: 0x00000007 + reserved1: 0x00000004 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: 840F0000010000008E0F000001000000980F000001000000 + - sectname: __data + segname: __DATA + addr: 0x0000000100002018 + size: 16 + offset: 0x00002018 + align: 3 + reloff: 0x00000000 + nreloc: 0 + flags: 0x00000000 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: 000000000000000082EFFFFFFAFFFFFF + - sectname: __common + segname: __DATA + addr: 0x0000000100002028 + size: 16 + offset: 0x00000000 + align: 3 + reloff: 0x00000000 + nreloc: 0 + flags: 
0x00000001 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __LINKEDIT + vmaddr: 4294979584 + vmsize: 4096 + fileoff: 12288 + filesize: 1924 + maxprot: 1 + initprot: 1 + nsects: 0 + flags: 0 + - cmd: LC_DYLD_INFO_ONLY + cmdsize: 48 + rebase_off: 12288 + rebase_size: 8 + bind_off: 12296 + bind_size: 24 + weak_bind_off: 0 + weak_bind_size: 0 + lazy_bind_off: 12320 + lazy_bind_size: 96 + export_off: 12416 + export_size: 48 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 12472 + nsyms: 41 + stroff: 13156 + strsize: 1056 + - cmd: LC_DYSYMTAB + cmdsize: 80 + ilocalsym: 0 + nlocalsym: 35 + iextdefsym: 35 + nextdefsym: 2 + iundefsym: 37 + nundefsym: 4 + tocoff: 0 + ntoc: 0 + modtaboff: 0 + nmodtab: 0 + extrefsymoff: 0 + nextrefsyms: 0 + indirectsymoff: 13128 + nindirectsyms: 7 + extreloff: 0 + nextrel: 0 + locreloff: 0 + nlocrel: 0 + - cmd: LC_LOAD_DYLINKER + cmdsize: 32 + name: 12 + PayloadString: '/usr/lib/dyld' + ZeroPadBytes: 7 + - cmd: LC_UUID + cmdsize: 24 + uuid: A89F9D23-8190-3946-A70B-B8E833E68640 + - cmd: LC_BUILD_VERSION + cmdsize: 32 + platform: 1 + minos: 659200 + sdk: 659200 + ntools: 1 + Tools: + - tool: 3 + version: 36176384 + - cmd: LC_SOURCE_VERSION + cmdsize: 16 + version: 0 + - cmd: LC_MAIN + cmdsize: 24 + entryoff: 3552 + stacksize: 0 + - cmd: LC_LOAD_DYLIB + cmdsize: 56 + dylib: + name: 24 + timestamp: 2 + current_version: 14942208 + compatibility_version: 65536 + PayloadString: '/usr/lib/libobjc.A.dylib' + ZeroPadBytes: 8 + - cmd: LC_LOAD_DYLIB + cmdsize: 56 + dylib: + name: 24 + timestamp: 2 + current_version: 83951616 + compatibility_version: 65536 + PayloadString: '/usr/lib/libSystem.B.dylib' + ZeroPadBytes: 6 + - cmd: LC_LOAD_DYLIB + cmdsize: 64 + dylib: + name: 24 + timestamp: 2 + current_version: 0 + compatibility_version: 65536 + PayloadString: '/usr/lib/swift/libswiftCore.dylib' + ZeroPadBytes: 7 + - cmd: LC_RPATH + cmdsize: 136 + path: 12 + PayloadString: 
'/Volumes/Jonas/internal-stable/build/Ninja-RelWithDebInfoAssert+stdlib-RelWithDebInfo/swift-macosx-x86_64/lib/swift/macosx' + ZeroPadBytes: 2 + - cmd: LC_RPATH + cmdsize: 128 + path: 12 + PayloadString: '/Applications/XcodeY.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.15.sdk/usr/lib/swift' + ZeroPadBytes: 2 + - cmd: LC_FUNCTION_STARTS + cmdsize: 16 + dataoff: 12464 + datasize: 8 + - cmd: LC_DATA_IN_CODE + cmdsize: 16 + dataoff: 12472 + datasize: 0 +LinkEditData: + RebaseOpcodes: + - Opcode: REBASE_OPCODE_SET_TYPE_IMM + Imm: 1 + - Opcode: REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB + Imm: 3 + ExtraData: + - 0x0000000000000000 + - Opcode: REBASE_OPCODE_DO_REBASE_IMM_TIMES + Imm: 3 + - Opcode: REBASE_OPCODE_DONE + Imm: 0 + BindOpcodes: + - Opcode: BIND_OPCODE_SET_DYLIB_ORDINAL_IMM + Imm: 2 + Symbol: '' + - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM + Imm: 0 + Symbol: dyld_stub_binder + - Opcode: BIND_OPCODE_SET_TYPE_IMM + Imm: 1 + Symbol: '' + - Opcode: BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB + Imm: 2 + ULEBExtraData: + - 0x0000000000000000 + Symbol: '' + - Opcode: BIND_OPCODE_DO_BIND + Imm: 0 + Symbol: '' + - Opcode: BIND_OPCODE_DONE + Imm: 0 + Symbol: '' + LazyBindOpcodes: + - Opcode: BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB + Imm: 3 + ULEBExtraData: + - 0x0000000000000000 + Symbol: '' + - Opcode: BIND_OPCODE_SET_DYLIB_ORDINAL_IMM + Imm: 3 + Symbol: '' + - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM + Imm: 0 + Symbol: _swift_beginAccess + - Opcode: BIND_OPCODE_DO_BIND + Imm: 0 + Symbol: '' + - Opcode: BIND_OPCODE_DONE + Imm: 0 + Symbol: '' + - Opcode: BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB + Imm: 3 + ULEBExtraData: + - 0x0000000000000008 + Symbol: '' + - Opcode: BIND_OPCODE_SET_DYLIB_ORDINAL_IMM + Imm: 3 + Symbol: '' + - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM + Imm: 0 + Symbol: _swift_endAccess + - Opcode: BIND_OPCODE_DO_BIND + Imm: 0 + Symbol: '' + - Opcode: BIND_OPCODE_DONE + Imm: 0 + Symbol: '' + - Opcode: 
BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB + Imm: 3 + ULEBExtraData: + - 0x0000000000000010 + Symbol: '' + - Opcode: BIND_OPCODE_SET_DYLIB_ORDINAL_IMM + Imm: 3 + Symbol: '' + - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM + Imm: 0 + Symbol: _swift_getTypeByMangledNameInContext + - Opcode: BIND_OPCODE_DO_BIND + Imm: 0 + Symbol: '' + - Opcode: BIND_OPCODE_DONE + Imm: 0 + Symbol: '' + - Opcode: BIND_OPCODE_DONE + Imm: 0 + Symbol: '' + - Opcode: BIND_OPCODE_DONE + Imm: 0 + Symbol: '' + - Opcode: BIND_OPCODE_DONE + Imm: 0 + Symbol: '' + - Opcode: BIND_OPCODE_DONE + Imm: 0 + Symbol: '' + - Opcode: BIND_OPCODE_DONE + Imm: 0 + Symbol: '' + ExportTrie: + TerminalSize: 0 + NodeOffset: 0 + Name: '' + Flags: 0x0000000000000000 + Address: 0x0000000000000000 + Other: 0x0000000000000000 + ImportName: '' + Children: + - TerminalSize: 0 + NodeOffset: 5 + Name: _ + Flags: 0x0000000000000000 + Address: 0x0000000000000000 + Other: 0x0000000000000000 + ImportName: '' + Children: + - TerminalSize: 2 + NodeOffset: 33 + Name: _mh_execute_header + Flags: 0x0000000000000000 + Address: 0x0000000000000000 + Other: 0x0000000000000000 + ImportName: '' + - TerminalSize: 3 + NodeOffset: 37 + Name: main + Flags: 0x0000000000000000 + Address: 0x0000000000000DE0 + Other: 0x0000000000000000 + ImportName: '' + NameList: + - n_strx: 118 + n_type: 0x1E + n_sect: 1 + n_desc: 0 + n_value: 4294970976 + - n_strx: 133 + n_type: 0x1E + n_sect: 1 + n_desc: 0 + n_value: 4294971008 + - n_strx: 151 + n_type: 0x1E + n_sect: 1 + n_desc: 128 + n_value: 4294971104 + - n_strx: 199 + n_type: 0x1E + n_sect: 4 + n_desc: 128 + n_value: 4294971298 + - n_strx: 216 + n_type: 0x1E + n_sect: 5 + n_desc: 128 + n_value: 4294971306 + - n_strx: 244 + n_type: 0x0E + n_sect: 10 + n_desc: 0 + n_value: 4294975512 + - n_strx: 259 + n_type: 0x1E + n_sect: 10 + n_desc: 128 + n_value: 4294975520 + - n_strx: 271 + n_type: 0x1E + n_sect: 11 + n_desc: 0 + n_value: 4294975528 + - n_strx: 298 + n_type: 0x1E + n_sect: 11 + n_desc: 0 + 
n_value: 4294975536 + - n_strx: 325 + n_type: 0x32 + n_sect: 0 + n_desc: 0 + n_value: 1570141141 + - n_strx: 540 + n_type: 0x64 + n_sect: 0 + n_desc: 0 + n_value: 0 + - n_strx: 633 + n_type: 0x64 + n_sect: 0 + n_desc: 0 + n_value: 0 + - n_strx: 644 + n_type: 0x66 + n_sect: 3 + n_desc: 1 + n_value: 1570141203 + - n_strx: 1 + n_type: 0x2E + n_sect: 1 + n_desc: 0 + n_value: 4294970848 + - n_strx: 852 + n_type: 0x24 + n_sect: 1 + n_desc: 0 + n_value: 4294970848 + - n_strx: 1 + n_type: 0x24 + n_sect: 0 + n_desc: 0 + n_value: 128 + - n_strx: 1 + n_type: 0x4E + n_sect: 1 + n_desc: 0 + n_value: 128 + - n_strx: 1 + n_type: 0x2E + n_sect: 1 + n_desc: 0 + n_value: 4294970976 + - n_strx: 858 + n_type: 0x24 + n_sect: 1 + n_desc: 0 + n_value: 4294970976 + - n_strx: 1 + n_type: 0x24 + n_sect: 0 + n_desc: 0 + n_value: 32 + - n_strx: 1 + n_type: 0x4E + n_sect: 1 + n_desc: 0 + n_value: 32 + - n_strx: 1 + n_type: 0x2E + n_sect: 1 + n_desc: 0 + n_value: 4294971008 + - n_strx: 873 + n_type: 0x24 + n_sect: 1 + n_desc: 0 + n_value: 4294971008 + - n_strx: 1 + n_type: 0x24 + n_sect: 0 + n_desc: 0 + n_value: 96 + - n_strx: 1 + n_type: 0x4E + n_sect: 1 + n_desc: 0 + n_value: 96 + - n_strx: 1 + n_type: 0x2E + n_sect: 1 + n_desc: 0 + n_value: 4294971104 + - n_strx: 891 + n_type: 0x24 + n_sect: 1 + n_desc: 0 + n_value: 4294971104 + - n_strx: 1 + n_type: 0x24 + n_sect: 0 + n_desc: 0 + n_value: 127 + - n_strx: 1 + n_type: 0x4E + n_sect: 1 + n_desc: 0 + n_value: 127 + - n_strx: 939 + n_type: 0x20 + n_sect: 0 + n_desc: 0 + n_value: 0 + - n_strx: 956 + n_type: 0x20 + n_sect: 0 + n_desc: 0 + n_value: 0 + - n_strx: 984 + n_type: 0x20 + n_sect: 0 + n_desc: 0 + n_value: 0 + - n_strx: 996 + n_type: 0x20 + n_sect: 0 + n_desc: 0 + n_value: 0 + - n_strx: 1023 + n_type: 0x20 + n_sect: 0 + n_desc: 0 + n_value: 0 + - n_strx: 1 + n_type: 0x64 + n_sect: 1 + n_desc: 0 + n_value: 0 + - n_strx: 2 + n_type: 0x0F + n_sect: 1 + n_desc: 16 + n_value: 4294967296 + - n_strx: 22 + n_type: 0x0F + n_sect: 1 + n_desc: 0 + 
n_value: 4294970848 + - n_strx: 28 + n_type: 0x01 + n_sect: 0 + n_desc: 768 + n_value: 0 + - n_strx: 47 + n_type: 0x01 + n_sect: 0 + n_desc: 768 + n_value: 0 + - n_strx: 64 + n_type: 0x01 + n_sect: 0 + n_desc: 768 + n_value: 0 + - n_strx: 101 + n_type: 0x01 + n_sect: 0 + n_desc: 512 + n_value: 0 + StringTable: + - ' ' + - __mh_execute_header + - _main + - _swift_beginAccess + - _swift_endAccess + - _swift_getTypeByMangledNameInContext + - dyld_stub_binder + - '_$s1a3useyyxlF' + - '_$s1a1fyySi_SiztF' + - ___swift_instantiateConcreteTypeFromMangledName + - _symbolic Si_Sit + - ___swift_reflection_version + - __dyld_private + - '_$sSi_SitMD' + - '_$s1a15global_constantSivp' + - '_$s1a15global_variableSivp' + - '/Volumes/Jonas/internal-stable/build/Ninja-RelWithDebInfoAssert+stdlib-RelWithDebInfo/lldb-macosx-x86_64/lldb-test-build.noindex/lang/swift/variables/let/TestSwiftLetConstants.test_let_constants_dwarf/a.swiftmodule' + - '/Volumes/Jonas/internal-stable/lldb/packages/Python/lldbsuite/test/lang/swift/variables/let/' + - main.swift + - '/Volumes/Jonas/internal-stable/build/Ninja-RelWithDebInfoAssert+stdlib-RelWithDebInfo/lldb-macosx-x86_64/lldb-test-build.noindex/lang/swift/variables/let/TestSwiftLetConstants.test_let_constants_dwarf/main.o' + - _main + - '_$s1a3useyyxlF' + - '_$s1a1fyySi_SiztF' + - ___swift_instantiateConcreteTypeFromMangledName + - _symbolic Si_Sit + - ___swift_reflection_version + - '_$sSi_SitMD' + - '_$s1a15global_constantSivp' + - '_$s1a15global_variableSivp' + - '' + - '' + - '' + - '' + - '' + - '' +... 
diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index d109dc1..66ac290 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -2038,19 +2038,22 @@ UUID ObjectFileMachO::GetSharedCacheUUID(FileSpec dyld_shared_cache, return dsc_uuid; } -static bool ParseNList(DataExtractor &nlist_data, - lldb::offset_t &nlist_data_offset, - size_t nlist_byte_size, struct nlist_64 &nlist) { +static llvm::Optional<struct nlist_64> +ParseNList(DataExtractor &nlist_data, lldb::offset_t &nlist_data_offset, + size_t nlist_byte_size) { + struct nlist_64 nlist; if (!nlist_data.ValidOffsetForDataOfSize(nlist_data_offset, nlist_byte_size)) - return false; + return {}; nlist.n_strx = nlist_data.GetU32_unchecked(&nlist_data_offset); nlist.n_type = nlist_data.GetU8_unchecked(&nlist_data_offset); nlist.n_sect = nlist_data.GetU8_unchecked(&nlist_data_offset); nlist.n_desc = nlist_data.GetU16_unchecked(&nlist_data_offset); nlist.n_value = nlist_data.GetAddress_unchecked(&nlist_data_offset); - return true; + return nlist; } +enum { DebugSymbols = true, NonDebugSymbols = false }; + size_t ObjectFileMachO::ParseSymtab() { static Timer::Category func_cat(LLVM_PRETTY_FUNCTION); Timer scoped_timer(func_cat, "ObjectFileMachO::ParseSymtab () module = %s", @@ -3680,10 +3683,18 @@ size_t ObjectFileMachO::ParseSymtab() { typedef std::map SymbolIndexToName; UndefinedNameToDescMap undefined_name_to_desc; SymbolIndexToName reexport_shlib_needs_fixup; - for (; nlist_idx < symtab_load_command.nsyms; ++nlist_idx) { - struct nlist_64 nlist; - if (!ParseNList(nlist_data, nlist_data_offset, nlist_byte_size, nlist)) - break; + + // Symtab parsing is a huge mess. Everything is entangled and the code + // requires access to a ridiculous number of variables. 
LLDB depends + // heavily on the proper merging of symbols and to get that right we need + // to make sure we have parsed all the debug symbols first. Therefore we + // invoke the lambda twice, once to parse only the debug symbols and then + // once more to parse the remaining symbols. + auto ParseSymbolLambda = [&](struct nlist_64 &nlist, uint32_t nlist_idx, + bool debug_only) { + const bool is_debug = ((nlist.n_type & N_STAB) != 0); + if (is_debug != debug_only) + return true; const char *symbol_name_non_abi_mangled = nullptr; const char *symbol_name = nullptr; @@ -3699,7 +3710,7 @@ size_t ObjectFileMachO::ParseSymtab() { "0x%x in %s, ignoring symbol\n", nlist_idx, nlist.n_strx, module_sp->GetFileSpec().GetPath().c_str()); - continue; + return true; } if (symbol_name[0] == '\0') symbol_name = nullptr; @@ -3719,7 +3730,6 @@ size_t ObjectFileMachO::ParseSymtab() { bool demangled_is_synthesized = false; bool set_value = true; - const bool is_debug = ((nlist.n_type & N_STAB) != 0); assert(sym_idx < num_syms); sym[sym_idx].SetDebug(is_debug); @@ -4270,7 +4280,7 @@ size_t ObjectFileMachO::ParseSymtab() { if (!add_nlist) { sym[sym_idx].Clear(); - continue; + return true; } uint64_t symbol_value = nlist.n_value; @@ -4361,7 +4371,6 @@ size_t ObjectFileMachO::ParseSymtab() { range; range = N_FUN_addr_to_sym_idx.equal_range(nlist.n_value); if (range.first != range.second) { - bool found_it = false; for (ValueToSymbolIndexMap::const_iterator pos = range.first; pos != range.second; ++pos) { if (sym[sym_idx].GetMangled().GetName(lldb::eLanguageTypeUnknown, @@ -4378,12 +4387,9 @@ size_t ObjectFileMachO::ParseSymtab() { resolver_addresses.end()) sym[pos->second].SetType(eSymbolTypeResolver); sym[sym_idx].Clear(); - found_it = true; - break; + return true; } } - if (found_it) - continue; } else { if (resolver_addresses.find(nlist.n_value) != resolver_addresses.end()) @@ -4401,7 +4407,6 @@ size_t ObjectFileMachO::ParseSymtab() { range; range = 
N_STSYM_addr_to_sym_idx.equal_range(nlist.n_value); if (range.first != range.second) { - bool found_it = false; for (ValueToSymbolIndexMap::const_iterator pos = range.first; pos != range.second; ++pos) { if (sym[sym_idx].GetMangled().GetName(lldb::eLanguageTypeUnknown, @@ -4415,12 +4420,9 @@ size_t ObjectFileMachO::ParseSymtab() { sym[pos->second].SetExternal(sym[sym_idx].IsExternal()); sym[pos->second].SetFlags(nlist.n_type << 16 | nlist.n_desc); sym[sym_idx].Clear(); - found_it = true; - break; + return true; } } - if (found_it) - continue; } else { // Combine N_GSYM stab entries with the non stab symbol. const char *gsym_name = sym[sym_idx] @@ -4443,7 +4445,7 @@ size_t ObjectFileMachO::ParseSymtab() { // the symbol table. sym[GSYM_sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc); sym[sym_idx].Clear(); - continue; + return true; } } } @@ -4467,6 +4469,36 @@ size_t ObjectFileMachO::ParseSymtab() { sym[sym_idx].SetDemangledNameIsSynthesized(true); ++sym_idx; + return true; + }; + + // First parse all the nlists but don't process them yet. See the next + // comment for an explanation why. + std::vector<struct nlist_64> nlists; + nlists.reserve(symtab_load_command.nsyms); + for (; nlist_idx < symtab_load_command.nsyms; ++nlist_idx) { + if (auto nlist = + ParseNList(nlist_data, nlist_data_offset, nlist_byte_size)) + nlists.push_back(*nlist); + else + break; + } + + // Now parse all the debug symbols. This is needed to merge non-debug + // symbols in the next step. Non-debug symbols are always coalesced into + // the debug symbol. Doing this in one step would mean that some symbols + // won't be merged. + nlist_idx = 0; + for (auto &nlist : nlists) { + if (!ParseSymbolLambda(nlist, nlist_idx++, DebugSymbols)) + break; + } + + // Finally parse all the non-debug symbols. + nlist_idx = 0; + for (auto &nlist : nlists) { + if (!ParseSymbolLambda(nlist, nlist_idx++, NonDebugSymbols)) + break; } for (const auto &pos : reexport_shlib_needs_fixup) { -- 2.7.4