From 73712c8790a93c29e513f5e201f92ac5b2370cf9 Mon Sep 17 00:00:00 2001 From: Alexander Yermolovich Date: Tue, 10 Jan 2023 15:14:47 -0800 Subject: [PATCH] [DWARFLibrary] Add support to re-construct cu-index According to DWARF5 specification and gnu specification for DWARF4 the offset entry in the CU/TU Index is 32 bits. This presents a problem when .debug_info.dwo in DWP file grows beyond 4GB. The CU Index becomes partially corrupted. This diff adds manual parsing of .debug_info.dwo/.debug_abbrev.dwo to reconstruct CU index in general, and TU index for DWARF5. This is a work around until DWARF6 spec is finalized. Next patch will change internal CU/TU struct to 64 bit, and change uses as necessary. The plan is to land all the patches in one go after all are approved. This patch originates from the discussion in: https://discourse.llvm.org/t/dwarf-dwp-4gb-limit/63902 Reviewed By: dblaikie Differential Revision: https://reviews.llvm.org/D137882 --- llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h | 12 +++ llvm/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h | 6 ++ llvm/lib/DebugInfo/DWARF/DWARFContext.cpp | 75 +++++++++++++++++- llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp | 5 ++ .../tools/llvm-dwp/X86/cu_tu_units_manual_v5.s | 92 ++++++++++++++++++++++ llvm/test/tools/llvm-dwp/X86/debug_macro_v5.s | 6 +- llvm/test/tools/llvm-dwp/X86/type_dedup.test | 27 ++++++- llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp | 8 ++ 8 files changed, 226 insertions(+), 5 deletions(-) create mode 100644 llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v5.s diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h index 5dbec26..735bd2d 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h @@ -107,6 +107,10 @@ class DWARFContext : public DIContext { MacroDwoSection }; + // When set parses debug_info.dwo/debug_abbrev.dwo manually and populates CU + // Index, and TU Index for 
DWARF5.
+  bool ParseCUTUIndexManually = false;
+
 public:
   DWARFContext(std::unique_ptr DObj,
                std::string DWPName = "",
@@ -443,6 +447,14 @@ public:
   /// into "SectionedAddress Address"
   DWARFCompileUnit *getCompileUnitForAddress(uint64_t Address);
 
+  /// Returns whether the CU/TU indexes should be populated manually. The TU
+  /// Index is populated manually only for DWARF5.
+  bool getParseCUTUIndexManually() const { return ParseCUTUIndexManually; }
+
+  /// Sets whether the CU/TU indexes should be populated manually. The TU
+  /// Index is populated manually only for DWARF5.
+  void setParseCUTUIndexManually(bool PCUTU) { ParseCUTUIndexManually = PCUTU; }
+
 private:
   /// Parse a macro[.dwo] or macinfo[.dwo] section.
   std::unique_ptr
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h
index f2a8611..e01aa41 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h
@@ -137,12 +137,14 @@ public:
   public:
     const SectionContribution *getContribution(DWARFSectionKind Sec) const;
     const SectionContribution *getContribution() const;
+    SectionContribution &getContribution();
 
     const SectionContribution *getContributions() const {
       return Contributions.get();
     }
 
     uint64_t getSignature() const { return Signature; }
+    bool isValid() const { return Index; }
   };
 
 private:
@@ -183,6 +185,10 @@ public:
   ArrayRef getRows() const {
     return ArrayRef(Rows.get(), Header.NumBuckets);
   }
+
+  MutableArrayRef<Entry> getMutableRows() {
+    return makeMutableArrayRef(Rows.get(), Header.NumBuckets);
+  }
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
index c9be03a..e19857f 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -779,14 +779,82 @@ bool DWARFContext::verify(raw_ostream &OS, DIDumpOptions DumpOpts) {
   return Success;
 }
 
+/// Recomputes the unit offsets in \p Index from the .debug_info.dwo unit
+/// headers, since the index itself only stores truncated 32-bit offsets.
+static void fixupIndex(const DWARFObject &DObj, DWARFContext &C,
+                       DWARFUnitIndex &Index) {
+  using EntryType = DWARFUnitIndex::Entry::SectionContribution;
+  // Maps the truncated (32-bit) unit offset to the real offset and length.
+  DenseMap<uint32_t, EntryType> Map;
+  if (DObj.getCUIndexSection().empty())
+    return;
+
+  uint64_t Offset = 0;
+  uint32_t TruncOffset = 0;
+  DObj.forEachInfoDWOSections([&](const DWARFSection &S) {
+    if (!(C.getParseCUTUIndexManually() ||
+          S.Data.size() >= std::numeric_limits<uint32_t>::max()))
+      return;
+
+    DWARFDataExtractor Data(DObj, S, C.isLittleEndian(), 0);
+    while (Data.isValidOffset(Offset)) {
+      DWARFUnitHeader Header;
+      if (!Header.extract(C, Data, &Offset, DWARFSectionKind::DW_SECT_INFO)) {
+        logAllUnhandledErrors(
+            createError("Failed to parse CU header in DWP file"), errs());
+        Map.clear();
+        break;
+      }
+
+      auto Iter = Map.insert({TruncOffset,
+                              {Header.getOffset(), Header.getNextUnitOffset() -
+                                                       Header.getOffset()}});
+      if (!Iter.second) {
+        logAllUnhandledErrors(
+            createError("Collision occured between for truncated offset 0x" +
+                        Twine::utohexstr(TruncOffset)),
+            errs());
+        Map.clear();
+        return;
+      }
+      // Mirror the index: it only keeps the low 32 bits of each unit offset.
+      Offset = Header.getNextUnitOffset();
+      TruncOffset = static_cast<uint32_t>(Offset);
+    }
+  });
+
+  if (Map.empty())
+    return;
+
+  for (DWARFUnitIndex::Entry &E : Index.getMutableRows()) {
+    if (!E.isValid())
+      continue;
+    DWARFUnitIndex::Entry::SectionContribution &CUOff = E.getContribution();
+    auto Iter = Map.find(CUOff.getOffset());
+    if (Iter == Map.end()) {
+      logAllUnhandledErrors(createError("Could not find CU offset 0x" +
+                                        Twine::utohexstr(CUOff.getOffset()) +
+                                        " in the Map"),
+                            errs());
+      break;
+    }
+    CUOff.setOffset(Iter->second.getOffset());
+    if (CUOff.getLength() != Iter->second.getLength())
+      logAllUnhandledErrors(createError("Length of CU in CU index doesn't "
+                                        "match calculated length at offset 0x" +
+                                        Twine::utohexstr(CUOff.getOffset())),
+                            errs());
+  }
+}
+
 const DWARFUnitIndex &DWARFContext::getCUIndex() {
   if (CUIndex)
     return *CUIndex;
 
   DataExtractor CUIndexData(DObj->getCUIndexSection(), isLittleEndian(), 0);
-
   CUIndex = std::make_unique(DW_SECT_INFO);
CUIndex->parse(CUIndexData); + fixupIndex(*DObj, *this, *CUIndex.get()); return *CUIndex; } @@ -795,9 +863,12 @@ const DWARFUnitIndex &DWARFContext::getTUIndex() { return *TUIndex; DataExtractor TUIndexData(DObj->getTUIndexSection(), isLittleEndian(), 0); - TUIndex = std::make_unique(DW_SECT_EXT_TYPES); TUIndex->parse(TUIndexData); + // A version 2 TU index refers to the .debug_types section, so there is + // nothing to fix up; only other index versions are rebuilt. + if (TUIndex->getVersion() != 2) + fixupIndex(*DObj, *this, *TUIndex.get()); return *TUIndex; } diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp index 90eb96c..0963739 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp @@ -269,6 +269,11 @@ DWARFUnitIndex::Entry::getContribution(DWARFSectionKind Sec) const { return nullptr; } +DWARFUnitIndex::Entry::SectionContribution & +DWARFUnitIndex::Entry::getContribution() { + return Contributions[Index->InfoColumn]; +} + const DWARFUnitIndex::Entry::SectionContribution * DWARFUnitIndex::Entry::getContribution() const { return &Contributions[Index->InfoColumn]; diff --git a/llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v5.s b/llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v5.s new file mode 100644 index 0000000..b852608 --- /dev/null +++ b/llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v5.s @@ -0,0 +1,92 @@ +# This test checks that we can correctly parse the CU and TU indexes manually for DWARF5. 
+ +# RUN: llvm-mc -triple x86_64-unknown-linux %s -filetype=obj -o %t.o \ +# RUN: -split-dwarf-file=%t.dwo -dwarf-version=5 +# RUN: llvm-dwp %t.dwo -o %t.dwp +# RUN: llvm-dwarfdump -debug-info -debug-cu-index -debug-tu-index %t.dwp | FileCheck -check-prefix=CHECK %s +# RUN: llvm-dwarfdump -debug-info -debug-cu-index -debug-tu-index -manaully-generate-unit-index %t.dwp | FileCheck -check-prefix=CHECK2 %s + +## Note: In order to check whether the type unit index is generated +## there is no need to add the missing DIEs for the structure type of the type unit. + +# CHECK-DAG: .debug_info.dwo contents: +# CHECK: 0x00000000: Type Unit: length = 0x00000017, format = DWARF32, version = 0x0005, unit_type = DW_UT_split_type, abbr_offset = 0x0000, addr_size = 0x08, name = '', type_signature = [[TUID1:.*]], type_offset = 0x0019 (next unit at 0x0000001b) +# CHECK: 0x0000001b: Type Unit: length = 0x00000017, format = DWARF32, version = 0x0005, unit_type = DW_UT_split_type, abbr_offset = 0x0000, addr_size = 0x08, name = '', type_signature = [[TUID2:.*]], type_offset = 0x0019 (next unit at 0x00000036) +# CHECK: 0x00000036: Compile Unit: length = 0x00000011, format = DWARF32, version = 0x0005, unit_type = DW_UT_split_compile, abbr_offset = 0x0000, addr_size = 0x08, DWO_id = [[CUID1:.*]] (next unit at 0x0000004b) +# CHECK-DAG: .debug_cu_index contents: +# CHECK: version = 5, units = 1, slots = 2 +# CHECK: Index Signature INFO ABBREV +# CHECK: 1 [[CUID1]] [0x0000000000000036, 0x000000000000004b) [0x00000000, 0x00000010) +# CHECK-DAG: .debug_tu_index contents: +# CHECK: version = 5, units = 2, slots = 4 +# CHECK: Index Signature INFO ABBREV +# CHECK: 1 [[TUID1]] [0x0000000000000000, 0x000000000000001b) [0x00000000, 0x00000010) +# CHECK: 4 [[TUID2]] [0x000000000000001b, 0x0000000000000036) [0x00000000, 0x00000010) + +# CHECK2-DAG: .debug_info.dwo contents: +# CHECK2: 0x00000000: Type Unit: length = 0x00000017, format = DWARF32, version = 0x0005, unit_type = DW_UT_split_type, 
abbr_offset = 0x0000, addr_size = 0x08, name = '', type_signature = [[TUID1:.*]], type_offset = 0x0019 (next unit at 0x0000001b) +# CHECK2: 0x0000001b: Type Unit: length = 0x00000017, format = DWARF32, version = 0x0005, unit_type = DW_UT_split_type, abbr_offset = 0x0000, addr_size = 0x08, name = '', type_signature = [[TUID2:.*]], type_offset = 0x0019 (next unit at 0x00000036) +# CHECK2: 0x00000036: Compile Unit: length = 0x00000011, format = DWARF32, version = 0x0005, unit_type = DW_UT_split_compile, abbr_offset = 0x0000, addr_size = 0x08, DWO_id = [[CUID1:.*]] (next unit at 0x0000004b) +# CHECK2-DAG: .debug_cu_index contents: +# CHECK2: version = 5, units = 1, slots = 2 +# CHECK2: Index Signature INFO ABBREV +# CHECK2: 1 [[CUID1]] [0x0000000000000036, 0x000000000000004b) [0x00000000, 0x00000010) +# CHECK2-DAG: .debug_tu_index contents: +# CHECK2: version = 5, units = 2, slots = 4 +# CHECK2: Index Signature INFO ABBREV +# CHECK2: 1 [[TUID1]] [0x0000000000000000, 0x000000000000001b) [0x00000000, 0x00000010) +# CHECK2: 4 [[TUID2]] [0x000000000000001b, 0x0000000000000036) [0x00000000, 0x00000010) + + .section .debug_info.dwo,"e",@progbits + .long .Ldebug_info_dwo_end0-.Ldebug_info_dwo_start0 # Length of Unit +.Ldebug_info_dwo_start0: + .short 5 # DWARF version number + .byte 6 # DWARF Unit Type (DW_UT_split_type) + .byte 8 # Address Size (in bytes) + .long 0 # Offset Into Abbrev. Section + .quad 5657452045627120676 # Type Signature + .long 25 # Type DIE Offset + .byte 2 # Abbrev [2] DW_TAG_type_unit + .byte 3 # Abbrev [3] DW_TAG_structure_type + .byte 0 # End Of Children Mark +.Ldebug_info_dwo_end0: + .section .debug_info.dwo,"e",@progbits + .long .Ldebug_info_dwo_end1-.Ldebug_info_dwo_start1 # Length of Unit +.Ldebug_info_dwo_start1: + .short 5 # DWARF version number + .byte 6 # DWARF Unit Type (DW_UT_split_type) + .byte 8 # Address Size (in bytes) + .long 0 # Offset Into Abbrev. 
Section + .quad -8528522068957683993 # Type Signature + .long 25 # Type DIE Offset + .byte 4 # Abbrev [4] DW_TAG_type_unit + .byte 5 # Abbrev [5] DW_TAG_structure_type + .byte 0 # End Of Children Mark +.Ldebug_info_dwo_end1: + .section .debug_info.dwo,"e",@progbits + .long .Ldebug_info_dwo_end2-.Ldebug_info_dwo_start2 # Length of Unit +.Ldebug_info_dwo_start2: + .short 5 # DWARF version number + .byte 5 # DWARF Unit Type (DW_UT_split_compile) + .byte 8 # Address Size (in bytes) + .long 0 # Offset Into Abbrev. Section + .quad 1152943841751211454 + .byte 1 # Abbrev [1] DW_TAG_compile_unit +.Ldebug_info_dwo_end2: + .section .debug_abbrev.dwo,"e",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 0 # DW_CHILDREN_no + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 65 # DW_TAG_type_unit + .byte 1 # DW_CHILDREN_yes + .byte 0 # EOM + .byte 0 # EOM + .byte 4 # Abbreviation Code + .byte 65 # DW_TAG_type_unit + .byte 1 # DW_CHILDREN_yes + .byte 0 # EOM + .byte 0 # EOM + .byte 0 # EOM diff --git a/llvm/test/tools/llvm-dwp/X86/debug_macro_v5.s b/llvm/test/tools/llvm-dwp/X86/debug_macro_v5.s index f95a218..e554294 100644 --- a/llvm/test/tools/llvm-dwp/X86/debug_macro_v5.s +++ b/llvm/test/tools/llvm-dwp/X86/debug_macro_v5.s @@ -2,7 +2,8 @@ # RUN: llvm-mc -triple x86_64-unknown-linux --filetype=obj --split-dwarf-file=%t.dwo -dwarf-version=5 %s -o %t.o # RUN: llvm-dwp %t.dwo -o %t.dwp 2>&1 -# RUN: llvm-dwarfdump -debug-macro -debug-cu-index %t.dwp | FileCheck %s +# RUN: llvm-dwarfdump -debug-macro -debug-cu-index %t.dwp | FileCheck -check-prefix=CHECK %s +# RUN: llvm-dwarfdump -debug-macro -debug-cu-index -manaully-generate-unit-index %t.dwp | FileCheck -check-prefix=CHECK2 %s # CHECK-DAG: .debug_macro.dwo contents: # CHECK: macro header: version = 0x0005, flags = 0x00, format = DWARF32 @@ -15,6 +16,9 @@ # CHECK: Index Signature INFO ABBREV STR_OFFSETS MACRO # CHECK: 1 0x0000000000000000 [0x0000000000000000, 
0x0000000000000019) [0x00000000, 0x00000008) [0x00000000, 0x0000000c) [0x00000000, 0x0000000b) +# CHECK2: Index Signature INFO ABBREV STR_OFFSETS MACRO +# CHECK2: 1 0x0000000000000000 [0x0000000000000000, 0x0000000000000019) [0x00000000, 0x00000008) [0x00000000, 0x0000000c) [0x00000000, 0x0000000b) + .section .debug_info.dwo,"e",@progbits .long .Ldebug_info_dwo_end0-.Ldebug_info_dwo_start0 # Length of Unit .Ldebug_info_dwo_start0: diff --git a/llvm/test/tools/llvm-dwp/X86/type_dedup.test b/llvm/test/tools/llvm-dwp/X86/type_dedup.test index a59de66..78e50fe 100644 --- a/llvm/test/tools/llvm-dwp/X86/type_dedup.test +++ b/llvm/test/tools/llvm-dwp/X86/type_dedup.test @@ -1,8 +1,10 @@ RUN: llvm-dwp %p/../Inputs/type_dedup/a.dwo %p/../Inputs/type_dedup/b.dwo -o %t -RUN: llvm-dwarfdump -v %t | FileCheck %s +RUN: llvm-dwarfdump -v %t | FileCheck -check-prefix=CHECK %s +RUN: llvm-dwarfdump -v -manaully-generate-unit-index %t | FileCheck -check-prefix=CHECK2 %s RUN: llvm-dwp %p/../Inputs/type_dedup/b.dwo -o %tb.dwp RUN: llvm-dwp %p/../Inputs/type_dedup/a.dwo %tb.dwp -o %t -RUN: llvm-dwarfdump -v %t | FileCheck %s +RUN: llvm-dwarfdump -v %t | FileCheck -check-prefix=CHECK %s +RUN: llvm-dwarfdump -v -manaully-generate-unit-index %t | FileCheck -check-prefix=CHECK2 %s a.cpp: struct common { }; @@ -36,3 +38,24 @@ CHECK: DW_TAG_type_unit CHECK: 0x00000066: DW_TAG_structure_type CHECK: DW_AT_name {{.*}} "bdistinct" CHECK-NOT: Type Unit + +CHECK2-LABEL: .debug_types.dwo contents: +CHECK2: [[COMMONUOFF:0x[0-9a-f]*]]: +CHECK2-LABEL: Type Unit: length = 0x00000020, format = DWARF32, version = 0x0004, abbr_offset = +CHECK2: 0x0000, addr_size = 0x08, name = 'common', type_signature = [[COMMONSIG:0x[0-9a-f]*]], type_offset = 0x[[COMMONOFF:.*]] (next unit at [[AUOFF:.*]]) +CHECK2: DW_TAG_type_unit +CHECK2: [[COMMONOFF]]: DW_TAG_structure_type +CHECK2: DW_AT_name {{.*}} "common" +CHECK2: [[AUOFF]]: +CHECK2-LABEL: Type Unit: length = 0x00000020, format = DWARF32, version = 0x0004, 
abbr_offset =
+CHECK2: 0x0000, addr_size = 0x08, name = 'adistinct', type_signature = [[ASIG:0x[0-9a-f]*]], type_offset = 0x[[AOFF:.*]] (next unit at [[BUOFF:.*]])
+CHECK2: DW_TAG_type_unit
+CHECK2: 0x00000042: DW_TAG_structure_type
+CHECK2: DW_AT_name {{.*}} "adistinct"
+CHECK2: [[BUOFF]]:
+CHECK2-LABEL: Type Unit: length = 0x00000020, format = DWARF32, version = 0x0004, abbr_offset =
+CHECK2: 0x{{.*}}, addr_size = 0x08, name = 'bdistinct', type_signature = [[BSIG:0x[0-9a-f]*]], type_offset = 0x[[BOFF:.*]] (next unit at [[XUOFF:.*]])
+CHECK2: DW_TAG_type_unit
+CHECK2: 0x00000066: DW_TAG_structure_type
+CHECK2: DW_AT_name {{.*}} "bdistinct"
+CHECK2-NOT: Type Unit
diff --git a/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
index c151129..27330a5 100644
--- a/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
+++ b/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
@@ -249,6 +249,13 @@ static cl::opt
              cl::desc("Show the sizes of all debug sections, "
                       "expressed in bytes."),
              cat(DwarfDumpCategory));
+static cl::opt<bool> ManuallyGenerateUnitIndex(
+    "manaully-generate-unit-index",
+    cl::desc("if the input is dwp file, parse .debug_info "
+             "section and use it to populate "
+             "DW_SECT_INFO contributions in cu-index. "
+             "For DWARF5 it also populated TU Index."),
+    cl::init(false), cl::Hidden, cl::cat(DwarfDumpCategory));
 static cl::opt
     ShowSources("show-sources",
                 cl::desc("Show the sources across all compilation units."),
@@ -675,6 +682,7 @@ static bool handleBuffer(StringRef Filename, MemoryBufferRef Buffer,
       std::unique_ptr DICtx = DWARFContext::create(
           *Obj, DWARFContext::ProcessDebugRelocations::Process, nullptr, "",
           RecoverableErrorHandler);
+      DICtx->setParseCUTUIndexManually(ManuallyGenerateUnitIndex);
       if (!HandleObj(*Obj, *DICtx, Filename, OS))
         Result = false;
     }
-- 
2.7.4