[BOLT][DWARF] Handle shared abbrev section
authorAlexander Yermolovich <ayermolo@fb.com>
Mon, 31 Jan 2022 19:06:06 +0000 (11:06 -0800)
committerAlexander Yermolovich <ayermolo@fb.com>
Mon, 31 Jan 2022 19:10:23 +0000 (11:10 -0800)
We can have a scenario where multiple CUs share an abbrev table.
We modify or don't modify one CU, which leads to other CUs having invalid abbrev section.
Example that caused it.
All of CUs shared the same abbrev table. First CU just had compile_unit and sub_program.
It was not modified. Next CU had DW_TAG_lexical_block with
DW_AT_low_pc/DW_AT_high_pc converted to DW_AT_low_pc/DW_AT_ranges.
We used unmodified abbrev section for first and subsequent CUs.
So when parsing subsequent CUs debug info was corrupted.

In this patch we will now duplicate all sections that are modified and are different.
This also means that if .debug_types is present and it shares Abbrev table, and
they usually are, we now can have two Abbrev tables. One for CU that was modified,
and unmodified one for TU.

Reviewed By: maksfb

Differential Revision: https://reviews.llvm.org/D118517

bolt/include/bolt/Core/DebugData.h
bolt/lib/Core/DebugData.cpp
bolt/test/X86/shared-abbrev.s [new file with mode: 0644]

index 91041c5..2ea10cb 100644 (file)
@@ -715,8 +715,11 @@ class DebugAbbrevWriter {
     std::unique_ptr<DebugBufferVector> Buffer;
     std::unique_ptr<raw_svector_ostream> Stream;
   };
-  /// Map original unit abbrev offset to abbreviations data.
-  std::map<uint64_t, AbbrevData> UnitsAbbrevData;
+  /// Map original unit to abbreviations data.
+  std::unordered_map<const DWARFUnit *, AbbrevData *> UnitsAbbrevData;
+
+  /// Map from Hash Signature to AbbrevData.
+  llvm::StringMap<std::unique_ptr<AbbrevData>> AbbrevDataCache;
 
   /// Attributes substitution (patch) information.
   struct PatchInfo {
@@ -783,10 +786,8 @@ public:
   /// Return an offset in the finalized abbrev section corresponding to CU/TU.
   uint64_t getAbbreviationsOffsetForUnit(const DWARFUnit &Unit) {
     assert(!DWOId && "offsets are tracked for non-DWO units only");
-    assert(UnitsAbbrevData.find(Unit.getAbbreviationsOffset()) !=
-               UnitsAbbrevData.end() &&
-           "no abbrev data found for unit");
-    return UnitsAbbrevData[Unit.getAbbreviationsOffset()].Offset;
+    assert(UnitsAbbrevData.count(&Unit) && "no abbrev data found for unit");
+    return UnitsAbbrevData[&Unit]->Offset;
   }
 };
 
index 0f893a6..2979a6b 100644 (file)
@@ -18,6 +18,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/EndianStream.h"
 #include "llvm/Support/LEB128.h"
+#include "llvm/Support/SHA1.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
@@ -694,20 +695,30 @@ void DebugAbbrevWriter::addUnitAbbreviations(DWARFUnit &Unit) {
   if (!Abbrevs)
     return;
 
-  // Multiple units may share the same abbreviations. Only add abbreviations
-  // for the first unit and reuse them.
-  const uint64_t AbbrevOffset = Unit.getAbbreviationsOffset();
-  if (UnitsAbbrevData.find(AbbrevOffset) != UnitsAbbrevData.end())
-    return;
+  const PatchesTy &UnitPatches = Patches[&Unit];
 
-  AbbrevData &UnitData = UnitsAbbrevData[AbbrevOffset];
+  // We are duplicating abbrev sections, to handle the case where for one CU we
+  // modify it, but for another we don't.
+  auto UnitDataPtr = std::make_unique<AbbrevData>();
+  AbbrevData &UnitData = *UnitDataPtr.get();
   UnitData.Buffer = std::make_unique<DebugBufferVector>();
   UnitData.Stream = std::make_unique<raw_svector_ostream>(*UnitData.Buffer);
-
-  const PatchesTy &UnitPatches = Patches[&Unit];
-
   raw_svector_ostream &OS = *UnitData.Stream.get();
 
+  // Returns true if AbbrevData is re-used, false otherwise.
+  auto hashAndAddAbbrev = [&](StringRef AbbrevData) -> bool {
+    llvm::SHA1 Hasher;
+    Hasher.update(AbbrevData);
+    StringRef Key = Hasher.final();
+    auto Iter = AbbrevDataCache.find(Key);
+    if (Iter != AbbrevDataCache.end()) {
+      UnitsAbbrevData[&Unit] = Iter->second.get();
+      return true;
+    }
+    AbbrevDataCache[Key] = std::move(UnitDataPtr);
+    UnitsAbbrevData[&Unit] = &UnitData;
+    return false;
+  };
   // Take a fast path if there are no patches to apply. Simply copy the original
   // contents.
   if (UnitPatches.empty()) {
@@ -756,9 +767,10 @@ void DebugAbbrevWriter::addUnitAbbreviations(DWARFUnit &Unit) {
       AbbrevContents = AbbrevSectionContents;
     }
 
-    OS.reserveExtraSpace(AbbrevContents.size());
-    OS << AbbrevContents;
-
+    if (!hashAndAddAbbrev(AbbrevContents)) {
+      OS.reserveExtraSpace(AbbrevContents.size());
+      OS << AbbrevContents;
+    }
     return;
   }
 
@@ -798,9 +810,13 @@ void DebugAbbrevWriter::addUnitAbbreviations(DWARFUnit &Unit) {
     encodeULEB128(0, OS);
   }
   encodeULEB128(0, OS);
+
+  hashAndAddAbbrev(OS.str());
 }
 
 std::unique_ptr<DebugBufferVector> DebugAbbrevWriter::finalize() {
+  // Used to create determinism for writing out abbrevs.
+  std::vector<AbbrevData *> Abbrevs;
   if (DWOId) {
     // We expect abbrev_offset to always be zero for DWO units as there
     // should be one CU per DWO, and TUs should share the same abbreviation
@@ -818,33 +834,40 @@ std::unique_ptr<DebugBufferVector> DebugAbbrevWriter::finalize() {
       }
     }
 
+    DWARFUnit *Unit = Context.getDWOCompileUnitForHash(*DWOId);
     // Issue abbreviations for the DWO CU only.
-    addUnitAbbreviations(*Context.getDWOCompileUnitForHash(*DWOId));
+    addUnitAbbreviations(*Unit);
+    AbbrevData *Abbrev = UnitsAbbrevData[Unit];
+    Abbrevs.push_back(Abbrev);
   } else {
+    Abbrevs.reserve(Context.getNumCompileUnits() + Context.getNumTypeUnits());
+    std::unordered_set<AbbrevData *> ProcessedAbbrevs;
     // Add abbreviations from compile and type non-DWO units.
-    for (const std::unique_ptr<DWARFUnit> &Unit : Context.normal_units())
+    for (const std::unique_ptr<DWARFUnit> &Unit : Context.normal_units()) {
       addUnitAbbreviations(*Unit);
+      AbbrevData *Abbrev = UnitsAbbrevData[Unit.get()];
+      if (!ProcessedAbbrevs.insert(Abbrev).second)
+        continue;
+      Abbrevs.push_back(Abbrev);
+    }
   }
 
   DebugBufferVector ReturnBuffer;
-
   // Pre-calculate the total size of abbrev section.
   uint64_t Size = 0;
-  for (const auto &KV : UnitsAbbrevData) {
-    const AbbrevData &UnitData = KV.second;
-    Size += UnitData.Buffer->size();
-  }
+  for (const AbbrevData *UnitData : Abbrevs)
+    Size += UnitData->Buffer->size();
+
   ReturnBuffer.reserve(Size);
 
   uint64_t Pos = 0;
-  for (auto &KV : UnitsAbbrevData) {
-    AbbrevData &UnitData = KV.second;
-    ReturnBuffer.append(*UnitData.Buffer);
-    UnitData.Offset = Pos;
-    Pos += UnitData.Buffer->size();
-
-    UnitData.Buffer.reset();
-    UnitData.Stream.reset();
+  for (AbbrevData *UnitData : Abbrevs) {
+    ReturnBuffer.append(*UnitData->Buffer);
+    UnitData->Offset = Pos;
+    Pos += UnitData->Buffer->size();
+
+    UnitData->Buffer.reset();
+    UnitData->Stream.reset();
   }
 
   return std::make_unique<DebugBufferVector>(ReturnBuffer);
diff --git a/bolt/test/X86/shared-abbrev.s b/bolt/test/X86/shared-abbrev.s
new file mode 100644 (file)
index 0000000..0975a00
--- /dev/null
@@ -0,0 +1,121 @@
+# RUN: rm -rf %t
+# RUN: mkdir %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64 -dwarf-version=4 %s -o %t/shared-abbrev.o
+# RUN: %clang %cflags %t/shared-abbrev.o -o %t/shared-abbrev.exe
+# RUN: llvm-bolt %t/shared-abbrev.exe -o %t/shared-abbrev.exe.bolt -update-debug-sections
+# RUN: llvm-dwarfdump --debug-info %t/shared-abbrev.exe.bolt | FileCheck %s
+
+# CHECK: 0x00000000:
+# CHECK-SAME: abbr_offset = 0x0000
+# CHECK-EMPTY:
+# CHECK-NEXT:          DW_TAG_compile_unit
+# CHECK-NEXT:          DW_AT_stmt_list
+# CHECK-NEXT:          DW_AT_low_pc
+# CHECK-NEXT:          DW_AT_ranges
+# CHECK: 0x0000001d:
+# CHECK-SAME: abbr_offset = 0x0017
+# CHECK-EMPTY:
+# CHECK:               DW_TAG_compile_unit
+# CHECK-NEXT:          DW_AT_stmt_list
+# CHECK-NEXT:          DW_AT_low_pc
+# CHECK-NEXT:          DW_AT_ranges
+# CHECK: 0x0000003a:
+# CHECK-SAME: abbr_offset = 0x0000
+# CHECK-EMPTY:
+# CHECK-NEXT:          DW_TAG_compile_unit
+# CHECK-NEXT:          DW_AT_stmt_list
+# CHECK-NEXT:          DW_AT_low_pc
+# CHECK-NEXT:          DW_AT_ranges
+
+       .text
+       .file   "main.cpp"
+       .globl  main                            # -- Begin function main
+       .p2align        4, 0x90
+       .type   main,@function
+main:                                   # @main
+.Lfunc_begin0:
+       .file   1 "test" "main.cpp"
+       .loc    1 1 0                           # main.cpp:1:0
+       .cfi_startproc
+       .cfi_def_cfa %rsp, 8
+       retq
+.Ltmp1:
+.Lfunc_end0:
+       .size   main, .Lfunc_end0-main
+       .cfi_endproc
+                                        # -- End function
+  .section     .debug_abbrev,"",@progbits
+  .byte        1                               # Abbreviation Code
+  .byte        17                              # DW_TAG_compile_unit
+  .byte        0                               # DW_CHILDREN_no
+  .byte        16                              # DW_AT_stmt_list
+  .byte        23                              # DW_FORM_sec_offset
+  .byte        17                              # DW_AT_low_pc
+  .byte        1                               # DW_FORM_addr
+  .byte        18                              # DW_AT_high_pc
+  .byte        7                               # DW_FORM_data8
+  .byte        0                               # EOM(1)
+  .byte        0                               # EOM(2)
+  .byte 2                               # Abbreviation Code
+  .byte 17                              # DW_TAG_compile_unit
+  .byte 0                               # DW_CHILDREN_no
+  .byte 16                              # DW_AT_stmt_list
+  .byte 23                              # DW_FORM_sec_offset
+  .byte 17                              # DW_AT_low_pc
+  .byte 1                               # DW_FORM_addr
+  .byte 85                              # DW_AT_ranges
+  .byte 23                              # DW_FORM_sec_offset
+  .byte 0                               # EOM(1)
+  .byte 0                               # EOM(2)
+  .byte        0                               # EOM(3)
+  .section     .debug_info,"",@progbits
+.Lcu_begin0:
+       .long   .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+       .short  4                               # DWARF version number
+  .long .debug_abbrev                   # Offset Into Abbrev. Section
+  .byte 8                               # Address Size (in bytes)
+  .byte 2                               # Abbrev [2] DW_TAG_compile_unit
+  .long .Lline_table_start0             # DW_AT_stmt_list
+  .quad 0                               # DW_AT_low_pc
+  .byte 0                               # End Of Children Mark
+  .long .Ldebug_ranges0                 # DW_AT_ranges --- end manual --
+.Ldebug_info_end0:
+
+   # Second CU table.
+   .long   .Ldebug_info_end1-.Ldebug_info_start1 # Length of Unit
+.Ldebug_info_start1:
+  .short       4                               # DWARF version number
+       .long   .debug_abbrev                   # Offset Into Abbrev. Section
+       .byte   8                               # Address Size (in bytes)
+       .byte   1                               # Abbrev [1] DW_TAG_compile_unit
+       .long   .Lline_table_start0             # DW_AT_stmt_list
+       .quad   .Lfunc_begin0                   # DW_AT_low_pc
+       .quad   .Lfunc_end0-.Lfunc_begin0       # DW_AT_high_pc
+  .byte 0                               # End Of Children Mark
+.Ldebug_info_end1:
+
+   # Third CU table.
+   .long   .Ldebug_info_end2-.Ldebug_info_start2 # Length of Unit
+.Ldebug_info_start2:
+       .short  4                               # DWARF version number
+  .long .debug_abbrev                   # Offset Into Abbrev. Section
+  .byte 8                               # Address Size (in bytes)
+  .byte 2                               # Abbrev [2] DW_TAG_compile_unit
+  .long .Lline_table_start0             # DW_AT_stmt_list
+  .quad 0                               # DW_AT_low_pc
+  .byte 0                               # End Of Children Mark
+  .long .Ldebug_ranges0                 # DW_AT_ranges --- end manual --
+.Ldebug_info_end2:
+  .section  .debug_ranges,"",@progbits
+.Ldebug_ranges0:
+  .quad .Lfunc_begin0
+  .quad .Lfunc_end0
+  .quad .Lfunc_begin0
+  .quad .Lfunc_end0
+  .quad 0
+  .quad 0
+  .section     ".note.GNU-stack","",@progbits
+  .addrsig
+  .section     .debug_line,"",@progbits
+.Lline_table_start0: