[ELF] .gdb_index: fix CuOff when a .debug_info section contains more than 1 DW_TAG_compile_unit
author Fangrui Song <maskray@google.com>
Tue, 13 Nov 2018 08:43:07 +0000 (08:43 +0000)
committer Fangrui Song <maskray@google.com>
Tue, 13 Nov 2018 08:43:07 +0000 (08:43 +0000)
Summary:
Idx passed to readPubNamesAndTypes was an index into Chunks, not an
index into the CU list. This would be incorrect if some .debug_info
section contained more than 1 DW_TAG_compile_unit.

In the real world, glibc's Scrt1.o is a partial link of start.os, abi-note.o, and init.o, and contains 2 CUs in debug builds.
Without this patch, any application linking such an Scrt1.o would have an invalid .gdb_index.
The issue can be demonstrated by:

    (gdb) py print(gdb.lookup_global_symbol('main'))
    None

Reviewers: espindola, ruiu

Reviewed By: ruiu

Subscribers: Higuoxing, grimar, dblaikie, emaste, aprantl, arichardson, JDevlieghere, arphaman, llvm-commits

Differential Revision: https://reviews.llvm.org/D54361

llvm-svn: 346747

lld/ELF/SyntheticSections.cpp
lld/test/ELF/gdb-index-multiple-cu.s [new file with mode: 0644]

index 066cd99..3bcb4d6 100644 (file)
@@ -2414,18 +2414,28 @@ readAddressAreas(DWARFContext &Dwarf, InputSection *Sec) {
 
 template <class ELFT>
 static std::vector<GdbIndexSection::NameTypeEntry>
-readPubNamesAndTypes(DWARFContext &Dwarf, uint32_t Idx) {
-  auto &Obj = static_cast<const LLDDwarfObj<ELFT> &>(Dwarf.getDWARFObj());
+readPubNamesAndTypes(const LLDDwarfObj<ELFT> &Obj,
+                     const std::vector<GdbIndexSection::CuEntry> &CUs) {
   const DWARFSection &PubNames = Obj.getGnuPubNamesSection();
   const DWARFSection &PubTypes = Obj.getGnuPubTypesSection();
 
   std::vector<GdbIndexSection::NameTypeEntry> Ret;
   for (const DWARFSection *Pub : {&PubNames, &PubTypes}) {
     DWARFDebugPubTable Table(Obj, *Pub, Config->IsLE, true);
-    for (const DWARFDebugPubTable::Set &Set : Table.getData())
+    uint32_t I = 0;
+    for (const DWARFDebugPubTable::Set &Set : Table.getData()) {
+      // The value written into the constant pool is Kind << 24 | CuIndex. As we
+      // don't know how many compilation units precede this object to compute
+      // CuIndex, we compute (Kind << 24 | CuIndexInThisObject) instead, and add
+      // the number of preceding compilation units later.
+      //
+      // We assume both CUs[*].CuOffset and Set.Offset are increasing.
+      while (I < CUs.size() && CUs[I].CuOffset < Set.Offset)
+        ++I;
       for (const DWARFDebugPubTable::Entry &Ent : Set.Entries)
         Ret.push_back({{Ent.Name, computeGdbHash(Ent.Name)},
-                       (Ent.Descriptor.toBits() << 24) | Idx});
+                       (Ent.Descriptor.toBits() << 24) | I});
+    }
   }
   return Ret;
 }
@@ -2433,10 +2443,19 @@ readPubNamesAndTypes(DWARFContext &Dwarf, uint32_t Idx) {
 // Create a list of symbols from a given list of symbol names and types
 // by uniquifying them by name.
 static std::vector<GdbIndexSection::GdbSymbol>
-createSymbols(ArrayRef<std::vector<GdbIndexSection::NameTypeEntry>> NameTypes) {
+createSymbols(ArrayRef<std::vector<GdbIndexSection::NameTypeEntry>> NameTypes,
+              const std::vector<GdbIndexSection::GdbChunk> &Chunks) {
   typedef GdbIndexSection::GdbSymbol GdbSymbol;
   typedef GdbIndexSection::NameTypeEntry NameTypeEntry;
 
+  // For each chunk, compute the number of compilation units preceding it.
+  uint32_t CuIdx = 0;
+  std::vector<uint32_t> CuIdxs(Chunks.size());
+  for (uint32_t I = 0, E = Chunks.size(); I != E; ++I) {
+    CuIdxs[I] = CuIdx;
+    CuIdx += Chunks[I].CompilationUnits.size();
+  }
+
   // The number of symbols we will handle in this function is of the order
   // of millions for very large executables, so we use multi-threading to
   // speed it up.
@@ -2453,21 +2472,24 @@ createSymbols(ArrayRef<std::vector<GdbIndexSection::NameTypeEntry>> NameTypes) {
   // Instantiate GdbSymbols while uniquifying them by name.
   std::vector<std::vector<GdbSymbol>> Symbols(NumShards);
   parallelForEachN(0, Concurrency, [&](size_t ThreadId) {
+    uint32_t I = 0;
     for (ArrayRef<NameTypeEntry> Entries : NameTypes) {
       for (const NameTypeEntry &Ent : Entries) {
         size_t ShardId = Ent.Name.hash() >> Shift;
         if ((ShardId & (Concurrency - 1)) != ThreadId)
           continue;
 
+        uint32_t V = Ent.Type + CuIdxs[I];
         size_t &Idx = Map[ShardId][Ent.Name];
         if (Idx) {
-          Symbols[ShardId][Idx - 1].CuVector.push_back(Ent.Type);
+          Symbols[ShardId][Idx - 1].CuVector.push_back(V);
           continue;
         }
 
         Idx = Symbols[ShardId].size() + 1;
-        Symbols[ShardId].push_back({Ent.Name, {Ent.Type}, 0, 0});
+        Symbols[ShardId].push_back({Ent.Name, {V}, 0, 0});
       }
+      ++I;
     }
   });
 
@@ -2519,12 +2541,14 @@ template <class ELFT> GdbIndexSection *GdbIndexSection::create() {
     Chunks[I].Sec = Sections[I];
     Chunks[I].CompilationUnits = readCuList(Dwarf);
     Chunks[I].AddressAreas = readAddressAreas(Dwarf, Sections[I]);
-    NameTypes[I] = readPubNamesAndTypes<ELFT>(Dwarf, I);
+    NameTypes[I] = readPubNamesAndTypes<ELFT>(
+        static_cast<const LLDDwarfObj<ELFT> &>(Dwarf.getDWARFObj()),
+        Chunks[I].CompilationUnits);
   });
 
   auto *Ret = make<GdbIndexSection>();
   Ret->Chunks = std::move(Chunks);
-  Ret->Symbols = createSymbols(NameTypes);
+  Ret->Symbols = createSymbols(NameTypes, Ret->Chunks);
   Ret->initOutputSize();
   return Ret;
 }
diff --git a/lld/test/ELF/gdb-index-multiple-cu.s b/lld/test/ELF/gdb-index-multiple-cu.s
new file mode 100644 (file)
index 0000000..7bfc95a
--- /dev/null
@@ -0,0 +1,64 @@
+# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o
+# RUN: ld.lld --gdb-index %t.o -o %t
+# RUN: llvm-dwarfdump -gdb-index %t | FileCheck %s
+
+# Kind << 24 | CuIndex = 48 << 24 | 1 = 0x30000001
+# CHECK: Constant pool
+# CHECK-NEXT: 0(0x0): 0x30000001
+
+.globl _start
+_start:
+       ret
+
+.section .debug_abbrev,"",@progbits
+       .byte   1              # Abbreviation Code
+       .byte   17             # DW_TAG_compile_unit
+       .byte   1              # DW_CHILDREN_yes
+       .ascii  "\264B"        # DW_AT_GNU_pubnames
+       .byte   12             # DW_FORM_flag
+       .byte   0              # EOM(1)
+       .byte   0              # EOM(2)
+       .byte   2              # Abbreviation Code
+       .byte   46             # DW_TAG_subprogram
+       .byte   0              # DW_CHILDREN_no
+       .byte   3              # DW_AT_name
+       .byte   8              # DW_FORM_string
+       .byte   0              # EOM(1)
+       .byte   0              # EOM(2)
+       .byte   0
+
+.section .debug_info,"",@progbits
+.Lcu_begin0:
+       .long   .Lcu_end0 - .Lcu_begin0 - 4
+       .short  4              # DWARF version number
+       .long   0              # Offset Into Abbrev. Section
+       .byte   4              # Address Size
+       .byte   1              # Abbrev [1] DW_TAG_compile_unit
+       .byte   0              # DW_AT_GNU_pubnames
+       .byte   0
+.Lcu_end0:
+.Lcu_begin1:
+       .long   .Lcu_end1 - .Lcu_begin1 - 4
+       .short  4              # DWARF version number
+       .long   0              # Offset Into Abbrev. Section
+       .byte   4              # Address Size
+.Ldie:
+       .byte   1              # Abbrev [1] DW_TAG_compile_unit
+       .byte   1              # DW_AT_GNU_pubnames
+       .byte   2              # Abbrev [2] DW_TAG_subprogram
+       .asciz  "_start"       # DW_AT_name
+       .byte   0
+.Lcu_end1:
+
+# .debug_gnu_pubnames has just one set, associated with .Lcu_begin1 (CuIndex: 1)
+.section .debug_gnu_pubnames,"",@progbits
+       .long   .LpubNames_end1-.LpubNames_begin1
+.LpubNames_begin1:
+       .short  2              # Version
+       .long   .Lcu_begin1    # CU Offset
+       .long   .Lcu_end1 - .Lcu_begin1
+       .long   .Ldie - .Lcu_begin1
+       .byte   48             # Kind: FUNCTION, EXTERNAL
+       .asciz  "_start"       # External Name
+       .long   0
+.LpubNames_end1: