using namespace lld;
using namespace lld::elf;
+using llvm::support::endian::read32le;
using llvm::support::endian::write32le;
using llvm::support::endian::write64le;
}
static std::vector<GdbIndexChunk::NameTypeEntry>
-readPubNamesAndTypes(DWARFContext &Dwarf) {
+readPubNamesAndTypes(DWARFContext &Dwarf, uint32_t Idx) {
StringRef Sec1 = Dwarf.getDWARFObj().getGnuPubNamesSection();
StringRef Sec2 = Dwarf.getDWARFObj().getGnuPubTypesSection();
std::vector<GdbIndexChunk::NameTypeEntry> Ret;
for (StringRef Sec : {Sec1, Sec2}) {
DWARFDebugPubTable Table(Sec, Config->IsLE, true);
- for (const DWARFDebugPubTable::Set &Set : Table.getData()) {
- for (const DWARFDebugPubTable::Entry &Ent : Set.Entries) {
- CachedHashStringRef S(Ent.Name, computeGdbHash(Ent.Name));
- Ret.push_back({S, Ent.Descriptor.toBits()});
- }
- }
+ for (const DWARFDebugPubTable::Set &Set : Table.getData())
+ for (const DWARFDebugPubTable::Entry &Ent : Set.Entries)
+ Ret.push_back({{Ent.Name, computeGdbHash(Ent.Name)},
+ (Ent.Descriptor.toBits() << 24) | Idx});
}
return Ret;
}
return Ret;
}
-void GdbIndexSection::fixCuIndex() {
- uint32_t Idx = 0;
- for (GdbIndexChunk &Chunk : Chunks) {
- for (GdbIndexChunk::AddressEntry &Ent : Chunk.AddressAreas)
- Ent.CuIndex += Idx;
- Idx += Chunk.CompilationUnits.size();
- }
-}
-
-std::vector<std::vector<uint32_t>> GdbIndexSection::createCuVectors() {
- std::vector<std::vector<uint32_t>> Ret;
- uint32_t Idx = 0;
- uint32_t Off = 0;
-
- for (GdbIndexChunk &Chunk : Chunks) {
- for (GdbIndexChunk::NameTypeEntry &Ent : Chunk.NamesAndTypes) {
- GdbSymbol *&Sym = Symbols[Ent.Name];
- if (!Sym) {
- Sym = make<GdbSymbol>(GdbSymbol{Ent.Name.hash(), Off, Ret.size()});
- Off += Ent.Name.size() + 1;
- Ret.push_back({});
- }
-
- // gcc 5.4.1 produces a buggy .debug_gnu_pubnames that contains
- // duplicate entries, so we want to dedup them.
- std::vector<uint32_t> &Vec = Ret[Sym->CuVectorIndex];
- uint32_t Val = (Ent.Type << 24) | Idx;
- if (Vec.empty() || Vec.back() != Val)
- Vec.push_back(Val);
- }
- Idx += Chunk.CompilationUnits.size();
- }
-
- StringPoolSize = Off;
- return Ret;
-}
-
template <class ELFT> GdbIndexSection *elf::createGdbIndex() {
// Gather debug info to create a .gdb_index section.
std::vector<InputSection *> Sections = getDebugInfoSections();
Chunks[I].DebugInfoSec = Sections[I];
Chunks[I].CompilationUnits = readCuList(Dwarf);
Chunks[I].AddressAreas = readAddressAreas(Dwarf, Sections[I]);
- Chunks[I].NamesAndTypes = readPubNamesAndTypes(Dwarf);
+ Chunks[I].NamesAndTypes = readPubNamesAndTypes(Dwarf, I);
});
// .debug_gnu_pub{names,types} are useless in executables.
return Ret;
}
-std::vector<GdbSymbol *> GdbIndexSection::createGdbSymtab() {
- uint32_t Size = NextPowerOf2(Symbols.size() * 4 / 3);
- if (Size < 1024)
- Size = 1024;
-
- uint32_t Mask = Size - 1;
- std::vector<GdbSymbol *> Ret(Size);
-
- for (auto &KV : Symbols) {
- GdbSymbol *Sym = KV.second;
- uint32_t I = Sym->NameHash & Mask;
- uint32_t Step = ((Sym->NameHash * 17) & Mask) | 1;
-
- while (Ret[I])
- I = (I + Step) & Mask;
- Ret[I] = Sym;
- }
- return Ret;
+// Returns the desired size of an on-disk hash table for a .gdb_index section.
+// There's a tradeoff between size and collision rate. We aim 75% utilization.
+static size_t getSymtabSize(size_t NumSymbols) {
+ return std::max<size_t>(NextPowerOf2(NumSymbols * 4 / 3), 1024);
}
GdbIndexSection::GdbIndexSection(std::vector<GdbIndexChunk> &&C)
: SyntheticSection(0, SHT_PROGBITS, 1, ".gdb_index"), Chunks(std::move(C)) {
- fixCuIndex();
- CuVectors = createCuVectors();
- GdbSymtab = createGdbSymtab();
+ // A map to identify duplicate symbols.
+ DenseMap<CachedHashStringRef, size_t> Map;
+
+ // Initialize Symbols and CuVectors while deduplicating symbols by name.
+ for (GdbIndexChunk &Chunk : Chunks) {
+ for (GdbIndexChunk::NameTypeEntry &Ent : Chunk.NamesAndTypes) {
+ CachedHashStringRef S = Ent.Name;
+ size_t &Idx = Map[S];
+
+ if (!Idx) {
+ Idx = Symbols.size() + 1;
+ Symbols.push_back({S, static_cast<uint32_t>(StringPoolSize),
+ static_cast<uint32_t>(Symbols.size())});
+ StringPoolSize += S.size() + 1;
+ CuVectors.push_back({});
+ }
+
+ // gcc 5.4.1 produces a buggy .debug_gnu_pubnames that contains
+ // duplicate entries, so we want to dedup them.
+ std::vector<uint32_t> &Vec = CuVectors[Symbols[Idx - 1].CuVectorIdx];
+ if (Vec.empty() || Vec.back() != Ent.Type)
+ Vec.push_back(Ent.Type);
+ }
+
+ // NamesAndTypes is useless beyond this point, so clear it to save memory.
+ Chunk.NamesAndTypes = {};
+ }
// Compute offsets early to know the section size.
// Each chunk size needs to be in sync with what we write in writeTo.
CuTypesOffset = CuListOffset + getCuSize(Chunks) * 16;
SymtabOffset = CuTypesOffset + getAddressAreaSize(Chunks) * 20;
- ConstantPoolOffset = SymtabOffset + GdbSymtab.size() * 8;
+ SymtabSize = getSymtabSize(Symbols.size());
+ ConstantPoolOffset = SymtabOffset + SymtabSize * 8;
for (ArrayRef<uint32_t> Vec : CuVectors) {
CuVectorOffsets.push_back(CuVectorsPoolSize);
}
// Write the address area.
- for (GdbIndexChunk &D : Chunks) {
- for (GdbIndexChunk::AddressEntry &E : D.AddressAreas) {
+ uint32_t Idx = 0;
+ for (GdbIndexChunk &Chunk : Chunks) {
+ for (GdbIndexChunk::AddressEntry &E : Chunk.AddressAreas) {
uint64_t BaseAddr = E.Section->getVA(0);
write64le(Buf, BaseAddr + E.LowAddress);
write64le(Buf + 8, BaseAddr + E.HighAddress);
- write32le(Buf + 16, E.CuIndex);
+ write32le(Buf + 16, E.CuIndex + Idx);
Buf += 20;
}
+ Idx += Chunk.CompilationUnits.size();
}
- // Write the symbol table.
- for (GdbSymbol *Sym : GdbSymtab) {
- if (Sym) {
- write32le(Buf, CuVectorsPoolSize + Sym->NameOffset);
- write32le(Buf + 4, CuVectorOffsets[Sym->CuVectorIndex]);
- }
- Buf += 8;
+ // Write the on-disk open-addressing hash table containing symbols.
+ for (GdbSymbol &Sym : Symbols) {
+ uint32_t Mask = SymtabSize - 1;
+ uint32_t H = Sym.Name.hash();
+ uint32_t I = H & Mask;
+ uint32_t Step = ((H * 17) & Mask) | 1;
+
+ while (read32le(Buf + I * 8))
+ I = (I + Step) & Mask;
+
+ uint8_t *P = Buf + I * 8;
+ write32le(P, CuVectorsPoolSize + Sym.OutputOff);
+ write32le(P + 4, CuVectorOffsets[Sym.CuVectorIdx]);
}
+ Buf += SymtabSize * 8;
+
// Write the CU vectors.
for (ArrayRef<uint32_t> Vec : CuVectors) {
write32le(Buf, Vec.size());
}
// Write the string pool.
- for (auto &KV : Symbols) {
- CachedHashStringRef S = KV.first;
- GdbSymbol *Sym = KV.second;
- size_t Off = Sym->NameOffset;
- memcpy(Buf + Off, S.val().data(), S.size());
- Buf[Off + S.size()] = '\0';
- }
+ for (GdbSymbol &Sym : Symbols)
+ memcpy(Buf + Sym.OutputOff, Sym.Name.val().data(), Sym.Name.size());
}
bool GdbIndexSection::empty() const { return !Out::DebugInfo; }