From 5070cecd7219fc078cf0084d9dfa91e86b490bc2 Mon Sep 17 00:00:00 2001
From: Reid Kleckner
Date: Sat, 2 May 2020 18:06:41 -0700
Subject: [PATCH] [PDB] Bypass generic deserialization code for publics sorting

The number of public symbols is very large, and each deserialization
does a few heap allocations. The public symbols are serialized by the
linker, so we can assume they have the expected layout and use it
directly.

Saves O(#publics) temporary heap allocations and shrinks some data
structures.
---
 .../include/llvm/DebugInfo/CodeView/SymbolRecord.h |  7 ++++
 llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp | 45 ++++++++++++++--------
 2 files changed, 35 insertions(+), 17 deletions(-)

diff --git a/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h b/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h
index 1fcef9d..4383534 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h
@@ -350,6 +350,13 @@ public:
   uint32_t RecordOffset = 0;
 };
 
+struct PublicSym32Header {
+  ulittle32_t Flags;
+  ulittle32_t Offset;
+  ulittle16_t Segment;
+  // char Name[];
+};
+
 // S_PUB32
 class PublicSym32 : public SymbolRecord {
 public:
diff --git a/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
index 432f1e9..88eab57 100644
--- a/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
@@ -229,15 +229,30 @@ Error GSIStreamBuilder::finalizeMsfLayout() {
   return Error::success();
 }
 
-static bool comparePubSymByAddrAndName(
-    const std::pair<const CVSymbol *, const PublicSym32 *> &LS,
-    const std::pair<const CVSymbol *, const PublicSym32 *> &RS) {
-  if (LS.second->Segment != RS.second->Segment)
-    return LS.second->Segment < RS.second->Segment;
-  if (LS.second->Offset != RS.second->Offset)
-    return LS.second->Offset < RS.second->Offset;
-
-  return LS.second->Name < RS.second->Name;
+static StringRef extractPubSym(const CVSymbol *Sym, uint16_t &Seg,
+                               uint32_t &Offset) {
+  ArrayRef<uint8_t> Buf = Sym->content();
+  assert(Buf.size() > sizeof(PublicSym32Header));
+  const auto *Hdr = reinterpret_cast<const PublicSym32Header *>(Buf.data());
+  Buf = Buf.drop_front(sizeof(PublicSym32Header));
+  Seg = Hdr->Segment;
+  Offset = Hdr->Offset;
+  // Don't worry about finding the null terminator, since the strings will be
+  // compared later.
+  return StringRef(reinterpret_cast<const char *>(Buf.data()), Buf.size());
+}
+
+static bool comparePubSymByAddrAndName(const CVSymbol *LS, const CVSymbol *RS) {
+  uint16_t LSeg, RSeg;
+  uint32_t LOff, ROff;
+  StringRef LName, RName;
+  LName = extractPubSym(LS, LSeg, LOff);
+  RName = extractPubSym(RS, RSeg, ROff);
+  if (LSeg != RSeg)
+    return LSeg < RSeg;
+  if (LOff != ROff)
+    return LOff < ROff;
+  return LName < RName;
 }
 
 /// Compute the address map. The address map is an array of symbol offsets
@@ -246,19 +261,15 @@ static std::vector<ulittle32_t> computeAddrMap(ArrayRef<CVSymbol> Records) {
   // Make a vector of pointers to the symbols so we can sort it by address.
   // Also gather the symbol offsets while we're at it.
 
-  std::vector<PublicSym32> DeserializedPublics;
-  std::vector<std::pair<const CVSymbol *, const PublicSym32 *>> PublicsByAddr;
+  std::vector<const CVSymbol *> PublicsByAddr;
   std::vector<uint32_t> SymOffsets;
-  DeserializedPublics.reserve(Records.size());
   PublicsByAddr.reserve(Records.size());
   SymOffsets.reserve(Records.size());
 
   uint32_t SymOffset = 0;
   for (const CVSymbol &Sym : Records) {
     assert(Sym.kind() == SymbolKind::S_PUB32);
-    DeserializedPublics.push_back(
-        cantFail(SymbolDeserializer::deserializeAs<PublicSym32>(Sym)));
-    PublicsByAddr.emplace_back(&Sym, &DeserializedPublics.back());
+    PublicsByAddr.push_back(&Sym);
     SymOffsets.push_back(SymOffset);
     SymOffset += Sym.length();
   }
@@ -267,8 +278,8 @@ static std::vector<ulittle32_t> computeAddrMap(ArrayRef<CVSymbol> Records) {
   // Fill in the symbol offsets in the appropriate order.
   std::vector<ulittle32_t> AddrMap;
   AddrMap.reserve(Records.size());
-  for (auto &Sym : PublicsByAddr) {
-    ptrdiff_t Idx = std::distance(Records.data(), Sym.first);
+  for (const CVSymbol *Sym : PublicsByAddr) {
+    ptrdiff_t Idx = std::distance(Records.data(), Sym);
     assert(Idx >= 0 && size_t(Idx) < Records.size());
     AddrMap.push_back(ulittle32_t(SymOffsets[Idx]));
   }
-- 
2.7.4