From 94926a6db8bef54f251399408ef0c3fb722c6528 Mon Sep 17 00:00:00 2001 From: Zachary Turner Date: Mon, 8 Oct 2018 04:19:16 +0000 Subject: [PATCH] [PDB] Add the ability to lookup global symbols by name. The Globals table is a hash table keyed on symbol name, so it's possible to lookup symbols by name in O(1) time. Add a function to the globals stream to do this, and add an option to llvm-pdbutil to exercise this, then use it to write some tests to verify correctness. llvm-svn: 343951 --- .../include/llvm/DebugInfo/CodeView/SymbolRecord.h | 16 +++++- llvm/include/llvm/DebugInfo/PDB/Native/DbiStream.h | 2 +- .../llvm/DebugInfo/PDB/Native/GlobalsStream.h | 10 +++- .../llvm/DebugInfo/PDB/Native/ModuleDebugStream.h | 5 +- llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp | 3 +- llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp | 2 +- llvm/lib/DebugInfo/PDB/Native/GlobalsStream.cpp | 59 ++++++++++++++++++++-- .../lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp | 8 +++ llvm/test/DebugInfo/PDB/pdbdump-global-lookup.test | 22 ++++++++ llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp | 41 +++++++++++++-- llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp | 5 ++ llvm/tools/llvm-pdbutil/llvm-pdbutil.h | 1 + 12 files changed, 158 insertions(+), 16 deletions(-) create mode 100644 llvm/test/DebugInfo/PDB/pdbdump-global-lookup.test diff --git a/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h b/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h index bc7571b..c63fb98 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h +++ b/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h @@ -358,6 +358,7 @@ public: // S_PUB32 class PublicSym32 : public SymbolRecord { public: + PublicSym32() : SymbolRecord(SymbolRecordKind::PublicSym32) {} explicit PublicSym32(SymbolRecordKind Kind) : SymbolRecord(Kind) {} explicit PublicSym32(uint32_t RecordOffset) : SymbolRecord(SymbolRecordKind::PublicSym32), @@ -636,6 +637,7 @@ public: // S_OBJNAME class ObjNameSym : public SymbolRecord { public: + explicit ObjNameSym() : SymbolRecord(SymbolRecordKind::ObjNameSym) {} explicit ObjNameSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} ObjNameSym(uint32_t RecordOffset) : SymbolRecord(SymbolRecordKind::ObjNameSym), RecordOffset(RecordOffset) { @@ -718,6 +720,7 @@ public: // S_COMPILE3 class Compile3Sym : public SymbolRecord { public: + Compile3Sym() : SymbolRecord(SymbolRecordKind::Compile3Sym) {} explicit Compile3Sym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} Compile3Sym(uint32_t RecordOffset) : SymbolRecord(SymbolRecordKind::Compile3Sym), @@ -739,8 +742,17 @@ public: Flags = CompileSym3Flags((uint32_t(Flags) & 0xFFFFFF00) | uint32_t(Lang)); } - uint8_t getLanguage() const { return static_cast(Flags) & 0xFF; } - uint32_t getFlags() const { return static_cast(Flags) & ~0xFF; } + SourceLanguage getLanguage() const { + return static_cast(static_cast(Flags) & 0xFF); + } + CompileSym3Flags getFlags() const { + return static_cast(static_cast(Flags) & ~0xFF); + } + + bool hasOptimizations() const { + return CompileSym3Flags::None != + (getFlags() & (CompileSym3Flags::PGO | CompileSym3Flags::LTCG)); + } uint32_t RecordOffset; }; diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/DbiStream.h b/llvm/include/llvm/DebugInfo/PDB/Native/DbiStream.h index 280615b..a3ca607 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/DbiStream.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/DbiStream.h @@ -78,7 +78,7 @@ public: const DbiModuleList &modules() const; - FixedStreamArray getSectionHeaders(); + FixedStreamArray getSectionHeaders() const; FixedStreamArray getFpoRecords(); diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/GlobalsStream.h b/llvm/include/llvm/DebugInfo/PDB/Native/GlobalsStream.h index dd04b5c..97d8197 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/GlobalsStream.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/GlobalsStream.h @@ -10,18 +10,20 @@ #ifndef LLVM_DEBUGINFO_PDB_RAW_GLOBALS_STREAM_H #define LLVM_DEBUGINFO_PDB_RAW_GLOBALS_STREAM_H +#include "llvm/ADT/iterator.h" +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/DebugInfo/MSF/MappedBlockStream.h" #include "llvm/DebugInfo/PDB/Native/RawConstants.h" #include "llvm/DebugInfo/PDB/Native/RawTypes.h" #include "llvm/DebugInfo/PDB/PDBTypes.h" #include "llvm/Support/BinaryStreamArray.h" #include "llvm/Support/Error.h" -#include "llvm/ADT/iterator.h" namespace llvm { namespace pdb { class DbiStream; class PDBFile; +class SymbolStream; /// Iterator over hash records producing symbol record offsets. Abstracts away /// the fact that symbol record offsets on disk are off-by-one. @@ -50,8 +52,9 @@ class GSIHashTable { public: const GSIHashHeader *HashHdr; FixedStreamArray HashRecords; - ArrayRef HashBitmap; + ArrayRef HashBitmap; FixedStreamArray HashBuckets; + std::array BucketMap; Error read(BinaryStreamReader &Reader); @@ -72,6 +75,9 @@ public: const GSIHashTable &getGlobalsTable() const { return GlobalsTable; } Error reload(); + std::vector> + findRecordsByName(StringRef Name, const SymbolStream &Symbols) const; + private: GSIHashTable GlobalsTable; std::unique_ptr Stream; diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h b/llvm/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h index efc25e0..19b0ebd 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h @@ -15,6 +15,7 @@ #include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/DebugInfo/MSF/MappedBlockStream.h" +#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h" #include "llvm/Support/BinaryStreamRef.h" #include "llvm/Support/Error.h" #include @@ -51,6 +52,8 @@ public: ModuleDebugStreamRef &operator=(ModuleDebugStreamRef &&Other) = delete; + codeview::CVSymbol readSymbolAtOffset(uint32_t Offset) const; + iterator_range subsections() const; codeview::DebugSubsectionArray getSubsectionsArray() const { return Subsections; @@ -64,7 +67,7 @@ public: findChecksumsSubsection() const; private: - const DbiModuleDescriptor &Mod; + DbiModuleDescriptor Mod; uint32_t Signature; diff --git a/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp b/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp index 4cb267e..54bf904 100644 --- a/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp +++ b/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp @@ -262,7 +262,8 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Compile3Sym &Compile3) { W.printEnum("Language", Compile3.getLanguage(), getSourceLanguageNames()); - W.printFlags("Flags", Compile3.getFlags(), getCompileSym3FlagNames()); + W.printFlags("Flags", uint32_t(Compile3.getFlags()), + getCompileSym3FlagNames()); W.printEnum("Machine", unsigned(Compile3.Machine), getCPUTypeNames()); CompilationCPUType = Compile3.Machine; std::string FrontendVersion; diff --git a/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp b/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp index edaa783..60ac17b 100644 --- a/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp @@ -197,7 +197,7 @@ PDB_Machine DbiStream::getMachineType() const { return static_cast(Machine); } -FixedStreamArray DbiStream::getSectionHeaders() { +FixedStreamArray DbiStream::getSectionHeaders() const { return SectionHeaders; } diff --git a/llvm/lib/DebugInfo/PDB/Native/GlobalsStream.cpp b/llvm/lib/DebugInfo/PDB/Native/GlobalsStream.cpp index 36076f4..3a4c65e 100644 --- a/llvm/lib/DebugInfo/PDB/Native/GlobalsStream.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/GlobalsStream.cpp @@ -20,7 +20,11 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h" + +#include "llvm/DebugInfo/CodeView/RecordName.h" +#include "llvm/DebugInfo/PDB/Native/Hash.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" +#include "llvm/DebugInfo/PDB/Native/SymbolStream.h" #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/Error.h" #include @@ -41,6 +45,35 @@ Error GlobalsStream::reload() { return Error::success(); } +std::vector> +GlobalsStream::findRecordsByName(StringRef Name, + const SymbolStream &Symbols) const { + std::vector> Result; + + // Hash the name to figure out which bucket this goes into. + size_t ExpandedBucketIndex = hashStringV1(Name) % IPHR_HASH; + uint32_t CompressedBucketIndex = GlobalsTable.BucketMap[ExpandedBucketIndex]; + if (CompressedBucketIndex == -1) + return Result; + + uint32_t ChainStartOffset = GlobalsTable.HashBuckets[CompressedBucketIndex]; + uint32_t NextChainStart = GlobalsTable.HashBuckets.size(); + if (CompressedBucketIndex + 1 < GlobalsTable.HashBuckets.size()) + NextChainStart = GlobalsTable.HashBuckets[CompressedBucketIndex + 1]; + ChainStartOffset /= 12; + NextChainStart /= 12; + + while (ChainStartOffset < NextChainStart) { + PSHashRecord PSH = GlobalsTable.HashRecords[ChainStartOffset]; + uint32_t Off = PSH.Off - 1; + codeview::CVSymbol Record = Symbols.readRecord(Off); + if (codeview::getSymbolName(Record) == Name) + Result.push_back(std::make_pair(Off, std::move(Record))); + ++ChainStartOffset; + } + return Result; +} + static Error checkHashHdrVersion(const GSIHashHeader *HashHdr) { if (HashHdr->VerHdr != GSIHashHeader::HdrVersion) return make_error( @@ -86,7 +119,8 @@ static Error readGSIHashRecords(FixedStreamArray &HashRecords, static Error readGSIHashBuckets(FixedStreamArray &HashBuckets, - ArrayRef &HashBitmap, const GSIHashHeader *HashHdr, + ArrayRef &HashBitmap, const GSIHashHeader *HashHdr, + MutableArrayRef BucketMap, BinaryStreamReader &Reader) { if (auto EC = checkHashHdrVersion(HashHdr)) return EC; @@ -94,13 +128,27 @@ readGSIHashBuckets(FixedStreamArray &HashBuckets, // Before the actual hash buckets, there is a bitmap of length determined by // IPHR_HASH. size_t BitmapSizeInBits = alignTo(IPHR_HASH + 1, 32); - uint32_t NumBitmapEntries = BitmapSizeInBits / 8; - if (auto EC = Reader.readBytes(HashBitmap, NumBitmapEntries)) + uint32_t NumBitmapEntries = BitmapSizeInBits / 32; + if (auto EC = Reader.readArray(HashBitmap, NumBitmapEntries)) return joinErrors(std::move(EC), make_error(raw_error_code::corrupt_file, "Could not read a bitmap.")); + uint32_t NumBuckets1 = 0; + uint32_t CompressedBucketIdx = 0; + for (uint32_t I = 0; I <= IPHR_HASH; ++I) { + uint8_t WordIdx = I / 32; + uint8_t BitIdx = I % 32; + bool IsSet = HashBitmap[WordIdx] & (1U << BitIdx); + if (IsSet) { + ++NumBuckets1; + BucketMap[I] = CompressedBucketIdx++; + } else { + BucketMap[I] = -1; + } + } + uint32_t NumBuckets = 0; - for (uint8_t B : HashBitmap) + for (uint32_t B : HashBitmap) NumBuckets += countPopulation(B); // Hash buckets follow. @@ -118,7 +166,8 @@ Error GSIHashTable::read(BinaryStreamReader &Reader) { if (auto EC = readGSIHashRecords(HashRecords, HashHdr, Reader)) return EC; if (HashHdr->HrSize > 0) - if (auto EC = readGSIHashBuckets(HashBuckets, HashBitmap, HashHdr, Reader)) + if (auto EC = readGSIHashBuckets(HashBuckets, HashBitmap, HashHdr, + BucketMap, Reader)) return EC; return Error::success(); } diff --git a/llvm/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp b/llvm/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp index 2e1f61c..6464b85 100644 --- a/llvm/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp @@ -97,6 +97,14 @@ ModuleDebugStreamRef::symbols(bool *HadError) const { return make_range(SymbolArray.begin(HadError), SymbolArray.end()); } +CVSymbol ModuleDebugStreamRef::readSymbolAtOffset(uint32_t Offset) const { + // Offsets include the size of the 4-byte magic at the beginning, but lookup + // doesn't take that into account, so subtract it here. + auto Iter = SymbolArray.at(Offset - 4); + assert(Iter != SymbolArray.end()); + return *Iter; +} + iterator_range ModuleDebugStreamRef::subsections() const { return make_range(Subsections.begin(), Subsections.end()); diff --git a/llvm/test/DebugInfo/PDB/pdbdump-global-lookup.test b/llvm/test/DebugInfo/PDB/pdbdump-global-lookup.test new file mode 100644 index 0000000..fe5ec66 --- /dev/null +++ b/llvm/test/DebugInfo/PDB/pdbdump-global-lookup.test @@ -0,0 +1,22 @@ +; RUN: llvm-pdbutil dump -globals \ +; RUN: -global-name="operator delete" \ +; RUN: -global-name=main \ +; RUN: -global-name=abcdefg \ +; RUN: -global-name="Base2::`vbase destructor'" \ +; RUN: %p/Inputs/every-function.pdb | FileCheck %s + +CHECK: Global Symbols +CHECK-NEXT: ============================================================ +CHECK-NEXT: Global Name `operator delete` +CHECK-NEXT: 1516 | S_PROCREF [size = 32] `operator delete` +CHECK-NEXT: module = 1, sum name = 0, offset = 324 +CHECK-NEXT: 1484 | S_PROCREF [size = 32] `operator delete` +CHECK-NEXT: module = 1, sum name = 0, offset = 184 +CHECK-NEXT: Global Name `main` +CHECK-NEXT: 2016 | S_PROCREF [size = 20] `main` +CHECK-NEXT: module = 1, sum name = 0, offset = 1952 +CHECK-NEXT: Global Name `abcdefg` +CHECK-NEXT: (no matching records found) +CHECK-NEXT: Global Name `Base2::`vbase destructor'` +CHECK-NEXT: 2288 | S_PROCREF [size = 40] `Base2::`vbase destructor'` +CHECK-NEXT: module = 1, sum name = 0, offset = 2852 diff --git a/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp b/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp index 4e80f41..e94decc 100644 --- a/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp +++ b/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp @@ -1572,8 +1572,40 @@ Error DumpOutputStyle::dumpGlobals() { ExitOnError Err("Error dumping globals stream: "); auto &Globals = Err(getPdb().getPDBGlobalsStream()); - const GSIHashTable &Table = Globals.getGlobalsTable(); - Err(dumpSymbolsFromGSI(Table, opts::dump::DumpGlobalExtras)); + if (opts::dump::DumpGlobalNames.empty()) { + const GSIHashTable &Table = Globals.getGlobalsTable(); + Err(dumpSymbolsFromGSI(Table, opts::dump::DumpGlobalExtras)); + } else { + SymbolStream &SymRecords = cantFail(getPdb().getPDBSymbolStream()); + auto &Types = File.types(); + auto &Ids = File.ids(); + + SymbolVisitorCallbackPipeline Pipeline; + SymbolDeserializer Deserializer(nullptr, CodeViewContainer::Pdb); + MinimalSymbolDumper Dumper(P, opts::dump::DumpSymRecordBytes, Ids, Types); + + Pipeline.addCallbackToPipeline(Deserializer); + Pipeline.addCallbackToPipeline(Dumper); + CVSymbolVisitor Visitor(Pipeline); + + using ResultEntryType = std::pair; + for (StringRef Name : opts::dump::DumpGlobalNames) { + AutoIndent Indent(P); + P.formatLine("Global Name `{0}`", Name); + std::vector Results = + Globals.findRecordsByName(Name, SymRecords); + if (Results.empty()) { + AutoIndent Indent(P); + P.printLine("(no matching records found)"); + continue; + } + + for (ResultEntryType Result : Results) { + if (auto E = Visitor.visitSymbolRecord(Result.second, Result.first)) + return E; + } + } + } return Error::success(); } @@ -1676,7 +1708,10 @@ Error DumpOutputStyle::dumpSymbolsFromGSI(const GSIHashTable &Table, // Return early if we aren't dumping public hash table and address map info. if (HashExtras) { - P.formatBinary("Hash Bitmap", Table.HashBitmap, 0); + ArrayRef BitmapBytes( + reinterpret_cast(Table.HashBitmap.data()), + Table.HashBitmap.size() * sizeof(uint32_t)); + P.formatBinary("Hash Bitmap", BitmapBytes, 0); P.formatLine("Hash Entries"); { diff --git a/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp b/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp index da9a644..34618f6 100644 --- a/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp +++ b/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp @@ -526,6 +526,11 @@ cl::opt DumpGlobals("globals", cl::desc("dump Globals symbol records"), cl::cat(SymbolOptions), cl::sub(DumpSubcommand)); cl::opt DumpGlobalExtras("global-extras", cl::desc("dump Globals hashes"), cl::cat(SymbolOptions), cl::sub(DumpSubcommand)); +cl::list DumpGlobalNames( + "global-name", + cl::desc( + "With -globals, only dump globals whose name matches the given value"), + cl::cat(SymbolOptions), cl::sub(DumpSubcommand), cl::ZeroOrMore); cl::opt DumpPublics("publics", cl::desc("dump Publics stream data"), cl::cat(SymbolOptions), cl::sub(DumpSubcommand)); cl::opt DumpPublicExtras("public-extras", diff --git a/llvm/tools/llvm-pdbutil/llvm-pdbutil.h b/llvm/tools/llvm-pdbutil/llvm-pdbutil.h index 36c20b7..1584dce 100644 --- a/llvm/tools/llvm-pdbutil/llvm-pdbutil.h +++ b/llvm/tools/llvm-pdbutil/llvm-pdbutil.h @@ -169,6 +169,7 @@ extern llvm::cl::opt DumpSymbols; extern llvm::cl::opt DumpSymRecordBytes; extern llvm::cl::opt DumpGSIRecords; extern llvm::cl::opt DumpGlobals; +extern llvm::cl::list DumpGlobalNames; extern llvm::cl::opt DumpGlobalExtras; extern llvm::cl::opt DumpPublics; extern llvm::cl::opt DumpPublicExtras; -- 2.7.4