[PDB] Add the ability to lookup global symbols by name.
authorZachary Turner <zturner@google.com>
Mon, 8 Oct 2018 04:19:16 +0000 (04:19 +0000)
committerZachary Turner <zturner@google.com>
Mon, 8 Oct 2018 04:19:16 +0000 (04:19 +0000)
The Globals table is a hash table keyed on symbol name, so
it's possible to look up symbols by name in O(1) time.  Add
a function to the globals stream to do this, and add an option
to llvm-pdbutil to exercise it, then use that option to write
some tests to verify correctness.

llvm-svn: 343951

12 files changed:
llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h
llvm/include/llvm/DebugInfo/PDB/Native/DbiStream.h
llvm/include/llvm/DebugInfo/PDB/Native/GlobalsStream.h
llvm/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h
llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp
llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp
llvm/lib/DebugInfo/PDB/Native/GlobalsStream.cpp
llvm/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
llvm/test/DebugInfo/PDB/pdbdump-global-lookup.test [new file with mode: 0644]
llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp
llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp
llvm/tools/llvm-pdbutil/llvm-pdbutil.h

index bc7571b..c63fb98 100644 (file)
@@ -358,6 +358,7 @@ public:
 // S_PUB32
 class PublicSym32 : public SymbolRecord {
 public:
+  PublicSym32() : SymbolRecord(SymbolRecordKind::PublicSym32) {}
   explicit PublicSym32(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
   explicit PublicSym32(uint32_t RecordOffset)
       : SymbolRecord(SymbolRecordKind::PublicSym32),
@@ -636,6 +637,7 @@ public:
 // S_OBJNAME
 class ObjNameSym : public SymbolRecord {
 public:
+  explicit ObjNameSym() : SymbolRecord(SymbolRecordKind::ObjNameSym) {}
   explicit ObjNameSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
   ObjNameSym(uint32_t RecordOffset)
       : SymbolRecord(SymbolRecordKind::ObjNameSym), RecordOffset(RecordOffset) {
@@ -718,6 +720,7 @@ public:
 // S_COMPILE3
 class Compile3Sym : public SymbolRecord {
 public:
+  Compile3Sym() : SymbolRecord(SymbolRecordKind::Compile3Sym) {}
   explicit Compile3Sym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
   Compile3Sym(uint32_t RecordOffset)
       : SymbolRecord(SymbolRecordKind::Compile3Sym),
@@ -739,8 +742,17 @@ public:
     Flags = CompileSym3Flags((uint32_t(Flags) & 0xFFFFFF00) | uint32_t(Lang));
   }
 
-  uint8_t getLanguage() const { return static_cast<uint32_t>(Flags) & 0xFF; }
-  uint32_t getFlags() const { return static_cast<uint32_t>(Flags) & ~0xFF; }
+  /// Returns the low 8 bits of Flags reinterpreted as a SourceLanguage.
+  SourceLanguage getLanguage() const {
+    const uint32_t LanguageBits = static_cast<uint32_t>(Flags) & 0xFF;
+    return static_cast<SourceLanguage>(LanguageBits);
+  }
+  /// Returns Flags with the low 8 bits (the language byte) cleared.
+  CompileSym3Flags getFlags() const {
+    const uint32_t FlagBits = static_cast<uint32_t>(Flags) & ~0xFF;
+    return static_cast<CompileSym3Flags>(FlagBits);
+  }
+
+  /// Returns true when either the PGO or the LTCG flag is set.
+  bool hasOptimizations() const {
+    const CompileSym3Flags OptimizationMask =
+        CompileSym3Flags::PGO | CompileSym3Flags::LTCG;
+    return (getFlags() & OptimizationMask) != CompileSym3Flags::None;
+  }
 
   uint32_t RecordOffset;
 };
index 280615b..a3ca607 100644 (file)
@@ -78,7 +78,7 @@ public:
 
   const DbiModuleList &modules() const;
 
-  FixedStreamArray<object::coff_section> getSectionHeaders();
+  FixedStreamArray<object::coff_section> getSectionHeaders() const;
 
   FixedStreamArray<object::FpoData> getFpoRecords();
 
index dd04b5c..97d8197 100644 (file)
 #ifndef LLVM_DEBUGINFO_PDB_RAW_GLOBALS_STREAM_H
 #define LLVM_DEBUGINFO_PDB_RAW_GLOBALS_STREAM_H
 
+#include "llvm/ADT/iterator.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
 #include "llvm/DebugInfo/PDB/Native/RawConstants.h"
 #include "llvm/DebugInfo/PDB/Native/RawTypes.h"
 #include "llvm/DebugInfo/PDB/PDBTypes.h"
 #include "llvm/Support/BinaryStreamArray.h"
 #include "llvm/Support/Error.h"
-#include "llvm/ADT/iterator.h"
 
 namespace llvm {
 namespace pdb {
 class DbiStream;
 class PDBFile;
+class SymbolStream;
 
 /// Iterator over hash records producing symbol record offsets. Abstracts away
 /// the fact that symbol record offsets on disk are off-by-one.
@@ -50,8 +52,9 @@ class GSIHashTable {
 public:
   const GSIHashHeader *HashHdr;
   FixedStreamArray<PSHashRecord> HashRecords;
-  ArrayRef<uint8_t> HashBitmap;
+  ArrayRef<uint32_t> HashBitmap;
   FixedStreamArray<support::ulittle32_t> HashBuckets;
+  std::array<int32_t, IPHR_HASH + 1> BucketMap;
 
   Error read(BinaryStreamReader &Reader);
 
@@ -72,6 +75,9 @@ public:
   const GSIHashTable &getGlobalsTable() const { return GlobalsTable; }
   Error reload();
 
+  std::vector<std::pair<uint32_t, codeview::CVSymbol>>
+  findRecordsByName(StringRef Name, const SymbolStream &Symbols) const;
+
 private:
   GSIHashTable GlobalsTable;
   std::unique_ptr<msf::MappedBlockStream> Stream;
index efc25e0..19b0ebd 100644 (file)
@@ -15,6 +15,7 @@
 #include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
 #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
+#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
 #include "llvm/Support/BinaryStreamRef.h"
 #include "llvm/Support/Error.h"
 #include <cstdint>
@@ -51,6 +52,8 @@ public:
 
   ModuleDebugStreamRef &operator=(ModuleDebugStreamRef &&Other) = delete;
 
+  codeview::CVSymbol readSymbolAtOffset(uint32_t Offset) const;
+
   iterator_range<DebugSubsectionIterator> subsections() const;
   codeview::DebugSubsectionArray getSubsectionsArray() const {
     return Subsections;
@@ -64,7 +67,7 @@ public:
   findChecksumsSubsection() const;
 
 private:
-  const DbiModuleDescriptor &Mod;
+  DbiModuleDescriptor Mod;
 
   uint32_t Signature;
 
index 4cb267e..54bf904 100644 (file)
@@ -262,7 +262,8 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
 Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
                                            Compile3Sym &Compile3) {
   W.printEnum("Language", Compile3.getLanguage(), getSourceLanguageNames());
-  W.printFlags("Flags", Compile3.getFlags(), getCompileSym3FlagNames());
+  W.printFlags("Flags", uint32_t(Compile3.getFlags()),
+               getCompileSym3FlagNames());
   W.printEnum("Machine", unsigned(Compile3.Machine), getCPUTypeNames());
   CompilationCPUType = Compile3.Machine;
   std::string FrontendVersion;
index edaa783..60ac17b 100644 (file)
@@ -197,7 +197,7 @@ PDB_Machine DbiStream::getMachineType() const {
   return static_cast<PDB_Machine>(Machine);
 }
 
-FixedStreamArray<object::coff_section> DbiStream::getSectionHeaders() {
+FixedStreamArray<object::coff_section> DbiStream::getSectionHeaders() const {
   return SectionHeaders;
 }
 
index 36076f4..3a4c65e 100644 (file)
 //===----------------------------------------------------------------------===//
 
 #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
+
+#include "llvm/DebugInfo/CodeView/RecordName.h"
+#include "llvm/DebugInfo/PDB/Native/Hash.h"
 #include "llvm/DebugInfo/PDB/Native/RawError.h"
+#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
 #include "llvm/Support/BinaryStreamReader.h"
 #include "llvm/Support/Error.h"
 #include <algorithm>
@@ -41,6 +45,35 @@ Error GlobalsStream::reload() {
   return Error::success();
 }
 
+std::vector<std::pair<uint32_t, codeview::CVSymbol>>
+GlobalsStream::findRecordsByName(StringRef Name,
+                                 const SymbolStream &Symbols) const {
+  std::vector<std::pair<uint32_t, codeview::CVSymbol>> Result;
+
+  // Hash the name to figure out which bucket this goes into.
+  size_t ExpandedBucketIndex = hashStringV1(Name) % IPHR_HASH;
+  uint32_t CompressedBucketIndex = GlobalsTable.BucketMap[ExpandedBucketIndex];
+  if (CompressedBucketIndex == -1)
+    return Result;
+
+  uint32_t ChainStartOffset = GlobalsTable.HashBuckets[CompressedBucketIndex];
+  uint32_t NextChainStart = GlobalsTable.HashBuckets.size();
+  if (CompressedBucketIndex + 1 < GlobalsTable.HashBuckets.size())
+    NextChainStart = GlobalsTable.HashBuckets[CompressedBucketIndex + 1];
+  ChainStartOffset /= 12;
+  NextChainStart /= 12;
+
+  while (ChainStartOffset < NextChainStart) {
+    PSHashRecord PSH = GlobalsTable.HashRecords[ChainStartOffset];
+    uint32_t Off = PSH.Off - 1;
+    codeview::CVSymbol Record = Symbols.readRecord(Off);
+    if (codeview::getSymbolName(Record) == Name)
+      Result.push_back(std::make_pair(Off, std::move(Record)));
+    ++ChainStartOffset;
+  }
+  return Result;
+}
+
 static Error checkHashHdrVersion(const GSIHashHeader *HashHdr) {
   if (HashHdr->VerHdr != GSIHashHeader::HdrVersion)
     return make_error<RawError>(
@@ -86,7 +119,8 @@ static Error readGSIHashRecords(FixedStreamArray<PSHashRecord> &HashRecords,
 
 static Error
 readGSIHashBuckets(FixedStreamArray<support::ulittle32_t> &HashBuckets,
-                   ArrayRef<uint8_t> &HashBitmap, const GSIHashHeader *HashHdr,
+                   ArrayRef<uint32_t> &HashBitmap, const GSIHashHeader *HashHdr,
+                   MutableArrayRef<int32_t> BucketMap,
                    BinaryStreamReader &Reader) {
   if (auto EC = checkHashHdrVersion(HashHdr))
     return EC;
@@ -94,13 +128,27 @@ readGSIHashBuckets(FixedStreamArray<support::ulittle32_t> &HashBuckets,
   // Before the actual hash buckets, there is a bitmap of length determined by
   // IPHR_HASH.
   size_t BitmapSizeInBits = alignTo(IPHR_HASH + 1, 32);
-  uint32_t NumBitmapEntries = BitmapSizeInBits / 8;
-  if (auto EC = Reader.readBytes(HashBitmap, NumBitmapEntries))
+  uint32_t NumBitmapEntries = BitmapSizeInBits / 32;
+  if (auto EC = Reader.readArray(HashBitmap, NumBitmapEntries))
     return joinErrors(std::move(EC),
                       make_error<RawError>(raw_error_code::corrupt_file,
                                            "Could not read a bitmap."));
+  uint32_t NumBuckets1 = 0;
+  uint32_t CompressedBucketIdx = 0;
+  for (uint32_t I = 0; I <= IPHR_HASH; ++I) {
+    uint8_t WordIdx = I / 32;
+    uint8_t BitIdx = I % 32;
+    bool IsSet = HashBitmap[WordIdx] & (1U << BitIdx);
+    if (IsSet) {
+      ++NumBuckets1;
+      BucketMap[I] = CompressedBucketIdx++;
+    } else {
+      BucketMap[I] = -1;
+    }
+  }
+
   uint32_t NumBuckets = 0;
-  for (uint8_t B : HashBitmap)
+  for (uint32_t B : HashBitmap)
     NumBuckets += countPopulation(B);
 
   // Hash buckets follow.
@@ -118,7 +166,8 @@ Error GSIHashTable::read(BinaryStreamReader &Reader) {
   if (auto EC = readGSIHashRecords(HashRecords, HashHdr, Reader))
     return EC;
   if (HashHdr->HrSize > 0)
-    if (auto EC = readGSIHashBuckets(HashBuckets, HashBitmap, HashHdr, Reader))
+    if (auto EC = readGSIHashBuckets(HashBuckets, HashBitmap, HashHdr,
+                                     BucketMap, Reader))
       return EC;
   return Error::success();
 }
index 2e1f61c..6464b85 100644 (file)
@@ -97,6 +97,14 @@ ModuleDebugStreamRef::symbols(bool *HadError) const {
   return make_range(SymbolArray.begin(HadError), SymbolArray.end());
 }
 
+CVSymbol ModuleDebugStreamRef::readSymbolAtOffset(uint32_t Offset) const {
+  // The incoming offset counts the 4-byte magic at the start of the stream,
+  // whereas SymbolArray lookups do not, so rebase it before searching.
+  const uint32_t ArrayOffset = Offset - 4;
+  auto SymbolIter = SymbolArray.at(ArrayOffset);
+  assert(SymbolIter != SymbolArray.end());
+  return *SymbolIter;
+}
+
 iterator_range<ModuleDebugStreamRef::DebugSubsectionIterator>
 ModuleDebugStreamRef::subsections() const {
   return make_range(Subsections.begin(), Subsections.end());
diff --git a/llvm/test/DebugInfo/PDB/pdbdump-global-lookup.test b/llvm/test/DebugInfo/PDB/pdbdump-global-lookup.test
new file mode 100644 (file)
index 0000000..fe5ec66
--- /dev/null
@@ -0,0 +1,22 @@
+; RUN: llvm-pdbutil dump -globals \
+; RUN:    -global-name="operator delete" \
+; RUN:    -global-name=main \
+; RUN:    -global-name=abcdefg \
+; RUN:    -global-name="Base2::`vbase destructor'" \
+; RUN:    %p/Inputs/every-function.pdb | FileCheck %s
+
+CHECK:                        Global Symbols
+CHECK-NEXT: ============================================================
+CHECK-NEXT:     Global Name `operator delete`
+CHECK-NEXT:       1516 | S_PROCREF [size = 32] `operator delete`
+CHECK-NEXT:              module = 1, sum name = 0, offset = 324
+CHECK-NEXT:       1484 | S_PROCREF [size = 32] `operator delete`
+CHECK-NEXT:              module = 1, sum name = 0, offset = 184
+CHECK-NEXT:     Global Name `main`
+CHECK-NEXT:       2016 | S_PROCREF [size = 20] `main`
+CHECK-NEXT:              module = 1, sum name = 0, offset = 1952
+CHECK-NEXT:     Global Name `abcdefg`
+CHECK-NEXT:       (no matching records found)
+CHECK-NEXT:     Global Name `Base2::`vbase destructor'`
+CHECK-NEXT:       2288 | S_PROCREF [size = 40] `Base2::`vbase destructor'`
+CHECK-NEXT:              module = 1, sum name = 0, offset = 2852
index 4e80f41..e94decc 100644 (file)
@@ -1572,8 +1572,40 @@ Error DumpOutputStyle::dumpGlobals() {
   ExitOnError Err("Error dumping globals stream: ");
   auto &Globals = Err(getPdb().getPDBGlobalsStream());
 
-  const GSIHashTable &Table = Globals.getGlobalsTable();
-  Err(dumpSymbolsFromGSI(Table, opts::dump::DumpGlobalExtras));
+  if (opts::dump::DumpGlobalNames.empty()) {
+    const GSIHashTable &Table = Globals.getGlobalsTable();
+    Err(dumpSymbolsFromGSI(Table, opts::dump::DumpGlobalExtras));
+  } else {
+    SymbolStream &SymRecords = cantFail(getPdb().getPDBSymbolStream());
+    auto &Types = File.types();
+    auto &Ids = File.ids();
+
+    SymbolVisitorCallbackPipeline Pipeline;
+    SymbolDeserializer Deserializer(nullptr, CodeViewContainer::Pdb);
+    MinimalSymbolDumper Dumper(P, opts::dump::DumpSymRecordBytes, Ids, Types);
+
+    Pipeline.addCallbackToPipeline(Deserializer);
+    Pipeline.addCallbackToPipeline(Dumper);
+    CVSymbolVisitor Visitor(Pipeline);
+
+    using ResultEntryType = std::pair<uint32_t, CVSymbol>;
+    for (StringRef Name : opts::dump::DumpGlobalNames) {
+      AutoIndent Indent(P);
+      P.formatLine("Global Name `{0}`", Name);
+      std::vector<ResultEntryType> Results =
+          Globals.findRecordsByName(Name, SymRecords);
+      if (Results.empty()) {
+        AutoIndent Indent(P);
+        P.printLine("(no matching records found)");
+        continue;
+      }
+
+      for (ResultEntryType Result : Results) {
+        if (auto E = Visitor.visitSymbolRecord(Result.second, Result.first))
+          return E;
+      }
+    }
+  }
   return Error::success();
 }
 
@@ -1676,7 +1708,10 @@ Error DumpOutputStyle::dumpSymbolsFromGSI(const GSIHashTable &Table,
 
   // Return early if we aren't dumping public hash table and address map info.
   if (HashExtras) {
-    P.formatBinary("Hash Bitmap", Table.HashBitmap, 0);
+    ArrayRef<uint8_t> BitmapBytes(
+        reinterpret_cast<const uint8_t *>(Table.HashBitmap.data()),
+        Table.HashBitmap.size() * sizeof(uint32_t));
+    P.formatBinary("Hash Bitmap", BitmapBytes, 0);
 
     P.formatLine("Hash Entries");
     {
index da9a644..34618f6 100644 (file)
@@ -526,6 +526,11 @@ cl::opt<bool> DumpGlobals("globals", cl::desc("dump Globals symbol records"),
                           cl::cat(SymbolOptions), cl::sub(DumpSubcommand));
 cl::opt<bool> DumpGlobalExtras("global-extras", cl::desc("dump Globals hashes"),
                                cl::cat(SymbolOptions), cl::sub(DumpSubcommand));
+cl::list<std::string> DumpGlobalNames(
+    "global-name",
+    cl::desc(
+        "With -globals, only dump globals whose name matches the given value"),
+    cl::cat(SymbolOptions), cl::sub(DumpSubcommand), cl::ZeroOrMore);
 cl::opt<bool> DumpPublics("publics", cl::desc("dump Publics stream data"),
                           cl::cat(SymbolOptions), cl::sub(DumpSubcommand));
 cl::opt<bool> DumpPublicExtras("public-extras",
index 36c20b7..1584dce 100644 (file)
@@ -169,6 +169,7 @@ extern llvm::cl::opt<bool> DumpSymbols;
 extern llvm::cl::opt<bool> DumpSymRecordBytes;
 extern llvm::cl::opt<bool> DumpGSIRecords;
 extern llvm::cl::opt<bool> DumpGlobals;
+extern llvm::cl::list<std::string> DumpGlobalNames;
 extern llvm::cl::opt<bool> DumpGlobalExtras;
 extern llvm::cl::opt<bool> DumpPublics;
 extern llvm::cl::opt<bool> DumpPublicExtras;