[PDB] Make our PDBs look more like MS PDBs.
authorZachary Turner <zturner@google.com>
Fri, 23 Mar 2018 18:43:39 +0000 (18:43 +0000)
committerZachary Turner <zturner@google.com>
Fri, 23 Mar 2018 18:43:39 +0000 (18:43 +0000)
When investigating bugs in PDB generation, the first step is
often to do the same link with link.exe and then compare PDBs.

But comparing PDBs is hard because two completely different byte
sequences can both be correct, so it hampers the investigation when
you also have to spend time figuring out not just which bytes are
different, but also if the difference is meaningful.

This patch fixes a couple of cases related to string table emission,
hash table emission, and the order in which we emit strings that
makes more of our bytes the same as the bytes generated by MS PDBs.

Differential Revision: https://reviews.llvm.org/D44810

llvm-svn: 328348

22 files changed:
lld/COFF/PDB.cpp
lld/test/COFF/pdb-diff.test [deleted file]
lld/test/COFF/pdb-file-static.test
lld/test/COFF/pdb-lib.s
lld/test/COFF/pdb-linker-module.test
lld/test/COFF/pdb-same-name.test
lld/test/COFF/pdb.test
llvm/include/llvm/DebugInfo/CodeView/DebugStringTableSubsection.h
llvm/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h
llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h
llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h
llvm/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp
llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
llvm/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp
llvm/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp
llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp
llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp
llvm/tools/llvm-pdbutil/DumpOutputStyle.h
llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp
llvm/tools/llvm-pdbutil/llvm-pdbutil.h

index a13b900..2287dda 100644 (file)
@@ -85,7 +85,12 @@ class PDBLinker {
 public:
   PDBLinker(SymbolTable *Symtab)
       : Alloc(), Symtab(Symtab), Builder(Alloc), TypeTable(Alloc),
-        IDTable(Alloc), GlobalTypeTable(Alloc), GlobalIDTable(Alloc) {}
+        IDTable(Alloc), GlobalTypeTable(Alloc), GlobalIDTable(Alloc) {
+    // This isn't strictly necessary, but link.exe usually puts an empty string
+    // as the first "valid" string in the string table, so we do the same in
+    // order to maintain as much byte-for-byte compatibility as possible.
+    PDBStrTab.insert("");
+  }
 
   /// Emit the basic PDB structure: initial streams, headers, etc.
   void initialize(const llvm::codeview::DebugInfo &BuildId);
@@ -1066,7 +1071,6 @@ void PDBLinker::initialize(const llvm::codeview::DebugInfo &BuildId) {
   pdb::DbiStreamBuilder &DbiBuilder = Builder.getDbiBuilder();
   DbiBuilder.setAge(BuildId.PDB70.Age);
   DbiBuilder.setVersionHeader(pdb::PdbDbiV70);
-  ExitOnErr(DbiBuilder.addDbgStream(pdb::DbgHeaderType::NewFPO, {}));
 }
 
 void PDBLinker::addSectionContrib(pdb::DbiModuleDescriptorBuilder &LinkerModule,
diff --git a/lld/test/COFF/pdb-diff.test b/lld/test/COFF/pdb-diff.test
deleted file mode 100644 (file)
index 17d26b6..0000000
+++ /dev/null
@@ -1,215 +0,0 @@
-This test verifies that we produce PDBs compatible with MSVC in various ways.
-We check in a cl-generated object file, PDB, and original source which serve
-as the "baseline" for us to measure against.  Then we link the same object
-file with LLD and compare the two PDBs.  Since the baseline object file and
-PDB are already checked in, we just run LLD on the object file.
-
-RUN: rm -f %T/pdb-diff-lld.pdb %T/pdb-diff-lld.exe
-RUN: lld-link /debug /pdb:%T/pdb-diff-lld.pdb /out:%T/pdb-diff-lld.exe /nodefaultlib \
-RUN:   /entry:main %S/Inputs/pdb-diff.obj
-RUN: llvm-pdbutil diff -result -values=false -left-bin-root=%S -right-bin-root=D:/src/llvm-mono/lld/test/COFF/ \
-RUN:   %T/pdb-diff-lld.pdb %S/Inputs/pdb-diff-cl.pdb | FileCheck %s
-
-CHECK:        ----------------------
-CHECK-NEXT:   |  MSF Super Block   |
-CHECK-NEXT:   |----------------+---|
-CHECK-NEXT:   |           File |   |
-CHECK-NEXT:   |----------------+---|
-CHECK-NEXT:   |     Block Size | I |
-CHECK-NEXT:   |----------------+---|
-CHECK-NEXT:   |    Block Count |
-CHECK-NEXT:   |----------------+---|
-CHECK-NEXT:   |      Unknown 1 | I |
-CHECK-NEXT:   |----------------+---|
-CHECK-NEXT:   | Directory Size |
-CHECK-NEXT:   |----------------+---|
-CHECK-NEXT:   ------------------------------------
-CHECK-NEXT:   |         Stream Directory         |
-CHECK-NEXT:   |------------------------------+---|
-CHECK-NEXT:   |                         File |   |
-CHECK-NEXT:   |------------------------------+---|
-CHECK-NEXT:   |                 Stream Count | I |
-CHECK-NEXT:   |------------------------------+---|
-CHECK-NEXT:   |            Old MSF Directory | I |
-CHECK-NEXT:   |------------------------------+---|
-CHECK-NEXT:   |                   PDB Stream | I |
-CHECK-NEXT:   |------------------------------+---|
-CHECK-NEXT:   |                   TPI Stream | I |
-CHECK-NEXT:   |------------------------------+---|
-CHECK-NEXT:   |                   DBI Stream | I |
-CHECK-NEXT:   |------------------------------+---|
-CHECK-NEXT:   |                   IPI Stream | I |
-CHECK-NEXT:   |------------------------------+---|
-CHECK-NEXT:   |                 New FPO Data | {{[EI]}} |
-CHECK-NEXT:   |------------------------------+---|
-CHECK-NEXT:   |          Section Header Data | {{[EI]}} |
-CHECK-NEXT:   |------------------------------+---|
-CHECK-NEXT:   |        Named Stream "/names" | {{[EI]}} |
-CHECK-NEXT:   |------------------------------+---|
-CHECK-NEXT:   |     Named Stream "/LinkInfo" | {{[EI]}} |
-CHECK-NEXT:   |------------------------------+---|
-CHECK-NEXT:   | Module "Inputs\pdb-diff.obj" | {{[EI]}} |
-CHECK-NEXT:   |------------------------------+---|
-CHECK-NEXT:   |          Module "* Linker *" | {{[EI]}} |
-CHECK-NEXT:   |------------------------------+---|
-CHECK-NEXT:   |                     TPI Hash | {{[EI]}} |
-CHECK-NEXT:   |------------------------------+---|
-CHECK-NEXT:   |                     IPI Hash | {{[EI]}} |
-CHECK-NEXT:   |------------------------------+---|
-CHECK-NEXT:   |           Public Symbol Hash | {{[EI]}} |
-CHECK-NEXT:   |------------------------------+---|
-CHECK-NEXT:   |           Global Symbol Hash | {{[EI]}} |
-CHECK-NEXT:   |------------------------------+---|
-CHECK-NEXT:   |               Symbol Records | {{[EI]}} |
-CHECK-NEXT:   |------------------------------+---|
-CHECK-NEXT:   ------------------------------------
-CHECK-NEXT:   |           String Table           |
-CHECK-NEXT:   |------------------------------+---|
-CHECK-NEXT:   |                         File |   |
-CHECK-NEXT:   |------------------------------+---|
-CHECK-NEXT:   |            Number of Strings | D |
-CHECK-NEXT:   |------------------------------+---|
-CHECK-NEXT:   |                 Hash Version | I |
-CHECK-NEXT:   |------------------------------+---|
-CHECK-NEXT:   |                    Byte Size |
-CHECK-NEXT:   |------------------------------+---|
-CHECK-NEXT:   |                    Signature | I |
-CHECK-NEXT:   |------------------------------+---|
-CHECK-NEXT:   |                Empty Strings |
-CHECK-NEXT:   |------------------------------+---|
-CHECK-NEXT:   |  {{.*}}pdb-diff.cpp | {{[EI]}} |
-CHECK-NEXT:   |------------------------------+---|
-CHECK-NEXT:   |  $T0 $ebp = $...p $T0 8 + =  | D |
-CHECK-NEXT:   |------------------------------+---|
-CHECK-NEXT:   |  d:\src\llvm-...er internal) | D |
-CHECK-NEXT:   |------------------------------+---|
-CHECK-NEXT:   ----------------------------
-CHECK-NEXT:   |        PDB Stream        |
-CHECK-NEXT:   |----------------------+---|
-CHECK-NEXT:   |                 File |   |
-CHECK-NEXT:   |----------------------+---|
-CHECK-NEXT:   |          Stream Size |
-CHECK-NEXT:   |----------------------+---|
-CHECK-NEXT:   |                  Age | I |
-CHECK-NEXT:   |----------------------+---|
-CHECK-NEXT:   |                 Guid | D |
-CHECK-NEXT:   |----------------------+---|
-CHECK-NEXT:   |            Signature | D |
-CHECK-NEXT:   |----------------------+---|
-CHECK-NEXT:   |              Version | I |
-CHECK-NEXT:   |----------------------+---|
-CHECK-NEXT:   |       Features (set) | I |
-CHECK-NEXT:   |----------------------+---|
-CHECK-NEXT:   |              Feature | I |
-CHECK-NEXT:   |----------------------+---|
-CHECK-NEXT:   |    Named Stream Size |
-CHECK-NEXT:   |----------------------+---|
-CHECK-NEXT:   |  Named Streams (map) | {{[EI]}} |
-CHECK-NEXT:   |----------------------+---|
-CHECK-NEXT:   |               /names | {{[EI]}} |
-CHECK-NEXT:   |----------------------+---|
-CHECK-NEXT:   |            /LinkInfo | {{[EI]}} |
-CHECK-NEXT:   |----------------------+---|
-CHECK-NEXT:   ----------------------------------------------
-CHECK-NEXT:   |                 DBI Stream                 |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                                   File |   |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                            Dbi Version | I |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                                    Age | I |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                                Machine | I |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                                  Flags | D |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                            Build Major | D |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                            Build Minor | D |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                           Build Number | D |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                        PDB DLL Version | D |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                           PDB DLL RBLD | I |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                              DBG (FPO) | I |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                        DBG (Exception) | I |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                            DBG (Fixup) | I |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                        DBG (OmapToSrc) | I |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                      DBG (OmapFromSrc) | I |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                       DBG (SectionHdr) | {{[EI]}} |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                      DBG (TokenRidMap) | I |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                            DBG (Xdata) | I |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                            DBG (Pdata) | I |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                           DBG (NewFPO) | {{[EI]}} |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                   DBG (SectionHdrOrig) | I |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                         Globals Stream | {{[EI]}} |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                         Publics Stream | {{[EI]}} |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                         Symbol Records | {{[EI]}} |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                             Has CTypes | I |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                Is Incrementally Linked | D |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                            Is Stripped | I |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                           Module Count | I |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                      Source File Count | I |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   | Module "Inputs\pdb-diff.obj" |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                                 - Modi | I |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                        - Obj File Name | {{[EI]}} |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                         - Debug Stream | {{[EI]}} |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                        - C11 Byte Size | I |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                        - C13 Byte Size | I |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                           - # of files | I |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                  - Pdb File Path Index | I |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |               - Source File Name Index | I |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                     - Symbol Byte Size |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |            Module "* Linker *"             |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                                 - Modi | I |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                        - Obj File Name | I |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                         - Debug Stream | {{[EI]}} |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                        - C11 Byte Size | I |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                        - C13 Byte Size | I |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                           - # of files | I |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                  - Pdb File Path Index | {{[EI]}} |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |               - Source File Name Index | {{[EI]}} |
-CHECK-NEXT:   |----------------------------------------+---|
-CHECK-NEXT:   |                     - Symbol Byte Size |
-CHECK-NEXT:   |----------------------------------------+---|
-
-
index 1d09823..f08f717 100644 (file)
@@ -43,9 +43,9 @@
 # CHECK: ============================================================
 # CHECK-LABEL:   Mod 0000 | `{{.*}}a.obj`:
 # CHECK:              232 | S_FILESTATIC [size = 16] `x`
-# CHECK-NEXT:               type = 0x0074 (int), file name = 1 (D:\src\llvmbuild\cl\Debug\x64\a.obj), flags = enreg global | enreg static
+# CHECK-NEXT:               type = 0x0074 (int), file name = 2 (D:\src\llvmbuild\cl\Debug\x64\a.obj), flags = enreg global | enreg static
 # CHECK:         Mod 0001 | `{{.*}}b.obj`:
 # CHECK:              232 | S_FILESTATIC [size = 16] `y`
-# CHECK-NEXT:               type = 0x0074 (int), file name = 73 (D:\src\llvmbuild\cl\Debug\x64\b.obj), flags = enreg global | enreg static
+# CHECK-NEXT:               type = 0x0074 (int), file name = 74 (D:\src\llvmbuild\cl\Debug\x64\b.obj), flags = enreg global | enreg static
 # CHECK-LABEL:   Mod 0002 | `* Linker *`:
 
index 319d4bc..c970f0b 100644 (file)
 # CHECK-NEXT: ============================================================
 # CHECK-NEXT:   Mod 0000 | `{{.*pdb-lib.s.tmp[/\\]foo.obj}}`:
 # CHECK-NEXT:              Obj: `{{.*pdb-lib.s.tmp[/\\]foo.obj}}`:
-# CHECK-NEXT:              debug stream: 9, # files: 0, has ec info: false
+# CHECK-NEXT:              debug stream: 10, # files: 0, has ec info: false
 # CHECK-NEXT:              pdb file ni: 0 ``, src file ni: 0 ``
 # CHECK-NEXT:   Mod 0001 | `bar.obj`:
 # CHECK-NEXT:              Obj: `{{.*pdb-lib.s.tmp[/\\]bar.lib}}`:
-# CHECK-NEXT:              debug stream: 10, # files: 0, has ec info: false
+# CHECK-NEXT:              debug stream: 11, # files: 0, has ec info: false
 # CHECK-NEXT:              pdb file ni: 0 ``, src file ni: 0 ``
 # CHECK-NEXT:   Mod 0002 | `* Linker *`:
 # CHECK-NEXT:              Obj: ``:
-# CHECK-NEXT:              debug stream: 11, # files: 0, has ec info: false
+# CHECK-NEXT:              debug stream: 12, # files: 0, has ec info: false
 # CHECK-NEXT:              pdb file ni: 1 `{{.*foo.pdb}}`, src file ni: 0 ``
 
         .def     _main;
index 1bb5729..96ca1b4 100644 (file)
@@ -4,7 +4,7 @@ RUN: llvm-pdbutil dump -symbols %t.pdb | FileCheck --check-prefix=SYMS %s
 
 MODS:      Mod 0001 | `* Linker *`
 MODS-NEXT:             Obj: ``:
-MODS-NEXT:             debug stream: 10, # files: 0, has ec info: false
+MODS-NEXT:             debug stream: 12, # files: 0, has ec info: false
 MODS-NEXT:             pdb file ni: 1 `{{.*}}pdb-linker-module.test.tmp.pdb`, src file ni: 0 ``
 
 SYMS:      Mod 0001 | `* Linker *`
index 76db69f..352bfc9 100644 (file)
@@ -15,9 +15,9 @@ RAW:                               Modules
 RAW-NEXT: ============================================================
 RAW-NEXT:   Mod 0000 | `foo.obj`:
 RAW-NEXT:              Obj: `{{.*}}1\foo.lib`:
-RAW-NEXT:              debug stream: 9, # files: 1, has ec info: false
+RAW-NEXT:              debug stream: 11, # files: 1, has ec info: false
 RAW-NEXT:              pdb file ni: 0 ``, src file ni: 0 ``
 RAW-NEXT:   Mod 0001 | `foo.obj`:
 RAW-NEXT:              Obj: `{{.*}}2\foo.lib`:
-RAW-NEXT:              debug stream: 10, # files: 1, has ec info: false
+RAW-NEXT:              debug stream: 12, # files: 1, has ec info: false
 RAW-NEXT:              pdb file ni: 0 ``, src file ni: 0 ``
index dad6de2..0d9f128 100644 (file)
@@ -121,15 +121,15 @@ RAW:                               Modules
 RAW-NEXT: ============================================================
 RAW-NEXT:   Mod 0000 | `{{.*}}pdb.test.tmp1.obj`:
 RAW-NEXT:              Obj: `{{.*}}pdb.test.tmp1.obj`:
-RAW-NEXT:              debug stream: 9, # files: 1, has ec info: false
+RAW-NEXT:              debug stream: 11, # files: 1, has ec info: false
 RAW-NEXT:              pdb file ni: 0 ``, src file ni: 0 ``
 RAW-NEXT:   Mod 0001 | `{{.*}}pdb.test.tmp2.obj`:
 RAW-NEXT:              Obj: `{{.*}}pdb.test.tmp2.obj`:
-RAW-NEXT:              debug stream: 10, # files: 1, has ec info: false
+RAW-NEXT:              debug stream: 12, # files: 1, has ec info: false
 RAW-NEXT:              pdb file ni: 0 ``, src file ni: 0 ``
 RAW-NEXT:   Mod 0002 | `* Linker *`:
 RAW-NEXT:              Obj: ``:
-RAW-NEXT:              debug stream: 11, # files: 0, has ec info: false
+RAW-NEXT:              debug stream: 13, # files: 0, has ec info: false
 RAW-NEXT:              pdb file ni: 1 `{{.*pdb.test.tmp.pdb}}`, src file ni: 0 ``
 RAW:                          Types (TPI Stream)
 RAW-NEXT: ============================================================
index 9469c06..bebc960 100644 (file)
@@ -82,6 +82,8 @@ public:
 
   StringMap<uint32_t>::const_iterator end() const { return StringToId.end(); }
 
+  std::vector<uint32_t> sortedIds() const;
+
 private:
   DenseMap<uint32_t, StringRef> IdToString;
   StringMap<uint32_t> StringToId;
index ad4a0d1..daea062 100644 (file)
@@ -121,7 +121,7 @@ private:
   MutableBinaryByteStream FileInfoBuffer;
   std::vector<SectionContrib> SectionContribs;
   ArrayRef<SecMapEntry> SectionMap;
-  llvm::SmallVector<DebugStream, (int)DbgHeaderType::Max> DbgStreams;
+  std::array<Optional<DebugStream>, (int)DbgHeaderType::Max> DbgStreams;
 };
 }
 }
index 4b7aa0d..34cc617 100644 (file)
@@ -304,12 +304,12 @@ private:
 
   void grow() {
     uint32_t S = size();
+    uint32_t MaxLoad = maxLoad(capacity());
     if (S < maxLoad(capacity()))
       return;
     assert(capacity() != UINT32_MAX && "Can't grow Hash table!");
 
-    uint32_t NewCapacity =
-        (capacity() <= INT32_MAX) ? capacity() * 2 : UINT32_MAX;
+    uint32_t NewCapacity = (capacity() <= INT32_MAX) ? MaxLoad * 2 : UINT32_MAX;
 
     // Growing requires rebuilding the table and re-hashing every item.  Make a
     // copy with a larger capacity, insert everything into the copy, then swap
index 7ed164b..2604306 100644 (file)
@@ -54,10 +54,11 @@ public:
   Error commit(StringRef Filename);
 
   Expected<uint32_t> getNamedStreamIndex(StringRef Name) const;
-  Error addNamedStream(StringRef Name, uint32_t Size);
+  Error addNamedStream(StringRef Name, StringRef Data);
 
 private:
   Expected<msf::MSFLayout> finalizeMsfLayout();
+  Expected<uint32_t> allocateNamedStream(StringRef Name, uint32_t Size);
 
   void commitFpm(WritableBinaryStream &MsfBuffer, const msf::MSFLayout &Layout);
 
@@ -72,6 +73,7 @@ private:
 
   PDBStringTableBuilder Strings;
   NamedStreamMap NamedStreams;
+  DenseMap<uint32_t, std::string> NamedStreamData;
 };
 }
 }
index c731b68..1278680 100644 (file)
@@ -86,6 +86,15 @@ Error DebugStringTableSubsection::commit(BinaryStreamWriter &Writer) const {
 
 uint32_t DebugStringTableSubsection::size() const { return StringToId.size(); }
 
+std::vector<uint32_t> DebugStringTableSubsection::sortedIds() const {
+  std::vector<uint32_t> Result;
+  Result.reserve(IdToString.size());
+  for (const auto &Entry : IdToString)
+    Result.push_back(Entry.first);
+  std::sort(Result.begin(), Result.end());
+  return Result;
+}
+
 uint32_t DebugStringTableSubsection::getIdForString(StringRef S) const {
   auto Iter = StringToId.find(S);
   assert(Iter != StringToId.end());
index c96553f..f17e081 100644 (file)
@@ -27,7 +27,7 @@ using namespace llvm::pdb;
 DbiStreamBuilder::DbiStreamBuilder(msf::MSFBuilder &Msf)
     : Msf(Msf), Allocator(Msf.getAllocator()), Age(1), BuildNumber(0),
       PdbDllVersion(0), PdbDllRbld(0), Flags(0), MachineType(PDB_Machine::x86),
-      Header(nullptr), DbgStreams((int)DbgHeaderType::Max) {}
+      Header(nullptr) {}
 
 DbiStreamBuilder::~DbiStreamBuilder() {}
 
@@ -63,15 +63,8 @@ void DbiStreamBuilder::setPublicsStreamIndex(uint32_t Index) {
 
 Error DbiStreamBuilder::addDbgStream(pdb::DbgHeaderType Type,
                                      ArrayRef<uint8_t> Data) {
-  if (DbgStreams[(int)Type].StreamNumber != kInvalidStreamIndex)
-    return make_error<RawError>(raw_error_code::duplicate_entry,
-                                "The specified stream type already exists");
-  auto ExpectedIndex = Msf.addStream(Data.size());
-  if (!ExpectedIndex)
-    return ExpectedIndex.takeError();
-  uint32_t Index = std::move(*ExpectedIndex);
-  DbgStreams[(int)Type].Data = Data;
-  DbgStreams[(int)Type].StreamNumber = Index;
+  DbgStreams[(int)Type].emplace();
+  DbgStreams[(int)Type]->Data = Data;
   return Error::success();
 }
 
@@ -266,6 +259,15 @@ Error DbiStreamBuilder::finalize() {
 }
 
 Error DbiStreamBuilder::finalizeMsfLayout() {
+  for (auto &S : DbgStreams) {
+    if (!S.hasValue())
+      continue;
+    auto ExpectedIndex = Msf.addStream(S->Data.size());
+    if (!ExpectedIndex)
+      return ExpectedIndex.takeError();
+    S->StreamNumber = *ExpectedIndex;
+  }
+
   for (auto &MI : ModiList) {
     if (auto EC = MI->finalizeMsfLayout())
       return EC;
@@ -375,17 +377,23 @@ Error DbiStreamBuilder::commit(const msf::MSFLayout &Layout,
   if (auto EC = ECNamesBuilder.commit(Writer))
     return EC;
 
-  for (auto &Stream : DbgStreams)
-    if (auto EC = Writer.writeInteger(Stream.StreamNumber))
+  for (auto &Stream : DbgStreams) {
+    uint16_t StreamNumber = kInvalidStreamIndex;
+    if (Stream.hasValue())
+      StreamNumber = Stream->StreamNumber;
+    if (auto EC = Writer.writeInteger(StreamNumber))
       return EC;
+  }
 
   for (auto &Stream : DbgStreams) {
-    if (Stream.StreamNumber == kInvalidStreamIndex)
+    if (!Stream.hasValue())
       continue;
+    assert(Stream->StreamNumber != kInvalidStreamIndex);
+
     auto WritableStream = WritableMappedBlockStream::createIndexedStream(
-        Layout, MsfBuffer, Stream.StreamNumber, Allocator);
+        Layout, MsfBuffer, Stream->StreamNumber, Allocator);
     BinaryStreamWriter DbgStreamWriter(*WritableStream);
-    if (auto EC = DbgStreamWriter.writeArray(Stream.Data))
+    if (auto EC = DbgStreamWriter.writeArray(Stream->Data))
       return EC;
   }
 
index e84f25d..63d63c1 100644 (file)
@@ -150,14 +150,14 @@ Error GSIStreamBuilder::finalizeMsfLayout() {
   PSH->finalizeBuckets(PSHZero);
   GSH->finalizeBuckets(GSHZero);
 
-  Expected<uint32_t> Idx = Msf.addStream(calculatePublicsHashStreamSize());
+  Expected<uint32_t> Idx = Msf.addStream(calculateGlobalsHashStreamSize());
   if (!Idx)
     return Idx.takeError();
-  PSH->StreamIndex = *Idx;
-  Idx = Msf.addStream(calculateGlobalsHashStreamSize());
+  GSH->StreamIndex = *Idx;
+  Idx = Msf.addStream(calculatePublicsHashStreamSize());
   if (!Idx)
     return Idx.takeError();
-  GSH->StreamIndex = *Idx;
+  PSH->StreamIndex = *Idx;
 
   uint32_t RecordBytes =
       GSH->calculateRecordByteSize() + PSH->calculateRecordByteSize();
index a20b451..54d6835 100644 (file)
@@ -73,5 +73,6 @@ Error InfoStreamBuilder::commit(const msf::MSFLayout &Layout,
     if (auto EC = Writer.writeEnum(E))
       return EC;
   }
+  assert(Writer.bytesRemaining() == 0);
   return Error::success();
 }
index 6076b10..a4eaed9 100644 (file)
@@ -47,7 +47,7 @@ uint32_t NamedStreamMapTraits::lookupKeyToStorageKey(StringRef S) {
 }
 
 NamedStreamMap::NamedStreamMap()
-    : HashTraits(*this), OffsetIndexMap(HashTraits) {}
+    : HashTraits(*this), OffsetIndexMap(1, HashTraits) {}
 
 Error NamedStreamMap::load(BinaryStreamReader &Stream) {
   uint32_t StringBufferSize;
index 1cb890f..38bba9e 100644 (file)
@@ -80,11 +80,20 @@ GSIStreamBuilder &PDBFileBuilder::getGsiBuilder() {
   return *Gsi;
 }
 
-Error PDBFileBuilder::addNamedStream(StringRef Name, uint32_t Size) {
+Expected<uint32_t> PDBFileBuilder::allocateNamedStream(StringRef Name,
+                                                       uint32_t Size) {
   auto ExpectedStream = Msf->addStream(Size);
-  if (!ExpectedStream)
-    return ExpectedStream.takeError();
-  NamedStreams.set(Name, *ExpectedStream);
+  if (ExpectedStream)
+    NamedStreams.set(Name, *ExpectedStream);
+  return ExpectedStream;
+}
+
+Error PDBFileBuilder::addNamedStream(StringRef Name, StringRef Data) {
+  Expected<uint32_t> ExpectedIndex = allocateNamedStream(Name, Data.size());
+  if (!ExpectedIndex)
+    return ExpectedIndex.takeError();
+  assert(NamedStreamData.count(*ExpectedIndex) == 0);
+  NamedStreamData[*ExpectedIndex] = Data;
   return Error::success();
 }
 
@@ -101,35 +110,41 @@ Expected<msf::MSFLayout> PDBFileBuilder::finalizeMsfLayout() {
 
   uint32_t StringsLen = Strings.calculateSerializedSize();
 
-  if (auto EC = addNamedStream("/names", StringsLen))
-    return std::move(EC);
-  if (auto EC = addNamedStream("/LinkInfo", 0))
-    return std::move(EC);
+  Expected<uint32_t> SN = allocateNamedStream("/LinkInfo", 0);
+  if (!SN)
+    return SN.takeError();
 
-  if (Info) {
-    if (auto EC = Info->finalizeMsfLayout())
-      return std::move(EC);
-  }
-  if (Dbi) {
-    if (auto EC = Dbi->finalizeMsfLayout())
+  if (Gsi) {
+    if (auto EC = Gsi->finalizeMsfLayout())
       return std::move(EC);
+    if (Dbi) {
+      Dbi->setPublicsStreamIndex(Gsi->getPublicsStreamIndex());
+      Dbi->setGlobalsStreamIndex(Gsi->getGlobalsStreamIndex());
+      Dbi->setSymbolRecordStreamIndex(Gsi->getRecordStreamIdx());
+    }
   }
   if (Tpi) {
     if (auto EC = Tpi->finalizeMsfLayout())
       return std::move(EC);
   }
+  if (Dbi) {
+    if (auto EC = Dbi->finalizeMsfLayout())
+      return std::move(EC);
+  }
+  SN = allocateNamedStream("/names", StringsLen);
+  if (!SN)
+    return SN.takeError();
+
   if (Ipi) {
     if (auto EC = Ipi->finalizeMsfLayout())
       return std::move(EC);
   }
-  if (Gsi) {
-    if (auto EC = Gsi->finalizeMsfLayout())
+
+  // Do this last, since it relies on the named stream map being complete, and
+  // that can be updated by previous steps in the finalization.
+  if (Info) {
+    if (auto EC = Info->finalizeMsfLayout())
       return std::move(EC);
-    if (Dbi) {
-      Dbi->setPublicsStreamIndex(Gsi->getPublicsStreamIndex());
-      Dbi->setGlobalsStreamIndex(Gsi->getGlobalsStreamIndex());
-      Dbi->setSymbolRecordStreamIndex(Gsi->getRecordStreamIdx());
-    }
   }
 
   return Msf->build();
@@ -219,6 +234,17 @@ Error PDBFileBuilder::commit(StringRef Filename) {
   if (auto EC = Strings.commit(NSWriter))
     return EC;
 
+  for (const auto &NSE : NamedStreamData) {
+    if (NSE.second.empty())
+      continue;
+
+    auto NS = WritableMappedBlockStream::createIndexedStream(
+        Layout, Buffer, NSE.first, Allocator);
+    BinaryStreamWriter NSW(*NS);
+    if (auto EC = NSW.writeBytes(arrayRefFromStringRef(NSE.second)))
+      return EC;
+  }
+
   if (Info) {
     if (auto EC = Info->commit(Layout, Buffer))
       return EC;
index 5020423..0975d9e 100644 (file)
@@ -15,6 +15,8 @@
 #include "llvm/Support/BinaryStreamWriter.h"
 #include "llvm/Support/Endian.h"
 
+#include <map>
+
 using namespace llvm;
 using namespace llvm::msf;
 using namespace llvm::support;
@@ -33,13 +35,66 @@ StringRef PDBStringTableBuilder::getStringForId(uint32_t Id) const {
   return Strings.getStringForId(Id);
 }
 
+// This is a precomputed list of Buckets given the specified number of
+// strings.  Matching the reference algorithm exactly is not strictly
+// necessary for correctness, but it helps when comparing LLD's PDBs with
+// Microsoft's PDBs so as to eliminate superfluous differences.
+static std::map<uint32_t, uint32_t> StringsToBuckets = {
+    {1, 2},
+    {2, 4},
+    {4, 7},
+    {6, 11},
+    {9, 17},
+    {13, 26},
+    {20, 40},
+    {31, 61},
+    {46, 92},
+    {70, 139},
+    {105, 209},
+    {157, 314},
+    {236, 472},
+    {355, 709},
+    {532, 1064},
+    {799, 1597},
+    {1198, 2396},
+    {1798, 3595},
+    {2697, 5393},
+    {4045, 8090},
+    {6068, 12136},
+    {9103, 18205},
+    {13654, 27308},
+    {20482, 40963},
+    {30723, 61445},
+    {46084, 92168},
+    {69127, 138253},
+    {103690, 207380},
+    {155536, 311071},
+    {233304, 466607},
+    {349956, 699911},
+    {524934, 1049867},
+    {787401, 1574801},
+    {1181101, 2362202},
+    {1771652, 3543304},
+    {2657479, 5314957},
+    {3986218, 7972436},
+    {5979328, 11958655},
+    {8968992, 17937983},
+    {13453488, 26906975},
+    {20180232, 40360463},
+    {30270348, 60540695},
+    {45405522, 90811043},
+    {68108283, 136216565},
+    {102162424, 204324848},
+    {153243637, 306487273},
+    {229865455, 459730910},
+    {344798183, 689596366},
+    {517197275, 1034394550},
+    {775795913, 1551591826}};
+
 static uint32_t computeBucketCount(uint32_t NumStrings) {
-  // The /names stream is basically an on-disk open-addressing hash table.
-  // Hash collisions are resolved by linear probing. We cannot make
-  // utilization 100% because it will make the linear probing extremely
-  // slow. But lower utilization wastes disk space. As a reasonable
-  // load factor, we choose 80%. We need +1 because slot 0 is reserved.
-  return (NumStrings + 1) * 1.25;
+  auto Entry = StringsToBuckets.lower_bound(NumStrings);
+  assert(Entry != StringsToBuckets.end());
+  return Entry->second;
 }
 
 uint32_t PDBStringTableBuilder::calculateHashTableSize() const {
index 365386f..6694a21 100644 (file)
@@ -90,7 +90,13 @@ Error DumpOutputStyle::dump() {
     P.NewLine();
   }
 
-  if (opts::dump::DumpStringTable) {
+  if (opts::dump::DumpNamedStreams) {
+    if (auto EC = dumpNamedStreams())
+      return EC;
+    P.NewLine();
+  }
+
+  if (opts::dump::DumpStringTable || opts::dump::DumpStringTableDetails) {
     if (auto EC = dumpStringTable())
       return EC;
     P.NewLine();
@@ -857,33 +863,64 @@ Error DumpOutputStyle::dumpStringTableFromPdb() {
     return Error::success();
   }
 
-  if (IS->name_ids().empty()) {
-    P.formatLine("Empty");
-    return Error::success();
+  if (opts::dump::DumpStringTable) {
+    if (IS->name_ids().empty())
+      P.formatLine("Empty");
+    else {
+      auto MaxID =
+          std::max_element(IS->name_ids().begin(), IS->name_ids().end());
+      uint32_t Digits = NumDigits(*MaxID);
+
+      P.formatLine("{0} | {1}", fmt_align("ID", AlignStyle::Right, Digits),
+                   "String");
+
+      std::vector<uint32_t> SortedIDs(IS->name_ids().begin(),
+                                      IS->name_ids().end());
+      std::sort(SortedIDs.begin(), SortedIDs.end());
+      for (uint32_t I : SortedIDs) {
+        auto ES = IS->getStringForID(I);
+        llvm::SmallString<32> Str;
+        if (!ES) {
+          consumeError(ES.takeError());
+          Str = "Error reading string";
+        } else if (!ES->empty()) {
+          Str.append("'");
+          Str.append(*ES);
+          Str.append("'");
+        }
+
+        if (!Str.empty())
+          P.formatLine("{0} | {1}", fmt_align(I, AlignStyle::Right, Digits),
+                       Str);
+      }
+    }
   }
 
-  auto MaxID = std::max_element(IS->name_ids().begin(), IS->name_ids().end());
-  uint32_t Digits = NumDigits(*MaxID);
-
-  P.formatLine("{0} | {1}", fmt_align("ID", AlignStyle::Right, Digits),
-               "String");
-
-  std::vector<uint32_t> SortedIDs(IS->name_ids().begin(), IS->name_ids().end());
-  std::sort(SortedIDs.begin(), SortedIDs.end());
-  for (uint32_t I : SortedIDs) {
-    auto ES = IS->getStringForID(I);
-    llvm::SmallString<32> Str;
-    if (!ES) {
-      consumeError(ES.takeError());
-      Str = "Error reading string";
-    } else if (!ES->empty()) {
-      Str.append("'");
-      Str.append(*ES);
-      Str.append("'");
+  if (opts::dump::DumpStringTableDetails) {
+    P.NewLine();
+    {
+      P.printLine("String Table Header:");
+      AutoIndent Indent(P);
+      P.formatLine("Signature: {0}", IS->getSignature());
+      P.formatLine("Hash Version: {0}", IS->getHashVersion());
+      P.formatLine("Name Buffer Size: {0}", IS->getByteSize());
+      P.NewLine();
     }
 
-    if (!Str.empty())
-      P.formatLine("{0} | {1}", fmt_align(I, AlignStyle::Right, Digits), Str);
+    BinaryStreamRef NameBuffer = IS->getStringTable().getBuffer();
+    ArrayRef<uint8_t> Contents;
+    cantFail(NameBuffer.readBytes(0, NameBuffer.getLength(), Contents));
+    P.formatBinary("Name Buffer", Contents, 0);
+    P.NewLine();
+    {
+      P.printLine("Hash Table:");
+      AutoIndent Indent(P);
+      P.formatLine("Bucket Count: {0}", IS->name_ids().size());
+      for (const auto &Entry : enumerate(IS->name_ids()))
+        P.formatLine("Bucket[{0}] : {1}", Entry.index(),
+                     uint32_t(Entry.value()));
+      P.formatLine("Name Count: {0}", IS->getNameCount());
+    }
   }
   return Error::success();
 }
@@ -909,6 +946,29 @@ Error DumpOutputStyle::dumpStringTableFromObj() {
   return Error::success();
 }
 
+Error DumpOutputStyle::dumpNamedStreams() {
+  printHeader(P, "Named Streams");
+  AutoIndent Indent(P, 2);
+
+  if (File.isObj()) {
+    P.formatLine("Dumping Named Streams is only supported for PDB files.");
+    return Error::success();
+  }
+  ExitOnError Err("Invalid PDB File: ");
+
+  auto &IS = Err(File.pdb().getPDBInfoStream());
+  const NamedStreamMap &NS = IS.getNamedStreams();
+  for (const auto &Entry : NS.entries()) {
+    P.printLine(Entry.getKey());
+    AutoIndent Indent2(P, 2);
+    P.formatLine("Index: {0}", Entry.getValue());
+    P.formatLine("Size in bytes: {0}",
+                 File.pdb().getStreamByteSize(Entry.getValue()));
+  }
+
+  return Error::success();
+}
+
 Error DumpOutputStyle::dumpStringTable() {
   printHeader(P, "String Table");
 
index fad304c..36d8b13 100644 (file)
@@ -74,6 +74,7 @@ private:
   Error dumpStreamSummary();
   Error dumpSymbolStats();
   Error dumpUdtStats();
+  Error dumpNamedStreams();
   Error dumpStringTable();
   Error dumpStringTableFromPdb();
   Error dumpStringTableFromObj();
index 25e0ddf..cdc01d8 100644 (file)
@@ -534,8 +534,16 @@ cl::opt<bool> JustMyCode("jmc", cl::Optional,
                          cl::cat(FileOptions), cl::sub(DumpSubcommand));
 
 // MISCELLANEOUS OPTIONS
+cl::opt<bool> DumpNamedStreams("named-streams",
+                               cl::desc("dump PDB named stream table"),
+                               cl::cat(MiscOptions), cl::sub(DumpSubcommand));
+
 cl::opt<bool> DumpStringTable("string-table", cl::desc("dump PDB String Table"),
                               cl::cat(MiscOptions), cl::sub(DumpSubcommand));
+cl::opt<bool> DumpStringTableDetails("string-table-details",
+                                     cl::desc("dump PDB String Table Details"),
+                                     cl::cat(MiscOptions),
+                                     cl::sub(DumpSubcommand));
 
 cl::opt<bool> DumpSectionContribs("section-contribs",
                                   cl::desc("dump section contributions"),
@@ -1199,6 +1207,7 @@ int main(int argc_, const char *argv_[]) {
       opts::dump::DumpStreams = true;
       opts::dump::DumpStreamBlocks = true;
       opts::dump::DumpStringTable = true;
+      opts::dump::DumpStringTableDetails = true;
       opts::dump::DumpSummary = true;
       opts::dump::DumpSymbols = true;
       opts::dump::DumpSymbolStats = true;
index 3ce03d5..7003b8c 100644 (file)
@@ -142,7 +142,9 @@ extern llvm::cl::opt<bool> DumpLines;
 extern llvm::cl::opt<bool> DumpInlineeLines;
 extern llvm::cl::opt<bool> DumpXmi;
 extern llvm::cl::opt<bool> DumpXme;
+extern llvm::cl::opt<bool> DumpNamedStreams;
 extern llvm::cl::opt<bool> DumpStringTable;
+extern llvm::cl::opt<bool> DumpStringTableDetails;
 extern llvm::cl::opt<bool> DumpTypes;
 extern llvm::cl::opt<bool> DumpTypeData;
 extern llvm::cl::opt<bool> DumpTypeExtras;