[PDB] Sort globals symbols by name in GSI hash buckets.
author Zachary Turner <zturner@google.com>
Fri, 6 Jul 2018 02:33:58 +0000 (02:33 +0000)
committer Zachary Turner <zturner@google.com>
Fri, 6 Jul 2018 02:33:58 +0000 (02:33 +0000)
It seems like the debugger first computes a symbol's bucket,
and then does a binary search of entries in the bucket using the
symbol's name in order to find it.  If the bucket entries are not
in sorted order, this obviously won't work.  After this patch a
couple of simple test cases show that we generate an exactly
identical GSI hash stream, which is very nice.

llvm-svn: 336405

lld/test/COFF/Inputs/globals-dia-vfunc-collision.obj [new file with mode: 0644]
lld/test/COFF/Inputs/globals-dia-vfunc-collision2.obj [new file with mode: 0644]
lld/test/COFF/Inputs/globals-dia-vfunc-simple.obj [new file with mode: 0644]
lld/test/COFF/pdb-globals-dia-vfunc-collision.test [new file with mode: 0644]
lld/test/COFF/pdb-globals-dia-vfunc-collision2.test [new file with mode: 0644]
lld/test/COFF/pdb-globals-dia-vfunc-simple.test [new file with mode: 0644]
llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp

diff --git a/lld/test/COFF/Inputs/globals-dia-vfunc-collision.obj b/lld/test/COFF/Inputs/globals-dia-vfunc-collision.obj
new file mode 100644 (file)
index 0000000..3191c3e
Binary files /dev/null and b/lld/test/COFF/Inputs/globals-dia-vfunc-collision.obj differ
diff --git a/lld/test/COFF/Inputs/globals-dia-vfunc-collision2.obj b/lld/test/COFF/Inputs/globals-dia-vfunc-collision2.obj
new file mode 100644 (file)
index 0000000..f406102
Binary files /dev/null and b/lld/test/COFF/Inputs/globals-dia-vfunc-collision2.obj differ
diff --git a/lld/test/COFF/Inputs/globals-dia-vfunc-simple.obj b/lld/test/COFF/Inputs/globals-dia-vfunc-simple.obj
new file mode 100644 (file)
index 0000000..f0a9b4d
Binary files /dev/null and b/lld/test/COFF/Inputs/globals-dia-vfunc-simple.obj differ
diff --git a/lld/test/COFF/pdb-globals-dia-vfunc-collision.test b/lld/test/COFF/pdb-globals-dia-vfunc-collision.test
new file mode 100644 (file)
index 0000000..b19ff34
--- /dev/null
@@ -0,0 +1,42 @@
+REQUIRES: diasdk\r
+\r
+Input object file reconstruction:\r
+\r
+; // main.cpp\r
+; struct S {\r
+;   // Function names are chosen specifically to generate hash collisions in the\r
+;   // GSI hash table.\r
+;   virtual int A307() { return 102; }\r
+;   virtual int A400() { return 12; }\r
+;   virtual int A206() { return 201; }\r
+;   virtual int A105() { return 300; }\r
+; };\r
+; \r
+; struct T : public S {\r
+;   int A105() override { return 300; }\r
+;   int A307() override { return 102; }\r
+;   int A206() override { return 201; }\r
+;   int A400() override { return 12; }\r
+; };\r
+; \r
+; int main(int argc, char **argv) {\r
+;   T s;\r
+;   return s.A105() + s.A206() + s.A307() + s.A400();\r
+; }\r
+\r
+clang-cl /Z7 /GS- /GR- /c main.cpp /Foglobals-dia-vfunc-collision.obj\r
+\r
+RUN: lld-link /debug /nodefaultlib /entry:main /out:%t.exe %S/Inputs/globals-dia-vfunc-collision.obj\r
+RUN: llvm-pdbutil pretty -classes %t.pdb | FileCheck %s\r
+\r
+CHECK: struct T\r
+CHECK: func [0x000010c0+ 0 - 0x000010dd-29 | sizeof= 29] (FPO) virtual int __cdecl A105()\r
+CHECK: func [0x00001100+ 0 - 0x0000111b-27 | sizeof= 27] (FPO) virtual int __cdecl A307()\r
+CHECK: func [0x000010e0+ 0 - 0x000010fd-29 | sizeof= 29] (FPO) virtual int __cdecl A206()\r
+CHECK: func [0x00001120+ 0 - 0x0000113b-27 | sizeof= 27] (FPO) virtual int __cdecl A400()\r
+\r
+CHECK: struct S\r
+CHECK: func [0x00001160+ 0 - 0x0000116c-12 | sizeof= 12] (FPO) virtual int __cdecl A307()\r
+CHECK: func [0x00001170+ 0 - 0x0000117c-12 | sizeof= 12] (FPO) virtual int __cdecl A400()\r
+CHECK: func [0x00001180+ 0 - 0x0000118c-12 | sizeof= 12] (FPO) virtual int __cdecl A206()\r
+CHECK: func [0x00001190+ 0 - 0x0000119c-12 | sizeof= 12] (FPO) virtual int __cdecl A105()\r
diff --git a/lld/test/COFF/pdb-globals-dia-vfunc-collision2.test b/lld/test/COFF/pdb-globals-dia-vfunc-collision2.test
new file mode 100644 (file)
index 0000000..0d5da46
--- /dev/null
@@ -0,0 +1,25 @@
+REQUIRES: diasdk\r
+\r
+Input object file reconstruction:\r
+\r
+; // main.cpp\r
+; struct S {\r
+;   // Function names are chosen specifically to generate hash collisions in the\r
+;   // GSI hash table.\r
+;   virtual int A132() { return 102; }\r
+;   virtual int A1001() { return 300; }\r
+; };\r
+; \r
+; int main(int argc, char **argv) {\r
+;   S s;\r
+;   return s.A132();\r
+; }\r
+\r
+clang-cl /Z7 /GS- /GR- /c main.cpp /Foglobals-dia-vfunc-collision2.obj\r
+\r
+RUN: lld-link /debug /nodefaultlib /entry:main /out:%t.exe %S/Inputs/globals-dia-vfunc-collision2.obj\r
+RUN: llvm-pdbutil pretty -classes %t.pdb | FileCheck %s\r
+\r
+CHECK: struct S\r
+CHECK: func [0x00001060+ 0 - 0x0000106c-12 | sizeof= 12] (FPO) virtual int __cdecl A132()\r
+CHECK: func [0x00001070+ 0 - 0x0000107c-12 | sizeof= 12] (FPO) virtual int __cdecl A1001()\r
diff --git a/lld/test/COFF/pdb-globals-dia-vfunc-simple.test b/lld/test/COFF/pdb-globals-dia-vfunc-simple.test
new file mode 100644 (file)
index 0000000..6273c39
--- /dev/null
@@ -0,0 +1,26 @@
+REQUIRES: diasdk\r
+\r
+Input object file reconstruction:\r
+\r
+; // main.cpp\r
+; struct Base {\r
+;   virtual int V2() { return 42; }\r
+; };\r
+; \r
+; struct Derived : public Base {\r
+;   int V2() override { return 42; }\r
+; };\r
+; \r
+; int main()\r
+; {\r
+;   Derived D;\r
+;   return D.V2();\r
+; }\r
+\r
+clang-cl /Z7 /GS- /GR- /c main.cpp /Foglobals-dia-vfunc-simple.obj\r
+\r
+RUN: lld-link /debug /nodefaultlib /entry:main /out:%t.exe %S/Inputs/globals-dia-vfunc-simple.obj\r
+RUN: llvm-pdbutil pretty -classes %t.pdb | FileCheck %s\r
+\r
+CHECK: func [0x00001070+ 0 - 0x0000107c-12 | sizeof= 12] (FPO) virtual int __cdecl V2()\r
+CHECK: func [0x000010a0+ 0 - 0x000010ac-12 | sizeof= 12] (FPO) virtual int __cdecl V2()\r
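diff --git a/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
index 63d63c1..eaea24a 100644 (file)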
@@ -83,7 +83,8 @@ Error GSIHashStreamBuilder::commit(BinaryStreamWriter &Writer) {
 }
 
 void GSIHashStreamBuilder::finalizeBuckets(uint32_t RecordZeroOffset) {
-  std::array<std::vector<PSHashRecord>, IPHR_HASH + 1> TmpBuckets;
+  std::array<std::vector<std::pair<StringRef, PSHashRecord>>, IPHR_HASH + 1>
+      TmpBuckets;
   uint32_t SymOffset = RecordZeroOffset;
   for (const CVSymbol &Sym : Records) {
     PSHashRecord HR;
@@ -94,8 +95,7 @@ void GSIHashStreamBuilder::finalizeBuckets(uint32_t RecordZeroOffset) {
     // Hash the name to figure out which bucket this goes into.
     StringRef Name = getSymbolName(Sym);
     size_t BucketIdx = hashStringV1(Name) % IPHR_HASH;
-    TmpBuckets[BucketIdx].push_back(HR); // FIXME: Does order matter?
-
+    TmpBuckets[BucketIdx].push_back(std::make_pair(Name, HR));
     SymOffset += Sym.length();
   }
 
@@ -117,8 +117,22 @@ void GSIHashStreamBuilder::finalizeBuckets(uint32_t RecordZeroOffset) {
     ulittle32_t ChainStartOff =
         ulittle32_t(HashRecords.size() * SizeOfHROffsetCalc);
     HashBuckets.push_back(ChainStartOff);
-    for (const auto &HR : Bucket)
-      HashRecords.push_back(HR);
+
+    // Sort each bucket by symbol name: shorter names come first, and names
+    // of equal length are ordered byte-wise (memcmp order).
+    std::sort(Bucket.begin(), Bucket.end(),
+              [](const std::pair<StringRef, PSHashRecord> &Left,
+                 const std::pair<StringRef, PSHashRecord> &Right) {
+                size_t LS = Left.first.size();
+                size_t RS = Right.first.size();
+                if (LS < RS)
+                  return true;
+                if (LS > RS)
+                  return false;
+                return Left.first < Right.first;
+              });
+
+    for (const auto &Entry : Bucket)
+      HashRecords.push_back(Entry.second);
   }
 }