[PDB] Don't ignore bucket 0 when writing the PDB string table.
authorZachary Turner <zturner@google.com>
Wed, 21 Mar 2018 22:23:59 +0000 (22:23 +0000)
committerZachary Turner <zturner@google.com>
Wed, 21 Mar 2018 22:23:59 +0000 (22:23 +0000)
The hash table is a list of buckets, and the *value* stored in
the bucket cannot be 0 since that is reserved.  However, the code
here was incorrectly skipping over the 0'th bucket entirely.
The 0'th bucket is perfectly fine, just none of these buckets
can contain the value 0.

As a result, whenever there was a string where hash(S) % Size
was equal to 0, we would write the value in the next bucket
instead.  We never caught this in our tests due to *another*
bug, which is that we would iterate the entire list of buckets
looking for the value, only using the hash value as a starting
point.  However, the real algorithm stops when it finds 0 in
a bucket since it takes that to mean "the item is not in the
hash table".

The unit test is updated to carefully construct a set of hash
values that will cause one item to hash to 0 mod bucket count,
and the reader is also updated to return an error indicating that
the item is not found when it encounters a 0 bucket.

llvm-svn: 328162

llvm/lib/DebugInfo/PDB/Native/PDBStringTable.cpp
llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp
llvm/unittests/DebugInfo/PDB/StringTableBuilderTest.cpp

index f1c10357132be2fc7034dd2fd63997855a830095..afeea32043dd652c9d8e3457dad3a77d4a71d77d 100644 (file)
@@ -122,7 +122,10 @@ Expected<uint32_t> PDBStringTable::getIDForString(StringRef Str) const {
     // we iterate the entire array.
     uint32_t Index = (Start + I) % Count;
 
+    // If we find 0, it means the item isn't in the hash table.
     uint32_t ID = IDs[Index];
+    if (ID == 0)
+      return make_error<RawError>(raw_error_code::no_entry);
     auto ExpectedStr = getStringForID(ID);
     if (!ExpectedStr)
       return ExpectedStr.takeError();
index ece3e00b1a87edc9bf12bd4b0df74b3a0524a464..cdd23f8df524fa1bd3c8b28ecc1205872bf92fc5 100644 (file)
@@ -89,8 +89,6 @@ Error PDBStringTableBuilder::writeHashTable(BinaryStreamWriter &Writer) const {
 
     for (uint32_t I = 0; I != BucketCount; ++I) {
       uint32_t Slot = (Hash + I) % BucketCount;
-      if (Slot == 0)
-        continue; // Skip reserved slot
       if (Buckets[Slot] != 0)
         continue;
       Buckets[Slot] = Offset;
index 0efc2c6411b86eeb747cfb4af47e57df522529c5..bf08ab20923b1245f08ef0ea9f602b2ce4b8fd23 100644 (file)
@@ -27,10 +27,29 @@ class StringTableBuilderTest : public ::testing::Test {};
 TEST_F(StringTableBuilderTest, Simple) {
   // Create /names table contents.
   PDBStringTableBuilder Builder;
-  EXPECT_EQ(1U, Builder.insert("foo"));
-  EXPECT_EQ(5U, Builder.insert("bar"));
-  EXPECT_EQ(1U, Builder.insert("foo"));
-  EXPECT_EQ(9U, Builder.insert("baz"));
+
+  // This test case is carefully constructed to ensure that at least one
+  // string gets bucketed into slot 0, *and* to ensure that at least one
+  // has a hash collision at the end of the bucket list so it has to
+  // wrap around.
+  uint32_t FooID = Builder.insert("foo");
+  uint32_t BarID = Builder.insert("bar");
+  uint32_t BazID = Builder.insert("baz");
+  uint32_t BuzzID = Builder.insert("buzz");
+  uint32_t BazzID = Builder.insert("bazz");
+  uint32_t BarrID = Builder.insert("barr");
+
+  // Re-inserting the same item should return the same id.
+  EXPECT_EQ(FooID, Builder.insert("foo"));
+  EXPECT_EQ(BarID, Builder.insert("bar"));
+  EXPECT_EQ(BazID, Builder.insert("baz"));
+  EXPECT_EQ(BuzzID, Builder.insert("buzz"));
+  EXPECT_EQ(BazzID, Builder.insert("bazz"));
+  EXPECT_EQ(BarrID, Builder.insert("barr"));
+
+  // Each ID should be distinct.
+  std::set<uint32_t> Distinct{FooID, BarID, BazID, BuzzID, BazzID, BarrID};
+  EXPECT_EQ(6U, Distinct.size());
 
   std::vector<uint8_t> Buffer(Builder.calculateSerializedSize());
   MutableBinaryByteStream OutStream(Buffer, little);
@@ -43,13 +62,20 @@ TEST_F(StringTableBuilderTest, Simple) {
   PDBStringTable Table;
   EXPECT_THAT_ERROR(Table.reload(Reader), Succeeded());
 
-  EXPECT_EQ(3U, Table.getNameCount());
+  EXPECT_EQ(6U, Table.getNameCount());
   EXPECT_EQ(1U, Table.getHashVersion());
 
-  EXPECT_THAT_EXPECTED(Table.getStringForID(1), HasValue("foo"));
-  EXPECT_THAT_EXPECTED(Table.getStringForID(5), HasValue("bar"));
-  EXPECT_THAT_EXPECTED(Table.getStringForID(9), HasValue("baz"));
-  EXPECT_THAT_EXPECTED(Table.getIDForString("foo"), HasValue(1U));
-  EXPECT_THAT_EXPECTED(Table.getIDForString("bar"), HasValue(5U));
-  EXPECT_THAT_EXPECTED(Table.getIDForString("baz"), HasValue(9U));
+  EXPECT_THAT_EXPECTED(Table.getStringForID(FooID), HasValue("foo"));
+  EXPECT_THAT_EXPECTED(Table.getStringForID(BarID), HasValue("bar"));
+  EXPECT_THAT_EXPECTED(Table.getStringForID(BazID), HasValue("baz"));
+  EXPECT_THAT_EXPECTED(Table.getStringForID(BuzzID), HasValue("buzz"));
+  EXPECT_THAT_EXPECTED(Table.getStringForID(BazzID), HasValue("bazz"));
+  EXPECT_THAT_EXPECTED(Table.getStringForID(BarrID), HasValue("barr"));
+
+  EXPECT_THAT_EXPECTED(Table.getIDForString("foo"), HasValue(FooID));
+  EXPECT_THAT_EXPECTED(Table.getIDForString("bar"), HasValue(BarID));
+  EXPECT_THAT_EXPECTED(Table.getIDForString("baz"), HasValue(BazID));
+  EXPECT_THAT_EXPECTED(Table.getIDForString("buzz"), HasValue(BuzzID));
+  EXPECT_THAT_EXPECTED(Table.getIDForString("bazz"), HasValue(BazzID));
+  EXPECT_THAT_EXPECTED(Table.getIDForString("barr"), HasValue(BarrID));
 }