Fix a regression in DWARF access speed caused by svn revision 356190
authorGreg Clayton <clayborg@gmail.com>
Thu, 30 May 2019 15:32:33 +0000 (15:32 +0000)
committerGreg Clayton <clayborg@gmail.com>
Thu, 30 May 2019 15:32:33 +0000 (15:32 +0000)
The issue was caused by the error checking code that was added. It was incorrectly adding an extra abbreviation when DWARFEnumState::Complete was received since it would push an extra abbreviation onto the list with the abbreviation code of zero. This cause m_idx_offset in each DWARFAbbreviationDeclarationSet to be set to UINT32_MAX. This valid indicates we must linearly search for attributes, not access them in O(1) time. This caused every DWARFDebugInfoEntry that would try to get its DWARFAbbreviationDeclaration from the CU's DWARFAbbreviationDeclarationSet to always linearly search the abbreviation set for a given abbreviation code. Easy to see why this would cause things to be slow.

This regression was caused by: https://reviews.llvm.org/D59370. I asked to ensure there was no regression is parsing or access speed, but that must not have been done. In my test with 40 DWARF files trying to set a breakpoint by function name and in a header file, I see a 8% speed improvement with this fix.

There was no regression in correctness, just very inefficient access.

Added full unit testing for DWARFAbbreviationDeclarationSet parsing to ensure this doesn't regress.

Differential Revision: https://reviews.llvm.org/D62630

llvm-svn: 362105

lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAbbrev.cpp
lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAbbrev.h
lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp

index e9aa8f3..2630156 100644 (file)
@@ -29,21 +29,19 @@ DWARFAbbreviationDeclarationSet::extract(const DWARFDataExtractor &data,
   Clear();
   DWARFAbbreviationDeclaration abbrevDeclaration;
   dw_uleb128_t prev_abbr_code = 0;
-  DWARFEnumState state = DWARFEnumState::MoreItems;
-  while (state == DWARFEnumState::MoreItems) {
+  while (true) {
     llvm::Expected<DWARFEnumState> es =
         abbrevDeclaration.extract(data, offset_ptr);
     if (!es)
       return es.takeError();
-
-    state = *es;
+    if (*es == DWARFEnumState::Complete)
+      break;
     m_decls.push_back(abbrevDeclaration);
     if (m_idx_offset == 0)
       m_idx_offset = abbrevDeclaration.Code();
-    else {
-      if (prev_abbr_code + 1 != abbrevDeclaration.Code())
-        m_idx_offset =
-            UINT32_MAX; // Out of order indexes, we can't do O(1) lookups...
+    else if (prev_abbr_code + 1 != abbrevDeclaration.Code()) {
+      // Out of order indexes, we can't do O(1) lookups...
+      m_idx_offset = UINT32_MAX;
     }
     prev_abbr_code = abbrevDeclaration.Code();
   }
index 391df8d..9c47293 100644 (file)
@@ -45,6 +45,10 @@ public:
   const DWARFAbbreviationDeclaration *
   GetAbbreviationDeclaration(dw_uleb128_t abbrCode) const;
 
+  /// Unit test accessor functions.
+  /// @{
+  uint32_t GetIndexOffset() const { return m_idx_offset; }
+  /// @}
 private:
   dw_offset_t m_offset;
   uint32_t m_idx_offset;
index cca5a61..f0dd247 100644 (file)
@@ -15,6 +15,9 @@
 #include "llvm/Support/Path.h"
 
 #include "Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.h"
+#include "Plugins/SymbolFile/DWARF/DWARFAbbreviationDeclaration.h"
+#include "Plugins/SymbolFile/DWARF/DWARFDataExtractor.h"
+#include "Plugins/SymbolFile/DWARF/DWARFDebugAbbrev.h"
 #include "Plugins/SymbolFile/DWARF/SymbolFileDWARF.h"
 #include "Plugins/SymbolFile/PDB/SymbolFilePDB.h"
 #include "TestingSupport/TestUtilities.h"
 #include "lldb/Symbol/LineTable.h"
 #include "lldb/Symbol/SymbolVendor.h"
 #include "lldb/Utility/ArchSpec.h"
+#include "lldb/Utility/DataEncoder.h"
 #include "lldb/Utility/FileSpec.h"
+#include "lldb/Utility/StreamString.h"
 
+
+
+using namespace lldb;
 using namespace lldb_private;
 
 class SymbolFileDWARFTests : public testing::Test {
@@ -76,3 +84,248 @@ TEST_F(SymbolFileDWARFTests, TestAbilitiesForDWARF) {
   uint32_t expected_abilities = SymbolFile::kAllAbilities;
   EXPECT_EQ(expected_abilities, symfile->CalculateAbilities());
 }
+
+TEST_F(SymbolFileDWARFTests, TestAbbrevOrder1Start1) {
+  // Test that if we have a .debug_abbrev that contains ordered abbreviation
+  // codes that start at 1, that we get O(1) access.
+  
+  const auto byte_order = eByteOrderLittle;
+  const uint8_t addr_size = 4;
+  StreamString encoder(Stream::eBinary, addr_size, byte_order);
+  encoder.PutULEB128(1); // Abbrev code 1
+  encoder.PutULEB128(DW_TAG_compile_unit);
+  encoder.PutHex8(DW_CHILDREN_yes);
+  encoder.PutULEB128(DW_AT_name);
+  encoder.PutULEB128(DW_FORM_strp);
+  encoder.PutULEB128(0);
+  encoder.PutULEB128(0);
+  
+  encoder.PutULEB128(2); // Abbrev code 2
+  encoder.PutULEB128(DW_TAG_subprogram);
+  encoder.PutHex8(DW_CHILDREN_no);
+  encoder.PutULEB128(DW_AT_name);
+  encoder.PutULEB128(DW_FORM_strp);
+  encoder.PutULEB128(0);
+  encoder.PutULEB128(0);
+  
+  encoder.PutULEB128(0); // Abbrev code 0 (termination)
+  DWARFDataExtractor data;
+  data.SetData(encoder.GetData(), encoder.GetSize(), byte_order);
+  DWARFAbbreviationDeclarationSet abbrev_set;
+  lldb::offset_t data_offset = 0;
+  llvm::Error error = abbrev_set.extract(data, &data_offset);
+  EXPECT_FALSE(bool(error));
+  // Make sure we have O(1) access to each abbreviation by making sure the
+  // index offset is 1 and not UINT32_MAX
+  EXPECT_EQ(abbrev_set.GetIndexOffset(), 1);
+  
+  auto abbrev1 = abbrev_set.GetAbbreviationDeclaration(1);
+  EXPECT_EQ(abbrev1->Tag(), DW_TAG_compile_unit);
+  EXPECT_TRUE(abbrev1->HasChildren());
+  EXPECT_EQ(abbrev1->NumAttributes(), 1);
+  auto abbrev2 = abbrev_set.GetAbbreviationDeclaration(2);
+  EXPECT_EQ(abbrev2->Tag(), DW_TAG_subprogram);
+  EXPECT_FALSE(abbrev2->HasChildren());
+  EXPECT_EQ(abbrev2->NumAttributes(), 1);
+}
+
+TEST_F(SymbolFileDWARFTests, TestAbbrevOrder1Start5) {
+  // Test that if we have a .debug_abbrev that contains ordered abbreviation
+  // codes that start at 5, that we get O(1) access.
+  
+  const auto byte_order = eByteOrderLittle;
+  const uint8_t addr_size = 4;
+  StreamString encoder(Stream::eBinary, addr_size, byte_order);
+  encoder.PutULEB128(5); // Abbrev code 5
+  encoder.PutULEB128(DW_TAG_compile_unit);
+  encoder.PutHex8(DW_CHILDREN_yes);
+  encoder.PutULEB128(DW_AT_name);
+  encoder.PutULEB128(DW_FORM_strp);
+  encoder.PutULEB128(0);
+  encoder.PutULEB128(0);
+  
+  encoder.PutULEB128(6); // Abbrev code 6
+  encoder.PutULEB128(DW_TAG_subprogram);
+  encoder.PutHex8(DW_CHILDREN_no);
+  encoder.PutULEB128(DW_AT_name);
+  encoder.PutULEB128(DW_FORM_strp);
+  encoder.PutULEB128(0);
+  encoder.PutULEB128(0);
+  
+  encoder.PutULEB128(0); // Abbrev code 0 (termination)
+  
+  DWARFDataExtractor data;
+  data.SetData(encoder.GetData(), encoder.GetSize(), byte_order);
+  DWARFAbbreviationDeclarationSet abbrev_set;
+  lldb::offset_t data_offset = 0;
+  llvm::Error error = abbrev_set.extract(data, &data_offset);
+  EXPECT_FALSE(bool(error));
+  // Make sure we have O(1) access to each abbreviation by making sure the
+  // index offset is 5 and not UINT32_MAX
+  EXPECT_EQ(abbrev_set.GetIndexOffset(), 5);
+  
+  auto abbrev1 = abbrev_set.GetAbbreviationDeclaration(5);
+  EXPECT_EQ(abbrev1->Tag(), DW_TAG_compile_unit);
+  EXPECT_TRUE(abbrev1->HasChildren());
+  EXPECT_EQ(abbrev1->NumAttributes(), 1);
+  auto abbrev2 = abbrev_set.GetAbbreviationDeclaration(6);
+  EXPECT_EQ(abbrev2->Tag(), DW_TAG_subprogram);
+  EXPECT_FALSE(abbrev2->HasChildren());
+  EXPECT_EQ(abbrev2->NumAttributes(), 1);
+}
+
+TEST_F(SymbolFileDWARFTests, TestAbbrevOutOfOrder) {
+  // Test that if we have a .debug_abbrev that contains unordered abbreviation
+  // codes, that we can access the information correctly.
+  
+  const auto byte_order = eByteOrderLittle;
+  const uint8_t addr_size = 4;
+  StreamString encoder(Stream::eBinary, addr_size, byte_order);
+  encoder.PutULEB128(2); // Abbrev code 2
+  encoder.PutULEB128(DW_TAG_compile_unit);
+  encoder.PutHex8(DW_CHILDREN_yes);
+  encoder.PutULEB128(DW_AT_name);
+  encoder.PutULEB128(DW_FORM_strp);
+  encoder.PutULEB128(0);
+  encoder.PutULEB128(0);
+  
+  encoder.PutULEB128(1); // Abbrev code 1
+  encoder.PutULEB128(DW_TAG_subprogram);
+  encoder.PutHex8(DW_CHILDREN_no);
+  encoder.PutULEB128(DW_AT_name);
+  encoder.PutULEB128(DW_FORM_strp);
+  encoder.PutULEB128(0);
+  encoder.PutULEB128(0);
+  
+  encoder.PutULEB128(0); // Abbrev code 0 (termination)
+  
+  DWARFDataExtractor data;
+  data.SetData(encoder.GetData(), encoder.GetSize(), byte_order);
+  DWARFAbbreviationDeclarationSet abbrev_set;
+  lldb::offset_t data_offset = 0;
+  llvm::Error error = abbrev_set.extract(data, &data_offset);
+  EXPECT_FALSE(bool(error));
+  // Make sure we don't have O(1) access to each abbreviation by making sure
+  // the index offset is UINT32_MAX
+  EXPECT_EQ(abbrev_set.GetIndexOffset(), UINT32_MAX);
+  
+  auto abbrev1 = abbrev_set.GetAbbreviationDeclaration(2);
+  EXPECT_EQ(abbrev1->Tag(), DW_TAG_compile_unit);
+  EXPECT_TRUE(abbrev1->HasChildren());
+  EXPECT_EQ(abbrev1->NumAttributes(), 1);
+  auto abbrev2 = abbrev_set.GetAbbreviationDeclaration(1);
+  EXPECT_EQ(abbrev2->Tag(), DW_TAG_subprogram);
+  EXPECT_FALSE(abbrev2->HasChildren());
+  EXPECT_EQ(abbrev2->NumAttributes(), 1);
+}
+
+TEST_F(SymbolFileDWARFTests, TestAbbrevInvalidNULLTag) {
+  // Test that we detect when an abbreviation has a NULL tag and that we get
+  // an error when decoding.
+  
+  const auto byte_order = eByteOrderLittle;
+  const uint8_t addr_size = 4;
+  StreamString encoder(Stream::eBinary, addr_size, byte_order);
+  encoder.PutULEB128(1); // Abbrev code 1
+  encoder.PutULEB128(0); // Invalid NULL tag here!
+  encoder.PutHex8(DW_CHILDREN_no);
+  encoder.PutULEB128(0);
+  encoder.PutULEB128(0);
+  
+  encoder.PutULEB128(0); // Abbrev code 0 (termination)
+  
+  DWARFDataExtractor data;
+  data.SetData(encoder.GetData(), encoder.GetSize(), byte_order);
+  DWARFAbbreviationDeclarationSet abbrev_set;
+  lldb::offset_t data_offset = 0;
+  llvm::Error error = abbrev_set.extract(data, &data_offset);
+  // Verify we get an error
+  EXPECT_TRUE(bool(error));
+  EXPECT_EQ("abbrev decl requires non-null tag.",
+            llvm::toString(std::move(error)));
+
+}
+
+TEST_F(SymbolFileDWARFTests, TestAbbrevNullAttrValidForm) {
+  // Test that we detect when an abbreviation has a NULL attribute and a non
+  // NULL form and that we get an error when decoding.
+  
+  const auto byte_order = eByteOrderLittle;
+  const uint8_t addr_size = 4;
+  StreamString encoder(Stream::eBinary, addr_size, byte_order);
+  encoder.PutULEB128(1); // Abbrev code 1
+  encoder.PutULEB128(DW_TAG_compile_unit);
+  encoder.PutHex8(DW_CHILDREN_no);
+  encoder.PutULEB128(0); // Invalid NULL DW_AT
+  encoder.PutULEB128(DW_FORM_strp); // With a valid form
+  encoder.PutULEB128(0);
+  encoder.PutULEB128(0);
+
+  encoder.PutULEB128(0); // Abbrev code 0 (termination)
+  
+  DWARFDataExtractor data;
+  data.SetData(encoder.GetData(), encoder.GetSize(), byte_order);
+  DWARFAbbreviationDeclarationSet abbrev_set;
+  lldb::offset_t data_offset = 0;
+  llvm::Error error = abbrev_set.extract(data, &data_offset);
+  // Verify we get an error
+  EXPECT_TRUE(bool(error));
+  EXPECT_EQ("malformed abbreviation declaration attribute",
+            llvm::toString(std::move(error)));
+  
+}
+
+TEST_F(SymbolFileDWARFTests, TestAbbrevValidAttrNullForm) {
+  // Test that we detect when an abbreviation has a valid attribute and a
+  // NULL form and that we get an error when decoding.
+  
+  const auto byte_order = eByteOrderLittle;
+  const uint8_t addr_size = 4;
+  StreamString encoder(Stream::eBinary, addr_size, byte_order);
+  encoder.PutULEB128(1); // Abbrev code 1
+  encoder.PutULEB128(DW_TAG_compile_unit);
+  encoder.PutHex8(DW_CHILDREN_no);
+  encoder.PutULEB128(DW_AT_name); // Valid attribute
+  encoder.PutULEB128(0); // NULL form
+  encoder.PutULEB128(0);
+  encoder.PutULEB128(0);
+  
+  encoder.PutULEB128(0); // Abbrev code 0 (termination)
+  
+  DWARFDataExtractor data;
+  data.SetData(encoder.GetData(), encoder.GetSize(), byte_order);
+  DWARFAbbreviationDeclarationSet abbrev_set;
+  lldb::offset_t data_offset = 0;
+  llvm::Error error = abbrev_set.extract(data, &data_offset);
+  // Verify we get an error
+  EXPECT_TRUE(bool(error));
+  EXPECT_EQ("malformed abbreviation declaration attribute",
+            llvm::toString(std::move(error)));
+}
+
+TEST_F(SymbolFileDWARFTests, TestAbbrevMissingTerminator) {
+  // Test that we detect when an abbreviation has a valid attribute and a
+  // form, but is missing the NULL attribute and form that terminates an
+  // abbreviation
+  
+  const auto byte_order = eByteOrderLittle;
+  const uint8_t addr_size = 4;
+  StreamString encoder(Stream::eBinary, addr_size, byte_order);
+  encoder.PutULEB128(1); // Abbrev code 1
+  encoder.PutULEB128(DW_TAG_compile_unit);
+  encoder.PutHex8(DW_CHILDREN_no);
+  encoder.PutULEB128(DW_AT_name);
+  encoder.PutULEB128(DW_FORM_strp);
+  // Don't add the NULL DW_AT and NULL DW_FORM terminator
+  
+  DWARFDataExtractor data;
+  data.SetData(encoder.GetData(), encoder.GetSize(), byte_order);
+  DWARFAbbreviationDeclarationSet abbrev_set;
+  lldb::offset_t data_offset = 0;
+  llvm::Error error = abbrev_set.extract(data, &data_offset);
+  // Verify we get an error
+  EXPECT_TRUE(bool(error));
+  EXPECT_EQ("abbreviation declaration attribute list not terminated with a "
+            "null entry", llvm::toString(std::move(error)));
+}