DWARFVerifier: verify debug_names abbreviation table
authorPavel Labath <labath@google.com>
Thu, 22 Mar 2018 14:50:44 +0000 (14:50 +0000)
committerPavel Labath <labath@google.com>
Thu, 22 Mar 2018 14:50:44 +0000 (14:50 +0000)
Summary:
This commit adds checks of the abbreviation table in a DWARF v5 Name
Index. The most interesting/useful check is the one which checks that
each index attribute is encoded using the correct form class, but it
also checks for the more obvious errors like unknown
forms/tags/attributes and duplicated attributes.

Reviewers: JDevlieghere, aprantl, dblaikie

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D44736

llvm-svn: 328202

llvm/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h
llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
llvm/test/tools/llvm-dwarfdump/X86/debug-names-verify-abbrev-forms.s [new file with mode: 0644]
llvm/test/tools/llvm-dwarfdump/X86/debug-names-verify-no-buckets.s

index d9aca7b..27f11ca 100644 (file)
@@ -425,6 +425,10 @@ public:
 
     uint32_t getNameCount() const { return Hdr.NameCount; }
 
+    /// Read-only access to the abbreviation set (the `Abbrevs` member) of
+    /// this object. The set is returned by const reference; nothing is copied.
+    const DenseSet<Abbrev, AbbrevMapInfo> &getAbbrevs() const {
+      return Abbrevs;
+    }
+
     llvm::Error extract();
     uint32_t getUnitOffset() const { return Base; }
     uint32_t getNextUnitOffset() const { return Base + 4 + Hdr.UnitLength; }
index f003e3b..afaa299 100644 (file)
@@ -236,12 +236,17 @@ private:
   unsigned verifyDebugNamesCULists(const DWARFDebugNames &AccelTable);
   unsigned verifyNameIndexBuckets(const DWARFDebugNames::NameIndex &NI,
                                   const DataExtractor &StrData);
+  unsigned verifyNameIndexAbbrevs(const DWARFDebugNames::NameIndex &NI);
+  unsigned verifyNameIndexAttribute(const DWARFDebugNames::NameIndex &NI,
+                                    const DWARFDebugNames::Abbrev &Abbr,
+                                    DWARFDebugNames::AttributeEncoding AttrEnc);
 
   /// Verify that the DWARF v5 accelerator table is valid.
   ///
   /// This function currently checks that:
-  /// - Headers and abbreviation tables of individual Name Indices fit into the
-  ///   section and can be parsed.
+  /// - Individual Name Index headers fit into the section and can be parsed.
+  /// - Abbreviation tables can be parsed and contain valid index attributes
+  ///   with correct form encodings.
   /// - The CU lists reference existing compile units.
   /// - The buckets have a valid index, or they are empty.
   /// - All names are reachable via the hash table (they have the correct hash,
index c5b1f75..92d956c 100644 (file)
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/DebugInfo/DWARF/DWARFVerifier.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
 #include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
@@ -939,6 +940,89 @@ DWARFVerifier::verifyNameIndexBuckets(const DWARFDebugNames::NameIndex &NI,
   return NumErrors;
 }
 
+unsigned DWARFVerifier::verifyNameIndexAttribute(
+    const DWARFDebugNames::NameIndex &NI, const DWARFDebugNames::Abbrev &Abbr,
+    DWARFDebugNames::AttributeEncoding AttrEnc) {
+  StringRef FormName = dwarf::FormEncodingString(AttrEnc.Form);
+  if (FormName.empty()) {
+    error() << formatv("NameIndex @ {0:x}: Abbreviation {1:x}: {2} uses an "
+                       "unknown form: {3}.\n",
+                       NI.getUnitOffset(), Abbr.Code, AttrEnc.Index,
+                       AttrEnc.Form);
+    return 1;
+  }
+
+  if (AttrEnc.Index == DW_IDX_type_hash) {
+    if (AttrEnc.Form != dwarf::DW_FORM_data8) {
+      error() << formatv(
+          "NameIndex @ {0:x}: Abbreviation {1:x}: DW_IDX_type_hash "
+          "uses an unexpected form {2} (should be {3}).\n",
+          NI.getUnitOffset(), Abbr.Code, AttrEnc.Form, dwarf::DW_FORM_data8);
+      return 1;
+    }
+  }
+
+  // A list of known index attributes and their expected form classes.
+  // DW_IDX_type_hash is handled specially in the check above, as it has a
+  // specific form (not just a form class) we should expect.
+  struct FormClassTable {
+    dwarf::Index Index;
+    DWARFFormValue::FormClass Class;
+    StringLiteral ClassName;
+  };
+  static constexpr FormClassTable Table[] = {
+      {dwarf::DW_IDX_compile_unit, DWARFFormValue::FC_Constant, {"constant"}},
+      {dwarf::DW_IDX_type_unit, DWARFFormValue::FC_Constant, {"constant"}},
+      {dwarf::DW_IDX_die_offset, DWARFFormValue::FC_Reference, {"reference"}},
+      {dwarf::DW_IDX_parent, DWARFFormValue::FC_Constant, {"constant"}},
+  };
+
+  ArrayRef<FormClassTable> TableRef(Table);
+  auto Iter = find_if(TableRef, [AttrEnc](const FormClassTable &T) {
+    return T.Index == AttrEnc.Index;
+  });
+  if (Iter == TableRef.end()) {
+    warn() << formatv("NameIndex @ {0:x}: Abbreviation {1:x} contains an "
+                      "unknown index attribute: {2}.\n",
+                      NI.getUnitOffset(), Abbr.Code, AttrEnc.Index);
+    return 0;
+  }
+
+  if (!DWARFFormValue(AttrEnc.Form).isFormClass(Iter->Class)) {
+    error() << formatv("NameIndex @ {0:x}: Abbreviation {1:x}: {2} uses an "
+                       "unexpected form {3} (expected form class {4}).\n",
+                       NI.getUnitOffset(), Abbr.Code, AttrEnc.Index,
+                       AttrEnc.Form, Iter->ClassName);
+    return 1;
+  }
+  return 0;
+}
+
+unsigned
+DWARFVerifier::verifyNameIndexAbbrevs(const DWARFDebugNames::NameIndex &NI) {
+  unsigned NumErrors = 0;
+  for (const auto &Abbrev : NI.getAbbrevs()) {
+    StringRef TagName = dwarf::TagString(Abbrev.Tag);
+    if (TagName.empty()) {
+      warn() << formatv("NameIndex @ {0:x}: Abbreviation {1:x} references an "
+                        "unknown tag: {2}.\n",
+                        NI.getUnitOffset(), Abbrev.Code, Abbrev.Tag);
+    }
+    SmallSet<unsigned, 5> Attributes;
+    for (const auto &AttrEnc : Abbrev.Attributes) {
+      if (!Attributes.insert(AttrEnc.Index).second) {
+        error() << formatv("NameIndex @ {0:x}: Abbreviation {1:x} contains "
+                           "multiple {2} attributes.\n",
+                           NI.getUnitOffset(), Abbrev.Code, AttrEnc.Index);
+        ++NumErrors;
+        continue;
+      }
+      NumErrors += verifyNameIndexAttribute(NI, Abbrev, AttrEnc);
+    }
+  }
+  return NumErrors;
+}
+
 unsigned DWARFVerifier::verifyDebugNames(const DWARFSection &AccelSection,
                                          const DataExtractor &StrData) {
   unsigned NumErrors = 0;
@@ -958,6 +1042,8 @@ unsigned DWARFVerifier::verifyDebugNames(const DWARFSection &AccelSection,
   NumErrors += verifyDebugNamesCULists(AccelTable);
   for (const auto &NI : AccelTable)
     NumErrors += verifyNameIndexBuckets(NI, StrData);
+  for (const auto &NI : AccelTable)
+    NumErrors += verifyNameIndexAbbrevs(NI);
 
   return NumErrors;
 }
diff --git a/llvm/test/tools/llvm-dwarfdump/X86/debug-names-verify-abbrev-forms.s b/llvm/test/tools/llvm-dwarfdump/X86/debug-names-verify-abbrev-forms.s
new file mode 100644 (file)
index 0000000..9aaa4c4
--- /dev/null
@@ -0,0 +1,92 @@
+# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj | \
+# RUN:   not llvm-dwarfdump -verify - | FileCheck %s
+
+# CHECK: error: NameIndex @ 0x0: Abbreviation 0x2: DW_IDX_compile_unit uses an unexpected form DW_FORM_ref1 (expected form class constant).
+# CHECK: error: NameIndex @ 0x0: Abbreviation 0x2: DW_IDX_type_unit uses an unexpected form DW_FORM_ref1 (expected form class constant).
+# CHECK: error: NameIndex @ 0x0: Abbreviation 0x2: DW_IDX_type_hash uses an unexpected form DW_FORM_data4 (should be DW_FORM_data8).
+# CHECK: warning: NameIndex @ 0x0: Abbreviation 0x2 contains an unknown index attribute: DW_IDX_unknown_2020.
+# CHECK: error: NameIndex @ 0x0: Abbreviation 0x4 contains multiple DW_IDX_die_offset attributes.
+# CHECK: NameIndex @ 0x0: Abbreviation 0x1: DW_IDX_die_offset uses an unknown form: DW_FORM_unknown_1fff.
+# CHECK: warning: NameIndex @ 0x0: Abbreviation 0x3 references an unknown tag: DW_TAG_unknown_8080.
+
+       .section        .debug_str,"MS",@progbits,1
+.Lstring_producer:
+       .asciz  "Hand-written dwarf"
+
+       .section        .debug_abbrev,"",@progbits
+.Lsection_abbrev:
+       .byte   1                       # Abbreviation Code
+       .byte   17                      # DW_TAG_compile_unit
+       .byte   1                       # DW_CHILDREN_yes
+       .byte   37                      # DW_AT_producer
+       .byte   14                      # DW_FORM_strp
+       .byte   19                      # DW_AT_language
+       .byte   5                       # DW_FORM_data2
+       .byte   0                       # EOM(1)
+       .byte   0                       # EOM(2)
+       .byte   0                       # EOM(3)
+
+       .section        .debug_info,"",@progbits
+.Lcu_begin0:
+       .long   .Lcu_end0-.Lcu_start0   # Length of Unit
+.Lcu_start0:
+       .short  4                       # DWARF version number
+       .long   .Lsection_abbrev        # Offset Into Abbrev. Section
+       .byte   8                       # Address Size (in bytes)
+       .byte   1                       # Abbrev [1] DW_TAG_compile_unit
+       .long   .Lstring_producer       # DW_AT_producer
+       .short  12                      # DW_AT_language
+       .byte   0                       # End Of Children Mark
+.Lcu_end0:
+
+       .section        .debug_names,"",@progbits
+       .long   .Lnames_end0-.Lnames_start0 # Header: contribution length
+.Lnames_start0:
+       .short  5                       # Header: version
+       .short  0                       # Header: padding
+       .long   1                       # Header: compilation unit count
+       .long   0                       # Header: local type unit count
+       .long   0                       # Header: foreign type unit count
+       .long   0                       # Header: bucket count
+       .long   0                       # Header: name count
+       .long   .Lnames_abbrev_end0-.Lnames_abbrev_start0 # Header: abbreviation table size
+       .long   0                       # Header: augmentation length
+       .long   .Lcu_begin0             # Compilation unit 0
+.Lnames_abbrev_start0:
+       .byte   1                       # Abbrev code
+       .byte   46                      # DW_TAG_subprogram
+       .byte   3                       # DW_IDX_die_offset
+       .uleb128 0x1fff                 # DW_FORM_unknown_1fff
+       .byte   0                       # End of abbrev
+       .byte   0                       # End of abbrev
+       .byte   2                       # Abbrev code
+       .byte   46                      # DW_TAG_subprogram
+       .byte   1                       # DW_IDX_compile_unit
+       .byte   17                      # DW_FORM_ref1
+       .byte   2                       # DW_IDX_type_unit
+       .byte   17                      # DW_FORM_ref1
+       .byte   2                       # DW_IDX_type_unit
+       .byte   5                       # DW_FORM_data2
+       .byte   5                       # DW_IDX_type_hash
+       .byte   6                       # DW_FORM_data4
+       .uleb128 0x2020                 # DW_IDX_unknown_2020
+       .byte   6                       # DW_FORM_data4
+       .byte   0                       # End of abbrev
+       .byte   0                       # End of abbrev
+       .byte   3                       # Abbrev code
+       .uleb128 0x8080                 # DW_TAG_unknown_8080
+       .byte   3                       # DW_IDX_die_offset
+       .byte   17                      # DW_FORM_ref1
+       .byte   0                       # End of abbrev
+       .byte   0                       # End of abbrev
+       .byte   4                       # Abbrev code
+       .byte   46                      # DW_TAG_subprogram
+       .byte   3                       # DW_IDX_die_offset
+       .byte   17                      # DW_FORM_ref1
+       .byte   3                       # DW_IDX_die_offset
+       .byte   17                      # DW_FORM_ref1
+       .byte   0                       # End of abbrev
+       .byte   0                       # End of abbrev
+       .byte   0                       # End of abbrev list
+.Lnames_abbrev_end0:
+.Lnames_end0:
index b95fc2a..3966b95 100644 (file)
@@ -66,7 +66,7 @@
        .byte   46                      # Abbrev code
        .byte   46                      # DW_TAG_subprogram
        .byte   3                       # DW_IDX_die_offset
-       .byte   6                       # DW_FORM_data4
+       .byte   19                      # DW_FORM_ref4
        .byte   0                       # End of abbrev
        .byte   0                       # End of abbrev
        .byte   0                       # End of abbrev list