DWARFVerifier: Check section-relative references at the end of the section
authorDavid Blaikie <dblaikie@gmail.com>
Sat, 14 Aug 2021 19:46:08 +0000 (12:46 -0700)
committerDavid Blaikie <dblaikie@gmail.com>
Sun, 15 Aug 2021 18:40:24 +0000 (11:40 -0700)
This ensures that debug_types references aren't looked for in
debug_info section.

Behavior is still going to be questionable in an unlinked object file -
since cross-cu references could refer to symbols in another .debug_info
(or, in theory, .debug_types) chunk - but if a producer only uses
ref_addr to refer to things within the same .debug_info chunk in an
object file (eg: whole program optimization/LTO - producing two CUs into
a single .debug_info section in an object file - the ref_addrs there
could be resolved relative to that .debug_info chunk, not needing to
consider comdat  (DWARFv5 type units or other creatures) chunks of
.debug_info, etc)

llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
llvm/test/DebugInfo/X86/skeleton-unit-verify.s
llvm/test/tools/llvm-dwarfdump/X86/verify_invalid_ref_addr_between.yaml
llvm/test/tools/llvm-dwarfdump/X86/verify_invalid_ref_multi_section.s [new file with mode: 0644]
llvm/test/tools/llvm-dwarfdump/X86/verify_overlapping_cu_ranges.yaml

index 161a4f8..5ab2165 100644 (file)
@@ -79,14 +79,11 @@ private:
   raw_ostream &OS;
   DWARFContext &DCtx;
   DIDumpOptions DumpOpts;
-  /// A map that tracks all references (converted absolute references) so we
-  /// can verify each reference points to a valid DIE and not an offset that
-  /// lies between to valid DIEs.
-  std::map<uint64_t, std::set<uint64_t>> ReferenceToDIEOffsets;
   uint32_t NumDebugLineErrors = 0;
   // Used to relax some checks that do not currently work portably
   bool IsObjectFile;
   bool IsMachOObject;
+  using ReferenceMap = std::map<uint64_t, std::set<uint64_t>>;
 
   raw_ostream &error() const;
   raw_ostream &warn() const;
@@ -144,7 +141,9 @@ private:
   /// \param Unit      The DWARF Unit to verify.
   ///
   /// \returns The number of errors that occurred during verification.
-  unsigned verifyUnitContents(DWARFUnit &Unit);
+  unsigned verifyUnitContents(DWARFUnit &Unit,
+                              ReferenceMap &UnitLocalReferences,
+                              ReferenceMap &CrossUnitReferences);
 
   /// Verifies the unit headers and contents in a .debug_info or .debug_types
   /// section.
@@ -196,7 +195,9 @@ private:
   ///
   /// \returns NumErrors The number of errors occurred during verification of
   /// attributes' forms in a unit
-  unsigned verifyDebugInfoForm(const DWARFDie &Die, DWARFAttribute &AttrValue);
+  unsigned verifyDebugInfoForm(const DWARFDie &Die, DWARFAttribute &AttrValue,
+                               ReferenceMap &UnitLocalReferences,
+                               ReferenceMap &CrossUnitReferences);
 
   /// Verifies the all valid references that were found when iterating through
   /// all of the DIE attributes.
@@ -208,7 +209,9 @@ private:
   ///
   /// \returns NumErrors The number of errors occurred during verification of
   /// references for the .debug_info and .debug_types sections
-  unsigned verifyDebugInfoReferences();
+  unsigned verifyDebugInfoReferences(
+      const ReferenceMap &,
+      llvm::function_ref<DWARFUnit *(uint64_t)> GetUnitForDieOffset);
 
   /// Verify the DW_AT_stmt_list encoding and value and ensure that no
   /// compile units that have the same DW_AT_stmt_list value.
index ac624ec..c6e414a 100644 (file)
@@ -158,7 +158,9 @@ bool DWARFVerifier::verifyUnitHeader(const DWARFDataExtractor DebugInfoData,
   return Success;
 }
 
-unsigned DWARFVerifier::verifyUnitContents(DWARFUnit &Unit) {
+unsigned DWARFVerifier::verifyUnitContents(DWARFUnit &Unit,
+                                           ReferenceMap &UnitLocalReferences,
+                                           ReferenceMap &CrossUnitReferences) {
   unsigned NumUnitErrors = 0;
   unsigned NumDies = Unit.getNumDIEs();
   for (unsigned I = 0; I < NumDies; ++I) {
@@ -169,7 +171,8 @@ unsigned DWARFVerifier::verifyUnitContents(DWARFUnit &Unit) {
 
     for (auto AttrValue : Die.attributes()) {
       NumUnitErrors += verifyDebugInfoAttribute(Die, AttrValue);
-      NumUnitErrors += verifyDebugInfoForm(Die, AttrValue);
+      NumUnitErrors += verifyDebugInfoForm(Die, AttrValue, UnitLocalReferences,
+                                           CrossUnitReferences);
     }
 
     if (Die.hasChildren()) {
@@ -299,6 +302,10 @@ unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S,
   bool hasDIE = DebugInfoData.isValidOffset(Offset);
   DWARFUnitVector TypeUnitVector;
   DWARFUnitVector CompileUnitVector;
+  /// A map that tracks all references (converted absolute references) so we
+  /// can verify each reference points to a valid DIE and not an offset that
+  /// lies between to valid DIEs.
+  ReferenceMap CrossUnitReferences;
   while (hasDIE) {
     OffsetStart = Offset;
     if (!verifyUnitHeader(DebugInfoData, &Offset, UnitIdx, UnitType,
@@ -309,6 +316,7 @@ unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S,
     } else {
       DWARFUnitHeader Header;
       Header.extract(DCtx, DebugInfoData, &OffsetStart, SectionKind);
+      ReferenceMap UnitLocalReferences;
       DWARFUnit *Unit;
       switch (UnitType) {
       case dwarf::DW_UT_type:
@@ -337,7 +345,10 @@ unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S,
       }
       default: { llvm_unreachable("Invalid UnitType."); }
       }
-      NumDebugInfoErrors += verifyUnitContents(*Unit);
+      NumDebugInfoErrors +=
+          verifyUnitContents(*Unit, UnitLocalReferences, CrossUnitReferences);
+      NumDebugInfoErrors += verifyDebugInfoReferences(
+          UnitLocalReferences, [&](uint64_t Offset) { return Unit; });
     }
     hasDIE = DebugInfoData.isValidOffset(Offset);
     ++UnitIdx;
@@ -348,7 +359,14 @@ unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S,
   }
   if (!isHeaderChainValid)
     ++NumDebugInfoErrors;
-  NumDebugInfoErrors += verifyDebugInfoReferences();
+  NumDebugInfoErrors += verifyDebugInfoReferences(
+      CrossUnitReferences, [&](uint64_t Offset) -> DWARFUnit * {
+        if (DWARFUnit *U = TypeUnitVector.getUnitForOffset(Offset))
+          return U;
+        if (DWARFUnit *U = CompileUnitVector.getUnitForOffset(Offset))
+          return U;
+        return nullptr;
+      });
   return NumDebugInfoErrors;
 }
 
@@ -587,7 +605,9 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die,
 }
 
 unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
-                                            DWARFAttribute &AttrValue) {
+                                            DWARFAttribute &AttrValue,
+                                            ReferenceMap &LocalReferences,
+                                            ReferenceMap &CrossUnitReferences) {
   const DWARFObject &DObj = DCtx.getDWARFObj();
   auto DieCU = Die.getDwarfUnit();
   unsigned NumErrors = 0;
@@ -615,7 +635,7 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
       } else {
         // Valid reference, but we will verify it points to an actual
         // DIE later.
-        ReferenceToDIEOffsets[*RefVal].insert(Die.getOffset());
+        LocalReferences[*RefVal].insert(Die.getOffset());
       }
     }
     break;
@@ -634,7 +654,7 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
       } else {
         // Valid reference, but we will verify it points to an actual
         // DIE later.
-        ReferenceToDIEOffsets[*RefVal].insert(Die.getOffset());
+        CrossUnitReferences[*RefVal].insert(Die.getOffset());
       }
     }
     break;
@@ -694,20 +714,24 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
   return NumErrors;
 }
 
-unsigned DWARFVerifier::verifyDebugInfoReferences() {
-  // Take all references and make sure they point to an actual DIE by
-  // getting the DIE by offset and emitting an error
-  OS << "Verifying .debug_info references...\n";
+unsigned DWARFVerifier::verifyDebugInfoReferences(
+    const ReferenceMap &References,
+    llvm::function_ref<DWARFUnit *(uint64_t)> GetUnitForOffset) {
+  auto GetDIEForOffset = [&](uint64_t Offset) {
+    if (DWARFUnit *U = GetUnitForOffset(Offset))
+      return U->getDIEForOffset(Offset);
+    return DWARFDie();
+  };
   unsigned NumErrors = 0;
   for (const std::pair<const uint64_t, std::set<uint64_t>> &Pair :
-       ReferenceToDIEOffsets) {
-    if (DCtx.getDIEForOffset(Pair.first))
+       References) {
+    if (GetDIEForOffset(Pair.first))
       continue;
     ++NumErrors;
     error() << "invalid DIE reference " << format("0x%08" PRIx64, Pair.first)
             << ". Offset is in between DIEs:\n";
     for (auto Offset : Pair.second)
-      dump(DCtx.getDIEForOffset(Offset)) << '\n';
+      dump(GetDIEForOffset(Offset)) << '\n';
     OS << "\n";
   }
   return NumErrors;
index 95fbd11..a5911e9 100644 (file)
@@ -6,7 +6,6 @@
 # CHECK-NEXT: warning: DW_TAG_skeleton_unit has DW_CHILDREN_yes but DIE has no children
 # CHECK-NEXT: DW_TAG_skeleton_unit
 # CHECK-NEXT: error: Skeleton compilation unit has children.
-# CHECK-NEXT: Verifying .debug_info references...
 # CHECK-NEXT: Verifying .debug_types Unit Header Chain...
 # CHECK-NEXT: Errors detected.
 
index 605af01..a412f7b 100644 (file)
@@ -1,7 +1,7 @@
 # RUN: yaml2obj %s -o %t.o
 # RUN: not llvm-dwarfdump -debug-info -verify %t.o | FileCheck %s
 
-#      CHECK: Verifying .debug_info references...
+#      CHECK: Verifying .debug_info
 # CHECK-NEXT: error: invalid DIE reference 0x00000011. Offset is in between DIEs:
 
 --- !ELF
diff --git a/llvm/test/tools/llvm-dwarfdump/X86/verify_invalid_ref_multi_section.s b/llvm/test/tools/llvm-dwarfdump/X86/verify_invalid_ref_multi_section.s
new file mode 100644 (file)
index 0000000..b1f94cf
--- /dev/null
@@ -0,0 +1,193 @@
+# RUN: llvm-mc %s -o %t.o -filetype=obj
+# RUN: llvm-dwarfdump -debug-info -verify %t.o | FileCheck %s
+
+# CHECK-NOT: error:
+
+# Assembly generated from this source:
+# struct t1 { int i; };
+# t1 v1;
+# and compiled with -g -fdebug-types-section
+#
+# This demonstrates that llvm-dwarfdump --verify does not try to apply offsets found in the .debug_type
+
+       .text
+       .file   "test.cpp"
+       .file   1 "/usr/local/google/home/blaikie/dev/scratch" "test.cpp"
+       .section        .debug_types,"G",@progbits,14297044602779165170,comdat
+       .long   .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+       .short  4                               # DWARF version number
+       .long   .debug_abbrev                   # Offset Into Abbrev. Section
+       .byte   8                               # Address Size (in bytes)
+       .quad   -4149699470930386446            # Type Signature
+       .long   30                              # Type DIE Offset
+       .byte   1                               # Abbrev [1] 0x17:0x25 DW_TAG_type_unit
+       .short  33                              # DW_AT_language
+       .long   .Lline_table_start0             # DW_AT_stmt_list
+       .byte   2                               # Abbrev [2] 0x1e:0x16 DW_TAG_structure_type
+       .byte   5                               # DW_AT_calling_convention
+       .long   .Linfo_string6                  # DW_AT_name
+       .byte   4                               # DW_AT_byte_size
+       .byte   1                               # DW_AT_decl_file
+       .byte   1                               # DW_AT_decl_line
+       .byte   3                               # Abbrev [3] 0x27:0xc DW_TAG_member
+       .long   .Linfo_string4                  # DW_AT_name
+       .long   52                              # DW_AT_type
+       .byte   1                               # DW_AT_decl_file
+       .byte   1                               # DW_AT_decl_line
+       .byte   0                               # DW_AT_data_member_location
+       .byte   0                               # End Of Children Mark
+       .byte   4                               # Abbrev [4] 0x34:0x7 DW_TAG_base_type
+       .long   .Linfo_string5                  # DW_AT_name
+       .byte   5                               # DW_AT_encoding
+       .byte   4                               # DW_AT_byte_size
+       .byte   0                               # End Of Children Mark
+.Ldebug_info_end0:
+       .type   v1,@object                      # @v1
+       .bss
+       .globl  v1
+       .p2align        2
+v1:
+       .zero   4
+       .size   v1, 4
+
+       .section        .debug_abbrev,"",@progbits
+       .byte   1                               # Abbreviation Code
+       .byte   65                              # DW_TAG_type_unit
+       .byte   1                               # DW_CHILDREN_yes
+       .byte   19                              # DW_AT_language
+       .byte   5                               # DW_FORM_data2
+       .byte   16                              # DW_AT_stmt_list
+       .byte   23                              # DW_FORM_sec_offset
+       .byte   0                               # EOM(1)
+       .byte   0                               # EOM(2)
+       .byte   2                               # Abbreviation Code
+       .byte   19                              # DW_TAG_structure_type
+       .byte   1                               # DW_CHILDREN_yes
+       .byte   54                              # DW_AT_calling_convention
+       .byte   11                              # DW_FORM_data1
+       .byte   3                               # DW_AT_name
+       .byte   14                              # DW_FORM_strp
+       .byte   11                              # DW_AT_byte_size
+       .byte   11                              # DW_FORM_data1
+       .byte   58                              # DW_AT_decl_file
+       .byte   11                              # DW_FORM_data1
+       .byte   59                              # DW_AT_decl_line
+       .byte   11                              # DW_FORM_data1
+       .byte   0                               # EOM(1)
+       .byte   0                               # EOM(2)
+       .byte   3                               # Abbreviation Code
+       .byte   13                              # DW_TAG_member
+       .byte   0                               # DW_CHILDREN_no
+       .byte   3                               # DW_AT_name
+       .byte   14                              # DW_FORM_strp
+       .byte   73                              # DW_AT_type
+       .byte   19                              # DW_FORM_ref4
+       .byte   58                              # DW_AT_decl_file
+       .byte   11                              # DW_FORM_data1
+       .byte   59                              # DW_AT_decl_line
+       .byte   11                              # DW_FORM_data1
+       .byte   56                              # DW_AT_data_member_location
+       .byte   11                              # DW_FORM_data1
+       .byte   0                               # EOM(1)
+       .byte   0                               # EOM(2)
+       .byte   4                               # Abbreviation Code
+       .byte   36                              # DW_TAG_base_type
+       .byte   0                               # DW_CHILDREN_no
+       .byte   3                               # DW_AT_name
+       .byte   14                              # DW_FORM_strp
+       .byte   62                              # DW_AT_encoding
+       .byte   11                              # DW_FORM_data1
+       .byte   11                              # DW_AT_byte_size
+       .byte   11                              # DW_FORM_data1
+       .byte   0                               # EOM(1)
+       .byte   0                               # EOM(2)
+       .byte   5                               # Abbreviation Code
+       .byte   17                              # DW_TAG_compile_unit
+       .byte   1                               # DW_CHILDREN_yes
+       .byte   37                              # DW_AT_producer
+       .byte   14                              # DW_FORM_strp
+       .byte   19                              # DW_AT_language
+       .byte   5                               # DW_FORM_data2
+       .byte   3                               # DW_AT_name
+       .byte   14                              # DW_FORM_strp
+       .byte   16                              # DW_AT_stmt_list
+       .byte   23                              # DW_FORM_sec_offset
+       .byte   27                              # DW_AT_comp_dir
+       .byte   14                              # DW_FORM_strp
+       .byte   0                               # EOM(1)
+       .byte   0                               # EOM(2)
+       .byte   6                               # Abbreviation Code
+       .byte   52                              # DW_TAG_variable
+       .byte   0                               # DW_CHILDREN_no
+       .byte   3                               # DW_AT_name
+       .byte   14                              # DW_FORM_strp
+       .byte   73                              # DW_AT_type
+       .byte   19                              # DW_FORM_ref4
+       .byte   63                              # DW_AT_external
+       .byte   25                              # DW_FORM_flag_present
+       .byte   58                              # DW_AT_decl_file
+       .byte   11                              # DW_FORM_data1
+       .byte   59                              # DW_AT_decl_line
+       .byte   11                              # DW_FORM_data1
+       .byte   2                               # DW_AT_location
+       .byte   24                              # DW_FORM_exprloc
+       .byte   0                               # EOM(1)
+       .byte   0                               # EOM(2)
+       .byte   7                               # Abbreviation Code
+       .byte   19                              # DW_TAG_structure_type
+       .byte   0                               # DW_CHILDREN_no
+       .byte   60                              # DW_AT_declaration
+       .byte   25                              # DW_FORM_flag_present
+       .byte   105                             # DW_AT_signature
+       .byte   32                              # DW_FORM_ref_sig8
+       .byte   0                               # EOM(1)
+       .byte   0                               # EOM(2)
+       .byte   0                               # EOM(3)
+       .section        .debug_info,"",@progbits
+.Lcu_begin0:
+       .long   .Ldebug_info_end1-.Ldebug_info_start1 # Length of Unit
+.Ldebug_info_start1:
+       .short  4                               # DWARF version number
+       .long   .debug_abbrev                   # Offset Into Abbrev. Section
+       .byte   8                               # Address Size (in bytes)
+       .byte   5                               # Abbrev [5] 0xb:0x32 DW_TAG_compile_unit
+       .long   .Linfo_string0                  # DW_AT_producer
+       .short  33                              # DW_AT_language
+       .long   .Linfo_string1                  # DW_AT_name
+       .long   .Lline_table_start0             # DW_AT_stmt_list
+       .long   .Linfo_string2                  # DW_AT_comp_dir
+       .byte   6                               # Abbrev [6] 0x1e:0x15 DW_TAG_variable
+       .long   .Linfo_string3                  # DW_AT_name
+       .long   51                              # DW_AT_type
+                                        # DW_AT_external
+       .byte   1                               # DW_AT_decl_file
+       .byte   2                               # DW_AT_decl_line
+       .byte   9                               # DW_AT_location
+       .byte   3
+       .quad   v1
+       .byte   7                               # Abbrev [7] 0x33:0x9 DW_TAG_structure_type
+                                        # DW_AT_declaration
+       .quad   -4149699470930386446            # DW_AT_signature
+       .byte   0                               # End Of Children Mark
+.Ldebug_info_end1:
+       .section        .debug_str,"MS",@progbits,1
+.Linfo_string0:
+       .asciz  "clang version 14.0.0 (git@github.com:llvm/llvm-project.git 7f00c7ce4b186ab8ba2ae66c82efdcf908c61019)" # string offset=0
+.Linfo_string1:
+       .asciz  "test.cpp"                      # string offset=101
+.Linfo_string2:
+       .asciz  "/usr/local/google/home/blaikie/dev/scratch" # string offset=110
+.Linfo_string3:
+       .asciz  "v1"                            # string offset=153
+.Linfo_string4:
+       .asciz  "i"                             # string offset=156
+.Linfo_string5:
+       .asciz  "int"                           # string offset=158
+.Linfo_string6:
+       .asciz  "t1"                            # string offset=162
+       .ident  "clang version 14.0.0 (git@github.com:llvm/llvm-project.git 7f00c7ce4b186ab8ba2ae66c82efdcf908c61019)"
+       .section        ".note.GNU-stack","",@progbits
+       .addrsig
+       .section        .debug_line,"",@progbits
+.Lline_table_start0:
index b0970cd..9533c27 100644 (file)
@@ -64,7 +64,7 @@
 # CHECK-NEXT:              DW_AT_low_pc      (0x0000000000000000)
 # CHECK-NEXT:              DW_AT_high_pc     (0x0000000000000020)
 
-# CHECK: Verifying .debug_info references...
+# CHECK: Verifying
 
 --- !mach-o
 FileHeader: