llvm-dwarfdump: Improve/fix pretty printing of array dimensions
authorDavid Blaikie <dblaikie@gmail.com>
Wed, 19 Dec 2018 19:34:24 +0000 (19:34 +0000)
committerDavid Blaikie <dblaikie@gmail.com>
Wed, 19 Dec 2018 19:34:24 +0000 (19:34 +0000)
This is to address post-commit feedback from Paul Robinson on r348954.

The original commit misinterprets count and upper bound as the same thing (I thought I saw GCC producing an upper bound the same as Clang's count, but GCC correctly produces an upper bound that's one less than the count (in C, that is, where arrays are zero indexed)).

I want to preserve the C-like output for the common case, so in the absence of a lower bound the count (or one greater than the upper bound) is rendered between []. In the trickier cases, where a lower bound is specified, a half-open range is used (e.g., lower bound 1, count 2 would be "[1, 3)"), and any unknown part is rendered as a '?' (e.g., "[1, ?)" or "[?, 7)" or "[?, ? + 3)").

Reviewers: aprantl, probinson, JDevlieghere

Differential Revision: https://reviews.llvm.org/D55721

llvm-svn: 349670

llvm/include/llvm/BinaryFormat/Dwarf.def
llvm/include/llvm/BinaryFormat/Dwarf.h
llvm/lib/BinaryFormat/Dwarf.cpp
llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
llvm/lib/IR/DebugInfo.cpp
llvm/test/tools/llvm-dwarfdump/X86/prettyprint_types.s

index cb9f7f5..6ad3cb5 100644 (file)
@@ -43,7 +43,7 @@
 #endif
 
 #ifndef HANDLE_DW_LANG
-#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR)
+#define HANDLE_DW_LANG(ID, NAME, LOWER_BOUND, VERSION, VENDOR)
 #endif
 
 #ifndef HANDLE_DW_ATE
@@ -632,50 +632,50 @@ HANDLE_DW_OP(0xfb, GNU_addr_index, 0, GNU)
 HANDLE_DW_OP(0xfc, GNU_const_index, 0, GNU)
 
 // DWARF languages.
-HANDLE_DW_LANG(0x0001, C89, 2, DWARF)
-HANDLE_DW_LANG(0x0002, C, 2, DWARF)
-HANDLE_DW_LANG(0x0003, Ada83, 2, DWARF)
-HANDLE_DW_LANG(0x0004, C_plus_plus, 2, DWARF)
-HANDLE_DW_LANG(0x0005, Cobol74, 2, DWARF)
-HANDLE_DW_LANG(0x0006, Cobol85, 2, DWARF)
-HANDLE_DW_LANG(0x0007, Fortran77, 2, DWARF)
-HANDLE_DW_LANG(0x0008, Fortran90, 2, DWARF)
-HANDLE_DW_LANG(0x0009, Pascal83, 2, DWARF)
-HANDLE_DW_LANG(0x000a, Modula2, 2, DWARF)
+HANDLE_DW_LANG(0x0001, C89, 0, 2, DWARF)
+HANDLE_DW_LANG(0x0002, C, 0, 2, DWARF)
+HANDLE_DW_LANG(0x0003, Ada83, 1, 2, DWARF)
+HANDLE_DW_LANG(0x0004, C_plus_plus, 0, 2, DWARF)
+HANDLE_DW_LANG(0x0005, Cobol74, 1, 2, DWARF)
+HANDLE_DW_LANG(0x0006, Cobol85, 1, 2, DWARF)
+HANDLE_DW_LANG(0x0007, Fortran77, 1, 2, DWARF)
+HANDLE_DW_LANG(0x0008, Fortran90, 1, 2, DWARF)
+HANDLE_DW_LANG(0x0009, Pascal83, 1, 2, DWARF)
+HANDLE_DW_LANG(0x000a, Modula2, 1, 2, DWARF)
 // New in DWARF v3:
-HANDLE_DW_LANG(0x000b, Java, 3, DWARF)
-HANDLE_DW_LANG(0x000c, C99, 3, DWARF)
-HANDLE_DW_LANG(0x000d, Ada95, 3, DWARF)
-HANDLE_DW_LANG(0x000e, Fortran95, 3, DWARF)
-HANDLE_DW_LANG(0x000f, PLI, 3, DWARF)
-HANDLE_DW_LANG(0x0010, ObjC, 3, DWARF)
-HANDLE_DW_LANG(0x0011, ObjC_plus_plus, 3, DWARF)
-HANDLE_DW_LANG(0x0012, UPC, 3, DWARF)
-HANDLE_DW_LANG(0x0013, D, 3, DWARF)
+HANDLE_DW_LANG(0x000b, Java, 0, 3, DWARF)
+HANDLE_DW_LANG(0x000c, C99, 0, 3, DWARF)
+HANDLE_DW_LANG(0x000d, Ada95, 1, 3, DWARF)
+HANDLE_DW_LANG(0x000e, Fortran95, 1, 3, DWARF)
+HANDLE_DW_LANG(0x000f, PLI, 1, 3, DWARF)
+HANDLE_DW_LANG(0x0010, ObjC, 0, 3, DWARF)
+HANDLE_DW_LANG(0x0011, ObjC_plus_plus, 0, 3, DWARF)
+HANDLE_DW_LANG(0x0012, UPC, 0, 3, DWARF)
+HANDLE_DW_LANG(0x0013, D, 0, 3, DWARF)
 // New in DWARF v4:
-HANDLE_DW_LANG(0x0014, Python, 4, DWARF)
+HANDLE_DW_LANG(0x0014, Python, 0, 4, DWARF)
 // New in DWARF v5:
-HANDLE_DW_LANG(0x0015, OpenCL, 5, DWARF)
-HANDLE_DW_LANG(0x0016, Go, 5, DWARF)
-HANDLE_DW_LANG(0x0017, Modula3, 5, DWARF)
-HANDLE_DW_LANG(0x0018, Haskell, 5, DWARF)
-HANDLE_DW_LANG(0x0019, C_plus_plus_03, 5, DWARF)
-HANDLE_DW_LANG(0x001a, C_plus_plus_11, 5, DWARF)
-HANDLE_DW_LANG(0x001b, OCaml, 5, DWARF)
-HANDLE_DW_LANG(0x001c, Rust, 5, DWARF)
-HANDLE_DW_LANG(0x001d, C11, 5, DWARF)
-HANDLE_DW_LANG(0x001e, Swift, 5, DWARF)
-HANDLE_DW_LANG(0x001f, Julia, 5, DWARF)
-HANDLE_DW_LANG(0x0020, Dylan, 5, DWARF)
-HANDLE_DW_LANG(0x0021, C_plus_plus_14, 5, DWARF)
-HANDLE_DW_LANG(0x0022, Fortran03, 5, DWARF)
-HANDLE_DW_LANG(0x0023, Fortran08, 5, DWARF)
-HANDLE_DW_LANG(0x0024, RenderScript, 5, DWARF)
-HANDLE_DW_LANG(0x0025, BLISS, 5, DWARF)
+HANDLE_DW_LANG(0x0015, OpenCL, 0, 5, DWARF)
+HANDLE_DW_LANG(0x0016, Go, 0, 5, DWARF)
+HANDLE_DW_LANG(0x0017, Modula3, 1, 5, DWARF)
+HANDLE_DW_LANG(0x0018, Haskell, 0, 5, DWARF)
+HANDLE_DW_LANG(0x0019, C_plus_plus_03, 0, 5, DWARF)
+HANDLE_DW_LANG(0x001a, C_plus_plus_11, 0, 5, DWARF)
+HANDLE_DW_LANG(0x001b, OCaml, 0, 5, DWARF)
+HANDLE_DW_LANG(0x001c, Rust, 0, 5, DWARF)
+HANDLE_DW_LANG(0x001d, C11, 0, 5, DWARF)
+HANDLE_DW_LANG(0x001e, Swift, 0, 5, DWARF)
+HANDLE_DW_LANG(0x001f, Julia, 1, 5, DWARF)
+HANDLE_DW_LANG(0x0020, Dylan, 0, 5, DWARF)
+HANDLE_DW_LANG(0x0021, C_plus_plus_14, 0, 5, DWARF)
+HANDLE_DW_LANG(0x0022, Fortran03, 1, 5, DWARF)
+HANDLE_DW_LANG(0x0023, Fortran08, 1, 5, DWARF)
+HANDLE_DW_LANG(0x0024, RenderScript, 0, 5, DWARF)
+HANDLE_DW_LANG(0x0025, BLISS, 0, 5, DWARF)
 // Vendor extensions:
-HANDLE_DW_LANG(0x8001, Mips_Assembler, 0, MIPS)
-HANDLE_DW_LANG(0x8e57, GOOGLE_RenderScript, 0, GOOGLE)
-HANDLE_DW_LANG(0xb000, BORLAND_Delphi, 0, BORLAND)
+HANDLE_DW_LANG(0x8001, Mips_Assembler, None, 0, MIPS)
+HANDLE_DW_LANG(0x8e57, GOOGLE_RenderScript, 0, 0, GOOGLE)
+HANDLE_DW_LANG(0xb000, BORLAND_Delphi, 0, 0, BORLAND)
 
 // DWARF attribute type encodings.
 HANDLE_DW_ATE(0x01, address, 2, DWARF)
index c25b6cd..525a04d 100644 (file)
@@ -184,7 +184,8 @@ enum DefaultedMemberAttribute {
 };
 
 enum SourceLanguage {
-#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) DW_LANG_##NAME = ID,
+#define HANDLE_DW_LANG(ID, NAME, LOWER_BOUND, VERSION, VENDOR)                 \
+  DW_LANG_##NAME = ID,
 #include "llvm/BinaryFormat/Dwarf.def"
   DW_LANG_lo_user = 0x8000,
   DW_LANG_hi_user = 0xffff
@@ -490,6 +491,8 @@ unsigned AttributeEncodingVendor(TypeKind E);
 unsigned LanguageVendor(SourceLanguage L);
 /// @}
 
+Optional<unsigned> LanguageLowerBound(SourceLanguage L);
+
 /// A helper struct providing information about the byte size of DW_FORM
 /// values that vary in size depending on the DWARF version, address byte
 /// size, or DWARF32/DWARF64.
index fe8ce2b..46f8056 100644 (file)
@@ -301,7 +301,7 @@ StringRef llvm::dwarf::LanguageString(unsigned Language) {
   switch (Language) {
   default:
     return StringRef();
-#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR)                              \
+#define HANDLE_DW_LANG(ID, NAME, LOWER_BOUND, VERSION, VENDOR)                 \
   case DW_LANG_##NAME:                                                         \
     return "DW_LANG_" #NAME;
 #include "llvm/BinaryFormat/Dwarf.def"
@@ -310,7 +310,7 @@ StringRef llvm::dwarf::LanguageString(unsigned Language) {
 
 unsigned llvm::dwarf::getLanguage(StringRef LanguageString) {
   return StringSwitch<unsigned>(LanguageString)
-#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR)                              \
+#define HANDLE_DW_LANG(ID, NAME, LOWER_BOUND, VERSION, VENDOR)                 \
   .Case("DW_LANG_" #NAME, DW_LANG_##NAME)
 #include "llvm/BinaryFormat/Dwarf.def"
       .Default(0);
@@ -320,7 +320,7 @@ unsigned llvm::dwarf::LanguageVersion(dwarf::SourceLanguage Lang) {
   switch (Lang) {
   default:
     return 0;
-#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR)                              \
+#define HANDLE_DW_LANG(ID, NAME, LOWER_BOUND, VERSION, VENDOR)                 \
   case DW_LANG_##NAME:                                                         \
     return VERSION;
 #include "llvm/BinaryFormat/Dwarf.def"
@@ -331,13 +331,24 @@ unsigned llvm::dwarf::LanguageVendor(dwarf::SourceLanguage Lang) {
   switch (Lang) {
   default:
     return 0;
-#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR)                              \
+#define HANDLE_DW_LANG(ID, NAME, LOWER_BOUND, VERSION, VENDOR)                 \
   case DW_LANG_##NAME:                                                         \
     return DWARF_VENDOR_##VENDOR;
 #include "llvm/BinaryFormat/Dwarf.def"
   }
 }
 
+Optional<unsigned> llvm::dwarf::LanguageLowerBound(dwarf::SourceLanguage Lang) {
+  switch (Lang) {
+  default:
+    return None;
+#define HANDLE_DW_LANG(ID, NAME, LOWER_BOUND, VERSION, VENDOR)                 \
+  case DW_LANG_##NAME:                                                         \
+    return LOWER_BOUND;
+#include "llvm/BinaryFormat/Dwarf.def"
+  }
+}
+
 StringRef llvm::dwarf::CaseString(unsigned Case) {
   switch (Case) {
   case DW_ID_case_sensitive:
index 551e292..e6018d9 100644 (file)
@@ -154,6 +154,52 @@ static void dumpTypeTagName(raw_ostream &OS, dwarf::Tag T) {
   OS << TagStr.substr(7, TagStr.size() - 12) << " ";
 }
 
+static void dumpArrayType(raw_ostream &OS, const DWARFDie &D) {
+  Optional<uint64_t> Bound;
+  for (const DWARFDie &C : D.children())
+    if (C.getTag() == DW_TAG_subrange_type) {
+      Optional<uint64_t> LB;
+      Optional<uint64_t> Count;
+      Optional<uint64_t> UB;
+      Optional<unsigned> DefaultLB;
+      if (Optional<DWARFFormValue> L = C.find(DW_AT_lower_bound))
+        LB = L->getAsUnsignedConstant();
+      if (Optional<DWARFFormValue> CountV = C.find(DW_AT_count))
+        Count = CountV->getAsUnsignedConstant();
+      if (Optional<DWARFFormValue> UpperV = C.find(DW_AT_upper_bound))
+        UB = UpperV->getAsUnsignedConstant();
+      if (Optional<DWARFFormValue> LV =
+              D.getDwarfUnit()->getUnitDIE().find(DW_AT_language))
+        if (Optional<uint64_t> LC = LV->getAsUnsignedConstant())
+          if ((DefaultLB =
+                   LanguageLowerBound(static_cast<dwarf::SourceLanguage>(*LC))))
+            if (LB && *LB == *DefaultLB)
+              LB = None;
+      if (!LB && !Count && !UB)
+        OS << "[]";
+      else if (!LB && (Count || UB) && DefaultLB)
+        OS << '[' << (Count ? *Count : *UB - *DefaultLB + 1) << ']';
+      else {
+        OS << "[[";
+        if (LB)
+          OS << *LB;
+        else
+          OS << '?';
+        OS << ", ";
+        if (Count)
+          if (LB)
+            OS << *LB + *Count;
+          else
+            OS << "? + " << *Count;
+        else if (UB)
+          OS << *UB + 1;
+        else
+          OS << '?';
+        OS << ")]";
+      }
+    }
+}
+
 /// Recursively dump the DIE type name when applicable.
 static void dumpTypeName(raw_ostream &OS, const DWARFDie &D) {
   if (!D.isValid())
@@ -201,24 +247,7 @@ static void dumpTypeName(raw_ostream &OS, const DWARFDie &D) {
     break;
   }
   case DW_TAG_array_type: {
-    Optional<uint64_t> Bound;
-    for (const DWARFDie &C : D.children())
-      if (C.getTag() == DW_TAG_subrange_type) {
-        OS << '[';
-        uint64_t LowerBound = 0;
-        if (Optional<DWARFFormValue> L = C.find(DW_AT_lower_bound))
-          if (Optional<uint64_t> LB = L->getAsUnsignedConstant()) {
-            LowerBound = *LB;
-            OS << LowerBound << '-';
-          }
-        if (Optional<DWARFFormValue> CountV = C.find(DW_AT_count)) {
-          if (Optional<uint64_t> C = CountV->getAsUnsignedConstant())
-            OS << (*C + LowerBound);
-        } else if (Optional<DWARFFormValue> UpperV = C.find(DW_AT_upper_bound))
-          if (Optional<uint64_t> U = UpperV->getAsUnsignedConstant())
-            OS << *U;
-        OS << ']';
-      }
+    dumpArrayType(OS, D);
     break;
   }
   case DW_TAG_pointer_type:
index d1ff545..9fa3177 100644 (file)
@@ -697,8 +697,9 @@ void Instruction::applyMergedLocation(const DILocation *LocA,
 
 static unsigned map_from_llvmDWARFsourcelanguage(LLVMDWARFSourceLanguage lang) {
   switch (lang) {
-#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \
-case LLVMDWARFSourceLanguage##NAME: return ID;
+#define HANDLE_DW_LANG(ID, NAME, LOWER_BOUND, VERSION, VENDOR)                 \
+  case LLVMDWARFSourceLanguage##NAME:                                          \
+    return ID;
 #include "llvm/BinaryFormat/Dwarf.def"
 #undef HANDLE_DW_LANG
   }
index b3e871e..afeee4c 100644 (file)
 # CHECK:   DW_AT_type{{.*}}"int foo::*"
 
 # array_type
-# Testing lower_bound, upper_bound, lower and upper, lower and count, and count separately.
-# CHECK:   DW_AT_type{{.*}}"int[1-][2][1-2][1-3][2]"
+# CHECK:   DW_AT_type{{.*}}"int
+# Testing with a default lower bound of 0 and the following explicit bounds:
+#   lower_bound(1)
+# CHECK-NOT: {{.}}
+# CHECK-SAME: {{\[}}[1, ?)]
+#   upper_bound(2)
+# CHECK-NOT: {{.}}
+# CHECK-SAME: [3]
+#   lower(1) and upper(2)
+# CHECK-NOT: {{.}}
+# CHECK-SAME: {{\[}}[1, 3)]
+#   lower(1) and count(3)
+# CHECK-NOT: {{.}}
+# CHECK-SAME: {{\[}}[1, 4)]
+#   lower(0) and count(4) - testing that the lower bound matching language
+#   default is not rendered
+# CHECK-NOT: {{.}}
+# CHECK-SAME: [4]
+#   count(2)
+# CHECK-SAME: [2]
+#   no attributes
+# CHECK-NOT: {{.}}
+# CHECK-SAME: []{{"\)$}}
+
 
 # subroutine types
 # CHECK:   DW_AT_type{{.*}}"int()"
 # CHECK:   DW_AT_type{{.*}}"void(int)"
 # CHECK:   DW_AT_type{{.*}}"void(int, int)"
+
+# array_type with a language with a default lower bound of 1 instead of 0 and
+# an upper bound of 2. This describes an array with 2 elements (whereas with a
+# default lower bound of 0 it would be an array of 3 elements)
+# CHECK: DW_AT_type{{.*}}"int[2]"
+
        .section        .debug_str,"MS",@progbits,1
 .Lint_name:
        .asciz  "int"
        .byte   19                      # DW_FORM_ref4
        .byte   0                       # EOM(1)
        .byte   0                       # EOM(2)
+       .byte   18                      # Abbreviation Code
+       .byte   0x21                    # DW_TAG_subrange_type
+       .byte   0                       # DW_CHILDREN_no
+       .byte   0                       # EOM(1)
+       .byte   0                       # EOM(2)
        .byte   0                       # EOM(3)
        .section        .debug_info,"",@progbits
 .Lcu_begin:
        .byte   2                       #     DW_AT_upper_bound
        .byte   12                      #   DW_TAG_subrange_type
        .byte   1                       #     DW_AT_lower_bound
-       .byte   2                       #     DW_AT_count
+       .byte   3                       #     DW_AT_count
+       .byte   12                      #   DW_TAG_subrange_type
+       .byte   0                       #     DW_AT_lower_bound
+       .byte   4                       #     DW_AT_count
        .byte   13                      #   DW_TAG_subrange_type
        .byte   2                       #     DW_AT_count
+       .byte   18                      #   DW_TAG_subrange_type
        .byte   0                       # End Of Children Mark
 .Lsub_int_empty_type:
        .byte   15                      # DW_TAG_subroutine_type
        .long   .Lsub_void_int_int_type - .Lcu_begin #   DW_AT_type
        .byte   0                       # End Of Children Mark
 .Lunit_end:
+.Lcu2_begin:
+       .long   .Lcu2_unit_end - .Lcu2_unit_start # Length of Unit
+.Lcu2_unit_start:
+       .short  4                       # DWARF version number
+       .long   .debug_abbrev           # Offset Into Abbrev. Section
+       .byte   8                       # Address Size (in bytes)
+       .byte   1                       # DW_TAG_compile_unit
+       .short  13                      #   DW_AT_language (0x000d = DW_LANG_Ada95, default lower bound 1)
+.Lcu2_int_type:
+       .byte   2                       # DW_TAG_base_type
+       .long   .Lint_name              #   DW_AT_name
+.Lcu2_array_type:
+       .byte   8                       # DW_TAG_array_type
+       .long   .Lcu2_int_type - .Lcu2_begin #   DW_AT_type
+       .byte   10                      #   DW_TAG_subrange_type
+       .byte   2                       #     DW_AT_upper_bound
+       .byte   3                       # DW_TAG_variable
+       .long   .Lcu2_array_type - .Lcu2_begin #   DW_AT_type
+.Lcu2_unit_end: