DebugInfo: Deduplicate addresses in debug_addr
author: David Blaikie <dblaikie@gmail.com>
Fri, 25 Oct 2019 02:48:42 +0000 (19:48 -0700)
committer: David Blaikie <dblaikie@gmail.com>
Wed, 27 Jan 2021 22:00:43 +0000 (14:00 -0800)
Experimental: this relies on DWARF support that does not exist yet,
using an expression (exprloc) involving an addr_index for the location
(to compute address + offset, so that addresses can be reused in more
places).

The global variable debug info had to be deferred until the end of the
module, so that all bss variables would be emitted first and their
labels would have the relevant section assigned. Non-bss variables did
not seem to have their labels assigned to a section even at the end of
the module, so I didn't know what to do there.

Also, the hashing code is broken: it doesn't know how to hash these
expressions (and isn't hashing anything inside subprograms, which seems
problematic), so for test purposes this change just skips the hash
computation. (GCC actually seems to be overly sensitive in its hash
function - I'm forgetting the specific case right now. In any case, we
might want to just use the frontend-known file hash and give up on
optimistic .dwo/.dwp reuse.)

llvm/lib/CodeGen/AsmPrinter/DIE.cpp
llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
llvm/test/DebugInfo/X86/ranges_always.ll

index 39b0b02..2b83d8b 100644 (file)
@@ -785,6 +785,7 @@ void DIEBlock::emitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
   case dwarf::DW_FORM_block1: Asm->emitInt8(Size);    break;
   case dwarf::DW_FORM_block2: Asm->emitInt16(Size);   break;
   case dwarf::DW_FORM_block4: Asm->emitInt32(Size);   break;
+  case dwarf::DW_FORM_exprloc:
   case dwarf::DW_FORM_block:
     Asm->emitULEB128(Size);
     break;
@@ -803,6 +804,7 @@ unsigned DIEBlock::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
   case dwarf::DW_FORM_block1: return Size + sizeof(int8_t);
   case dwarf::DW_FORM_block2: return Size + sizeof(int16_t);
   case dwarf::DW_FORM_block4: return Size + sizeof(int32_t);
+  case dwarf::DW_FORM_exprloc:
   case dwarf::DW_FORM_block:  return Size + getULEB128Size(Size);
   case dwarf::DW_FORM_data16: return 16;
   default: llvm_unreachable("Improper form for block");
index befc4bb..c16c84d 100644 (file)
@@ -73,11 +73,26 @@ void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute,
   if (Label)
     DD->addArangeLabel(SymbolCU(this, Label));
 
-  unsigned idx = DD->getAddressPool().getIndex(Label);
-  Die.addValue(DIEValueAllocator, Attribute,
-               DD->getDwarfVersion() >= 5 ? dwarf::DW_FORM_addrx
-                                          : dwarf::DW_FORM_GNU_addr_index,
-               DIEInteger(idx));
+  if (Label->isInSection() || !DD->useAddrOffsetExpressions()) {
+    const MCSymbol *Base = DD->getSectionLabel(&Label->getSection());
+    if (Base == Label || !DD->useAddrOffsetExpressions()) {
+      unsigned idx = DD->getAddressPool().getIndex(Label);
+      Die.addValue(DIEValueAllocator, Attribute,
+                   DD->getDwarfVersion() >= 5 ? dwarf::DW_FORM_addrx
+                                              : dwarf::DW_FORM_GNU_addr_index,
+                   DIEInteger(idx));
+      return;
+    }
+  }
+
+  // Could be extended to work with DWARFv4 Split DWARF if that's important for
+  // someone. In that case DW_FORM_data would be used.
+  assert(DD->getDwarfVersion() >= 5 &&
+         "Addr+offset expressions are only valuable when using debug_addr (to "
+         "reduce relocations) available in DWARFv5 or higher");
+  auto *Loc = new (DIEValueAllocator) DIEBlock();
+  addPoolOpAddress(*Loc, Label);
+  addBlock(Die, Attribute, dwarf::DW_FORM_exprloc, Loc);
 }
 
 void DwarfCompileUnit::addLocalLabelAddress(DIE &Die,
index 4626827..1cebfdf 100644 (file)
@@ -160,6 +160,10 @@ static cl::opt<DwarfDebug::MinimizeAddrInV5> MinimizeAddrInV5Option(
                clEnumValN(DwarfDebug::MinimizeAddrInV5::Ranges, "Ranges",
                           "Use rnglists for contiguous ranges if that allows "
                           "using a pre-existing base address"),
+               clEnumValN(DwarfDebug::MinimizeAddrInV5::Expressions,
+                          "Expressions",
+                          "Use exprloc addrx+offset expressions for any "
+                          "address with a prior base address"),
                clEnumValN(DwarfDebug::MinimizeAddrInV5::Disabled, "Disabled",
                           "Stuff")),
     cl::init(DwarfDebug::MinimizeAddrInV5::Default));
@@ -3397,7 +3401,10 @@ dwarf::Form DwarfDebug::getDwarfSectionOffsetForm() const {
 }
 
 const MCSymbol *DwarfDebug::getSectionLabel(const MCSection *S) {
-  return SectionLabels.find(S)->second;
+  auto I = SectionLabels.find(S);
+  if (I == SectionLabels.end())
+    return nullptr;
+  return I->second;
 }
 void DwarfDebug::insertSectionLabel(const MCSymbol *S) {
   if (SectionLabels.insert(std::make_pair(&S->getSection(), S)).second)
index e082b11..58ad8ea 100644 (file)
@@ -383,6 +383,7 @@ public:
     Default,
     Disabled,
     Ranges,
+    Expressions,
   };
 
 private:
@@ -703,6 +704,12 @@ public:
     return MinimizeAddr == MinimizeAddrInV5::Ranges;
   }
 
+  // Returns whether novel exprloc addrx+offset encodings should be used to
+  // reduce debug_addr size.
+  bool useAddrOffsetExpressions() const {
+    return MinimizeAddr == MinimizeAddrInV5::Expressions;
+  }
+
   /// Returns whether to use sections as labels rather than temp symbols.
   bool useSectionsAsReferences() const {
     return UseSectionsAsReferences;
index 46161e7..6267c3e 100644 (file)
@@ -315,17 +315,40 @@ unsigned DwarfTypeUnit::getOrCreateSourceID(const DIFile *File) {
       Asm->OutContext.getDwarfVersion(), File->getSource());
 }
 
+void DwarfUnit::addPoolOpAddress(DIEValueList &Die, const MCSymbol *Label) {
+  const MCSymbol *Base = nullptr;
+  if (Label->isInSection() && DD->useAddrOffsetExpressions())
+    Base = DD->getSectionLabel(&Label->getSection());
+  if (!Base) {
+    uint32_t Index = DD->getAddressPool().getIndex(Label);
+    if (DD->getDwarfVersion() >= 5) {
+      addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addrx);
+      addUInt(Die, dwarf::DW_FORM_addrx, Index);
+    } else {
+      addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index);
+      addUInt(Die, dwarf::DW_FORM_GNU_addr_index, Index);
+    }
+    return;
+  }
+
+  addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index);
+  addUInt(Die, dwarf::DW_FORM_GNU_addr_index,
+          DD->getAddressPool().getIndex(Base));
+  if (Base != Label) {
+    addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_const4u);
+    addLabelDelta(Die, (dwarf::Attribute)0, Label, Base);
+    addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+  }
+}
+
 void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) {
   if (DD->getDwarfVersion() >= 5) {
-    addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addrx);
-    addUInt(Die, dwarf::DW_FORM_addrx, DD->getAddressPool().getIndex(Sym));
+    addPoolOpAddress(Die, Sym);
     return;
   }
 
   if (DD->useSplitDwarf()) {
-    addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index);
-    addUInt(Die, dwarf::DW_FORM_GNU_addr_index,
-            DD->getAddressPool().getIndex(Sym));
+    addPoolOpAddress(Die, Sym);
     return;
   }
 
@@ -333,7 +356,7 @@ void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) {
   addLabel(Die, dwarf::DW_FORM_addr, Sym);
 }
 
-void DwarfUnit::addLabelDelta(DIE &Die, dwarf::Attribute Attribute,
+void DwarfUnit::addLabelDelta(DIEValueList &Die, dwarf::Attribute Attribute,
                               const MCSymbol *Hi, const MCSymbol *Lo) {
   Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_data4,
                new (DIEValueAllocator) DIEDelta(Hi, Lo));
@@ -382,11 +405,16 @@ void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Loc) {
                Loc->BestForm(DD->getDwarfVersion()), Loc);
 }
 
-void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute,
+void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute, dwarf::Form Form,
                          DIEBlock *Block) {
   Block->ComputeSize(Asm);
   DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on.
-  Die.addValue(DIEValueAllocator, Attribute, Block->BestForm(), Block);
+  Die.addValue(DIEValueAllocator, Attribute, Form, Block);
+}
+
+void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute,
+                         DIEBlock *Block) {
+  addBlock(Die, Attribute, Block->BestForm(), Block);
 }
 
 void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, const DIFile *File) {
index 5c64376..a1d6008 100644 (file)
@@ -160,10 +160,11 @@ public:
   /// Add a dwarf op address data and value using the form given and an
   /// op of either DW_FORM_addr or DW_FORM_GNU_addr_index.
   void addOpAddress(DIELoc &Die, const MCSymbol *Sym);
+  void addPoolOpAddress(DIEValueList &Die, const MCSymbol *Label);
 
   /// Add a label delta attribute data and value.
-  void addLabelDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi,
-                     const MCSymbol *Lo);
+  void addLabelDelta(DIEValueList &Die, dwarf::Attribute Attribute,
+                     const MCSymbol *Hi, const MCSymbol *Lo);
 
   /// Add a DIE attribute data and value.
   void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIE &Entry);
@@ -179,6 +180,8 @@ public:
 
   /// Add block data.
   void addBlock(DIE &Die, dwarf::Attribute Attribute, DIEBlock *Block);
+  void addBlock(DIE &Die, dwarf::Attribute Attribute, dwarf::Form Form,
+                DIEBlock *Block);
 
   /// Add location information to specified debug information entry.
   void addSourceLine(DIE &Die, unsigned Line, const DIFile *File);
index 230db20..9d1498c 100644 (file)
@@ -1,6 +1,13 @@
 ; RUN: llc -O0 %s -mtriple=x86_64-unknown-linux-gnu -filetype=obj -o - -minimize-addr-in-v5=Ranges \
 ; RUN:   | llvm-dwarfdump -debug-info -debug-addr -debug-rnglists -v - \
-; RUN:   | FileCheck --implicit-check-not=DW_TAG --implicit-check-not=NULL --implicit-check-not=DW_AT_low_pc --implicit-check-not=DW_AT_high_pc --implicit-check-not=DW_AT_ranges %s
+; RUN:   | FileCheck --check-prefix=CHECK --check-prefix=RNG \
+; RUN:     --implicit-check-not=DW_TAG --implicit-check-not=NULL --implicit-check-not=_pc %s
+
+
+; RUN: llc -O0 %s -mtriple=x86_64-unknown-linux-gnu -filetype=obj -o - -minimize-addr-in-v5=Expressions \
+; RUN:   | llvm-dwarfdump -debug-info -debug-addr -debug-rnglists -v - \
+; RUN:   | FileCheck --check-prefix=CHECK --check-prefix=EXPR \
+; RUN:     --implicit-check-not=DW_TAG --implicit-check-not=NULL --implicit-check-not=_pc %s
 
 ; Generated from the following source. f4 is used to put a hole in the CU
 ; ranges while keeping f2 and f4 in the same section (as opposed to
 ; CHECK-LABEL: .debug_info contents:
 ; CHECK: DW_TAG_compile_unit
 ; CHECK:   DW_AT_low_pc [DW_FORM_addr] (0x0000000000000000)
-; CHECK:   DW_AT_ranges [DW_FORM_rnglistx]   (indexed (0x2) rangelist = [[CU_RANGE:.*]]
+; RNG:     DW_AT_ranges [DW_FORM_rnglistx]   (indexed (0x2) rangelist = [[CU_RANGE:.*]]
+; EXPR:    DW_AT_ranges [DW_FORM_rnglistx]   (indexed (0x0) rangelist = [[CU_RANGE:.*]]
 ; CHECK:   DW_TAG_subprogram
 ; CHECK:     DW_AT_name {{.*}} "f2"
 ; CHECK:   DW_TAG_subprogram
 ; CHECK:     DW_AT_low_pc [DW_FORM_addrx]    (indexed (00000000) address = 0x0000000000000000 ".text")
 ; CHECK:     DW_AT_high_pc [DW_FORM_data4]   (0x00000010)
 ; CHECK:     DW_TAG_inlined_subroutine
-; CHECK:       DW_AT_ranges [DW_FORM_rnglistx] (indexed (0x0) rangelist = [[INL_RANGE:.*]]
+; EXPR:        DW_AT_low_pc [DW_FORM_exprloc] (DW_OP_GNU_addr_index 0x0, DW_OP_const4u 0x9, DW_OP_plus)
+; EXPR:        DW_AT_high_pc [DW_FORM_data4]   (0x00000005)
+; RNG:         DW_AT_ranges [DW_FORM_rnglistx] (indexed (0x0) rangelist = [[INL_RANGE:.*]]
+; CHECK:     DW_TAG_call_site
+; RNG:         DW_AT_call_return_pc [DW_FORM_addrx]  (indexed (00000001) address = 0x0000000000000009 ".text")
+; EXPR:        DW_AT_call_return_pc [DW_FORM_exprloc] (DW_OP_GNU_addr_index 0x0, DW_OP_const4u 0x9, DW_OP_plus)
+; CHECK:     DW_TAG_call_site
+; RNG:         DW_AT_call_return_pc [DW_FORM_addrx]  (indexed (00000002) address = 0x000000000000000e ".text")
+; EXPR:        DW_AT_call_return_pc [DW_FORM_exprloc] (DW_OP_GNU_addr_index 0x0, DW_OP_const4u 0xe, DW_OP_plus)
 ; CHECK:     NULL
 ; CHECK:   DW_TAG_subprogram
-; CHECK:     DW_AT_ranges [DW_FORM_rnglistx] (indexed (0x1) rangelist = [[F5_RANGE:.*]]
+; CHECK:     DW_AT_name {{.*}} "f1"
 ; CHECK:   DW_TAG_subprogram
-; CHECK:     DW_AT_low_pc [DW_FORM_addrx]    (indexed (00000001) address = 0x0000000000000000 ".other")
+; EXPR:      DW_AT_low_pc [DW_FORM_exprloc] (DW_OP_GNU_addr_index 0x0, DW_OP_const4u 0x20, DW_OP_plus)
+; EXPR:      DW_AT_high_pc [DW_FORM_data4]   (0x00000006)
+; RNG:       DW_AT_ranges [DW_FORM_rnglistx] (indexed (0x1) rangelist = [[F5_RANGE:.*]]
+; CHECK:   DW_TAG_subprogram
+; CHECK:     DW_AT_low_pc [DW_FORM_addrx]    (indexed (
+; RNG-SAME: 00000003
+; EXPR-SAME: 00000001
+; CHECK: ) address = 0x0000000000000000 ".other")
 ; CHECK:     DW_AT_high_pc [DW_FORM_data4]   (0x00000006)
 ; CHECK:   NULL
 
 ; CHECK: 0x00000000: Address table
 ; CHECK-NEXT: Addrs: [
 ; CHECK-NEXT: 0x0000000000000000
+; RNG-NEXT:   0x0000000000000009
+; RNG-NEXT:   0x000000000000000e
 ; CHECK-NEXT: 0x0000000000000000
 ; CHECK-NEXT: ]
 
 ; CHECK-LABEL: .debug_rnglists contents:
-; CHECK: 0x00000000: range list header: {{.*}}, offset_entry_count = 0x00000003
+; RNG: 0x00000000: range list header: {{.*}}, offset_entry_count = 0x00000003
+; EXPR: 0x00000000: range list header: {{.*}}, offset_entry_count = 0x00000001
 ; CHECK: ranges:
-; CHECK-NEXT: [[INL_RANGE]]: [DW_RLE_base_addressx]:  0x0000000000000000
-; CHECK-NEXT:                [DW_RLE_offset_pair  ]
-; CHECK-NEXT:                [DW_RLE_end_of_list  ]
+; RNG-NEXT:   [[INL_RANGE]]: [DW_RLE_base_addressx]:  0x0000000000000000
+; RNG-NEXT:                  [DW_RLE_offset_pair  ]
+; RNG-NEXT:                  [DW_RLE_end_of_list  ]
 
-; CHECK-NEXT: [[F5_RANGE]]: [DW_RLE_base_addressx]:  0x0000000000000000
-; CHECK-NEXT:               [DW_RLE_offset_pair  ]
-; CHECK-NEXT:               [DW_RLE_end_of_list  ]
+; RNG-NEXT:   [[F5_RANGE]]: [DW_RLE_base_addressx]:  0x0000000000000000
+; RNG-NEXT:                 [DW_RLE_offset_pair  ]
+; RNG-NEXT:                 [DW_RLE_end_of_list  ]
 
 ; CHECK-NEXT: [[CU_RANGE]]: [DW_RLE_base_addressx]:  0x0000000000000000
 ; CHECK-NEXT:               [DW_RLE_offset_pair  ]
 ; CHECK-NEXT:               [DW_RLE_offset_pair  ]
-; CHECK-NEXT:               [DW_RLE_startx_length]:  0x0000000000000001
+; RNG-NEXT:                 [DW_RLE_startx_length]:  0x0000000000000003
+; EXPR-NEXT:                [DW_RLE_startx_length]:  0x0000000000000001
 ; CHECK-NEXT:               [DW_RLE_end_of_list  ]
 
 ; Function Attrs: noinline optnone uwtable mustprogress
@@ -84,7 +111,7 @@ entry:
   ret void, !dbg !14
 }
 
-declare dso_local void @_Z2f1v() #1
+declare !dbg !19 dso_local void @_Z2f1v() #1
 
 ; Function Attrs: noinline nounwind optnone uwtable mustprogress
 define dso_local void @_Z2f4v() #2 {
@@ -119,7 +146,7 @@ attributes #2 = { noinline nounwind optnone uwtable mustprogress "disable-tail-c
 !4 = !{i32 2, !"Debug Info Version", i32 3}
 !5 = !{i32 1, !"wchar_size", i32 4}
 !6 = !{!"clang version 12.0.0 (git@github.com:llvm/llvm-project.git 79afdd7d36b814942ec7f2f577d0443f6aecc939)"}
-!7 = distinct !DISubprogram(name: "f3", linkageName: "_Z2f3v", scope: !1, file: !1, line: 5, type: !8, scopeLine: 5, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
+!7 = distinct !DISubprogram(name: "f3", linkageName: "_Z2f3v", scope: !1, file: !1, line: 5, type: !8, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
 !8 = !DISubroutineType(types: !9)
 !9 = !{null}
 !10 = !DILocation(line: 6, column: 3, scope: !7)
@@ -131,3 +158,4 @@ attributes #2 = { noinline nounwind optnone uwtable mustprogress "disable-tail-c
 !16 = !DILocation(line: 12, column: 1, scope: !15)
 !17 = distinct !DISubprogram(name: "f6", linkageName: "_Z2f6v", scope: !1, file: !1, line: 13, type: !8, scopeLine: 13, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
 !18 = !DILocation(line: 14, column: 1, scope: !17)
+!19 = !DISubprogram(name: "f1", linkageName: "_Z2f1v", scope: !1, file: !1, line: 1, type: !8, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !2)