From 4c273cd071150912fd6f1e4aee12148cf78a6410 Mon Sep 17 00:00:00 2001 From: Alexey Lapshin Date: Thu, 2 Feb 2023 17:17:52 +0100 Subject: [PATCH] [DWARFLinker] Refactor cloneAddressAttribute(). As a preparation for implementing DWARFv5 address ranges generation, this patch refactors cloneAddressAttribute() method. It has special handling for addresses which can be relocated in some unrelated value, for applying relocations twice, for indexed addresses. Instead of all these special handlings this patch uses general handling: Read attribute value from InputDIE and apply PCOffset. Another thing is that current handling of DW_FORM_addrx misses the fact that relocations might be applied twice in some cases. This patch fixes this problem also. Differential Revision: https://reviews.llvm.org/D143269 --- llvm/include/llvm/DWARFLinker/DWARFLinker.h | 21 +--- llvm/lib/DWARFLinker/DWARFLinker.cpp | 139 ++++++++------------- llvm/test/tools/dsymutil/Inputs/call-dwarf5.o | Bin 0 -> 2232 bytes .../dsymutil/X86/dwarf5-call-site-entry-reloc.test | 37 ++++++ llvm/tools/dsymutil/DwarfLinkerForBinary.cpp | 13 -- llvm/tools/dsymutil/DwarfLinkerForBinary.h | 3 - llvm/tools/llvm-dwarfutil/DebugInfoLinker.cpp | 27 +--- 7 files changed, 92 insertions(+), 148 deletions(-) create mode 100644 llvm/test/tools/dsymutil/Inputs/call-dwarf5.o create mode 100644 llvm/test/tools/dsymutil/X86/dwarf5-call-site-entry-reloc.test diff --git a/llvm/include/llvm/DWARFLinker/DWARFLinker.h b/llvm/include/llvm/DWARFLinker/DWARFLinker.h index d12b6d1..30f0c1c 100644 --- a/llvm/include/llvm/DWARFLinker/DWARFLinker.h +++ b/llvm/include/llvm/DWARFLinker/DWARFLinker.h @@ -68,10 +68,6 @@ public: virtual bool applyValidRelocs(MutableArrayRef Data, uint64_t BaseOffset, bool IsLittleEndian) = 0; - /// Relocate the given address offset if a valid relocation exists. - virtual llvm::Expected relocateIndexedAddr(uint64_t StartOffset, - uint64_t EndOffset) = 0; - /// Returns all valid functions address ranges(i.e., those ranges /// which points to sections with code). virtual RangesTy &getValidAddressRanges() = 0; @@ -635,18 +631,6 @@ private: uint32_t NameOffset = 0; uint32_t MangledNameOffset = 0; - /// Value of AT_low_pc in the input DIE - uint64_t OrigLowPc = std::numeric_limits::max(); - - /// Value of AT_high_pc in the input DIE - uint64_t OrigHighPc = 0; - - /// Value of DW_AT_call_return_pc in the input DIE - uint64_t OrigCallReturnPc = 0; - - /// Value of DW_AT_call_pc in the input DIE - uint64_t OrigCallPc = 0; - /// Offset to apply to PC addresses inside a function. int64_t PCOffset = 0; @@ -704,8 +688,9 @@ private: /// Clone an attribute referencing another DIE and add /// it to \p Die. /// \returns the size of the new attribute. - unsigned cloneAddressAttribute(DIE &Die, AttributeSpec AttrSpec, - unsigned AttrSize, const DWARFFormValue &Val, + unsigned cloneAddressAttribute(DIE &Die, const DWARFDie &InputDIE, + AttributeSpec AttrSpec, unsigned AttrSize, + const DWARFFormValue &Val, const CompileUnit &Unit, AttributesInfo &Info); diff --git a/llvm/lib/DWARFLinker/DWARFLinker.cpp b/llvm/lib/DWARFLinker/DWARFLinker.cpp index 6d6bd33..abe77ea 100644 --- a/llvm/lib/DWARFLinker/DWARFLinker.cpp +++ b/llvm/lib/DWARFLinker/DWARFLinker.cpp @@ -1151,83 +1151,65 @@ unsigned DWARFLinker::DIECloner::cloneBlockAttribute( } unsigned DWARFLinker::DIECloner::cloneAddressAttribute( - DIE &Die, AttributeSpec AttrSpec, unsigned AttrSize, - const DWARFFormValue &Val, const CompileUnit &Unit, AttributesInfo &Info) { + DIE &Die, const DWARFDie &InputDIE, AttributeSpec AttrSpec, + unsigned AttrSize, const DWARFFormValue &Val, const CompileUnit &Unit, + AttributesInfo &Info) { + if (AttrSpec.Attr == dwarf::DW_AT_low_pc) + Info.HasLowPc = true; + if (LLVM_UNLIKELY(Linker.Options.Update)) { - if (AttrSpec.Attr == dwarf::DW_AT_low_pc) - Info.HasLowPc = true; Die.addValue(DIEAlloc, dwarf::Attribute(AttrSpec.Attr), dwarf::Form(AttrSpec.Form), DIEInteger(Val.getRawUValue())); return AttrSize; } + // Cloned Die may have address attributes relocated to a + // totally unrelated value. This can happen: + // - If high_pc is an address (Dwarf version == 2), then it might have been + // relocated to a totally unrelated value (because the end address in the + // object file might be start address of another function which got moved + // independently by the linker). + // - If address relocated in an inline_subprogram that happens at the + // beginning of its inlining function. + // To avoid above cases and to not apply relocation twice (in applyValidRelocs + // and here), read address attribute from InputDIE and apply Info.PCOffset + // here. + + std::optional AddrAttribute = InputDIE.find(AttrSpec.Attr); + if (!AddrAttribute) + llvm_unreachable("Cann't find attribute."); + + std::optional Addr = AddrAttribute->getAsAddress(); + if (!Addr) { + Linker.reportWarning("Cann't read address attribute value.", ObjFile); + Addr = 0; + } + + if (InputDIE.getTag() == dwarf::DW_TAG_compile_unit && + AttrSpec.Attr == dwarf::DW_AT_low_pc) { + if (std::optional LowPC = Unit.getLowPc()) + Addr = *LowPC; + else + return 0; + } else if (InputDIE.getTag() == dwarf::DW_TAG_compile_unit && + AttrSpec.Attr == dwarf::DW_AT_high_pc) { + if (uint64_t HighPc = Unit.getHighPc()) + Addr = HighPc; + else + return 0; + } else { + *Addr += Info.PCOffset; + } + dwarf::Form Form = AttrSpec.Form; - uint64_t Addr = 0; - if (Form == dwarf::DW_FORM_addrx) { - if (std::optional AddrOffsetSectionBase = - Unit.getOrigUnit().getAddrOffsetSectionBase()) { - uint64_t StartOffset = - *AddrOffsetSectionBase + - Val.getRawUValue() * Unit.getOrigUnit().getAddressByteSize(); - uint64_t EndOffset = - StartOffset + Unit.getOrigUnit().getAddressByteSize(); - if (llvm::Expected RelocAddr = - ObjFile.Addresses->relocateIndexedAddr(StartOffset, EndOffset)) - Addr = *RelocAddr; - else - Linker.reportWarning(toString(RelocAddr.takeError()), ObjFile); - } else - Linker.reportWarning("no base offset for address table", ObjFile); - // Generation of DWARFv5 .debug_addr table is not supported yet. - // Convert attribute into the dwarf::DW_FORM_addr. + // FIXME: Generation of DWARFv5 .debug_addr table is not supported yet. + // Convert attribute into the dwarf::DW_FORM_addr. + if (Form == dwarf::DW_FORM_addrx) Form = dwarf::DW_FORM_addr; - } else - Addr = *Val.getAsAddress(); - - if (AttrSpec.Attr == dwarf::DW_AT_low_pc) { - if (Die.getTag() == dwarf::DW_TAG_inlined_subroutine || - Die.getTag() == dwarf::DW_TAG_lexical_block || - Die.getTag() == dwarf::DW_TAG_label) { - // The low_pc of a block or inline subroutine might get - // relocated because it happens to match the low_pc of the - // enclosing subprogram. To prevent issues with that, always use - // the low_pc from the input DIE if relocations have been applied. - Addr = (Info.OrigLowPc != std::numeric_limits::max() - ? Info.OrigLowPc - : Addr) + - Info.PCOffset; - } else if (Die.getTag() == dwarf::DW_TAG_compile_unit) { - if (std::optional LowPC = Unit.getLowPc()) - Addr = *LowPC; - else - return 0; - } - Info.HasLowPc = true; - } else if (AttrSpec.Attr == dwarf::DW_AT_high_pc) { - if (Die.getTag() == dwarf::DW_TAG_compile_unit) { - if (uint64_t HighPc = Unit.getHighPc()) - Addr = HighPc; - else - return 0; - } else - // If we have a high_pc recorded for the input DIE, use - // it. Otherwise (when no relocations where applied) just use the - // one we just decoded. - Addr = (Info.OrigHighPc ? Info.OrigHighPc : Addr) + Info.PCOffset; - } else if (AttrSpec.Attr == dwarf::DW_AT_call_return_pc) { - // Relocate a return PC address within a call site entry. - if (Die.getTag() == dwarf::DW_TAG_call_site) - Addr = (Info.OrigCallReturnPc ? Info.OrigCallReturnPc : Addr) + - Info.PCOffset; - } else if (AttrSpec.Attr == dwarf::DW_AT_call_pc) { - // Relocate the address of a branch instruction within a call site entry. - if (Die.getTag() == dwarf::DW_TAG_call_site) - Addr = (Info.OrigCallPc ? Info.OrigCallPc : Addr) + Info.PCOffset; - } Die.addValue(DIEAlloc, static_cast(AttrSpec.Attr), - static_cast(Form), DIEInteger(Addr)); + static_cast(Form), DIEInteger(*Addr)); return Unit.getOrigUnit().getAddressByteSize(); } @@ -1350,7 +1332,8 @@ unsigned DWARFLinker::DIECloner::cloneAttribute( IsLittleEndian); case dwarf::DW_FORM_addr: case dwarf::DW_FORM_addrx: - return cloneAddressAttribute(Die, AttrSpec, AttrSize, Val, Unit, Info); + return cloneAddressAttribute(Die, InputDIE, AttrSpec, AttrSize, Val, Unit, + Info); case dwarf::DW_FORM_data1: case dwarf::DW_FORM_data2: case dwarf::DW_FORM_data4: @@ -1500,27 +1483,7 @@ DIE *DWARFLinker::DIECloner::cloneDIE(const DWARFDie &InputDIE, DWARFDataExtractor(DIECopy, Data.isLittleEndian(), Data.getAddressSize()); // Modify the copy with relocated addresses. - if (ObjFile.Addresses->applyValidRelocs(DIECopy, Offset, - Data.isLittleEndian())) { - // If we applied relocations, we store the value of high_pc that was - // potentially stored in the input DIE. If high_pc is an address - // (Dwarf version == 2), then it might have been relocated to a - // totally unrelated value (because the end address in the object - // file might be start address of another function which got moved - // independently by the linker). The computation of the actual - // high_pc value is done in cloneAddressAttribute(). - AttrInfo.OrigHighPc = - dwarf::toAddress(InputDIE.find(dwarf::DW_AT_high_pc), 0); - // Also store the low_pc. It might get relocated in an - // inline_subprogram that happens at the beginning of its - // inlining function. - AttrInfo.OrigLowPc = dwarf::toAddress(InputDIE.find(dwarf::DW_AT_low_pc), - std::numeric_limits::max()); - AttrInfo.OrigCallReturnPc = - dwarf::toAddress(InputDIE.find(dwarf::DW_AT_call_return_pc), 0); - AttrInfo.OrigCallPc = - dwarf::toAddress(InputDIE.find(dwarf::DW_AT_call_pc), 0); - } + ObjFile.Addresses->applyValidRelocs(DIECopy, Offset, Data.isLittleEndian()); // Reset the Offset to 0 as we will be working on the local copy of // the data. diff --git a/llvm/test/tools/dsymutil/Inputs/call-dwarf5.o b/llvm/test/tools/dsymutil/Inputs/call-dwarf5.o new file mode 100644 index 0000000000000000000000000000000000000000..70417f5a2fc6fd18d3c7760c4926ee59a811719e GIT binary patch literal 2232 zcmb_d&2Jk;6n|rTZO4W$*ojlV8j+$RKv+$y7y(+Xch~Wj_3mnS z?Su*x#03zS9ucRCBY#9ss8<9hBrb5G0wKf+iI2AL&Ca-vTcjL#(%bhrznM4lX7-!E ze!HCmU}?Y)at8Sp8C7JY3i5#L|DZ)0fRq*C9k6p8bh@GOb-O%epGv?ZS7^?mZq z;(Z|!-x2c5W8y_|sQdkXG>iAAzKDlH7qM6 z-z=Ubvg=}in@yBDo`-;*!883~V3@H!^7b9i()HK2#ZWFj7un~8^E%>DTXp66-gh4M zK<|gf&`t*E`q6&*Uf>-FydMRgFrkQTD|M2xUf1zB;%vPg(c_*N)TQP{8sPfGmd@0h z^dI5rbjUOPcSGRqBrtGO;0cq&5xiA&oIs|zGNl4%RCyBl16&6|UPnnb)(0KrX`;B8 zcG`U`y;SU6yG5BD{$-1-Rx2y%+DGr#em|^*OAAZOwVw_vi_4E>qa|FH3Z(vgSUGp% zY9(DeTYR?oVg(aEX82;HRrcU0)^-67%O5Ob6Fng3l`_Ww$UX(>+6%=ZRrw~e0u1Y> z-FX~XD$R8?GeFmztQ6lUwu^~rMqABxv!G;ZJfx(ldD(6b3SCq()YBQr=9LVbMpd*k zIH>O<|A71}avjg2iQGnh9@%saZ?8JG!^rWyYGXsKtM%&XK^zCsSxwt>;=!n=;w{iz zcbt4yg3!NWo3V<)YO~+$tv1#hwz+1S&DHj1-7q$dX0z35x2+bc>l^lFeQoUwn1<`F zSo=oUZ>c6|TS4GDrV-<;k=8YR%T|pb&|dPr*!JQ`+qTEH>j!qIbzCFv`{6LsUN_7) zcDiaH98)Ji<5OFm2!9N z`);IJ(R;&D?6_LDy$*ffhn^9_&~Q9(ycp=MQ}}VAf+R1%Tw;Qh^6j3^Nv=;y4iO_@ zGA4PR?vFnzGTGIOzz!DJ!b7?IH9XA6&`+HI_YS97V2VA5{L4Q{d~$ua<`>Y-$G(m7 zTMUd;Vb(I5FLDR8Eo#X?8MIQl4Pv$?!X${_)*v{*$l1o}X(L zZrsE_!rWw8Oi>DDC0ACUj@l_DcS?b`l=(t6g)Ptim-9N>cC;cSx`9Y5bV+c@D|BH{ xW~)MqH!Dki70 literal 0 HcmV?d00001 diff --git a/llvm/test/tools/dsymutil/X86/dwarf5-call-site-entry-reloc.test b/llvm/test/tools/dsymutil/X86/dwarf5-call-site-entry-reloc.test new file mode 100644 index 0000000..b960d44 --- /dev/null +++ b/llvm/test/tools/dsymutil/X86/dwarf5-call-site-entry-reloc.test @@ -0,0 +1,37 @@ +## Test binaries created with the following commands: + +## $ cat call-dwarf5.c +## __attribute__((noinline, noreturn)) void foo() { +## asm volatile("" ::: "memory"); +## __builtin_unreachable(); +## } +## __attribute__((noinline)) void bar() { +## asm volatile("nop" :::); +## foo(); +## } + +## int main() { bar(); } + +## $ clang -gdwarf-5 call-dwarf5.c -fomit-frame-pointer -c -Os -o call-dwarf5.o +## $ clang -gdwarf-5 call-dwarf5.o -o call-dwarf5 + +## The test requires the return PC to match a relocation (in this case the +## DW_AT_high_pc of main). Without this change the value would get relocated +## twice. + +#RUN: dsymutil -oso-prepend-path %p/../Inputs -y %s -o %t.dSYM +#RUN: llvm-dwarfdump %t.dSYM | FileCheck %s -implicit-check-not=DW_AT_call_return_pc + +#CHECK: DW_AT_call_return_pc (0x0000000100000f72) +#CHECK: DW_AT_call_return_pc (0x0000000100000f78) + +--- +triple: 'x86_64-apple-darwin' +objects: + - filename: 'call-dwarf5.o' + timestamp: 1675373912 + symbols: + - { sym: _foo, objAddr: 0x0, binAddr: 0x100000F69, size: 0x2 } + - { sym: _bar, objAddr: 0x2, binAddr: 0x100000F6B, size: 0x7 } + - { sym: _main, objAddr: 0x9, binAddr: 0x100000F72, size: 0x6 } +... diff --git a/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp b/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp index 6f23eb0..413cbec 100644 --- a/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp +++ b/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp @@ -1063,19 +1063,6 @@ bool DwarfLinkerForBinary::AddressManager::applyValidRelocs( return Relocs.size() > 0; } -llvm::Expected -DwarfLinkerForBinary::AddressManager::relocateIndexedAddr(uint64_t StartOffset, - uint64_t EndOffset) { - std::vector Relocs = - getRelocations(ValidDebugAddrRelocs, StartOffset, EndOffset); - if (Relocs.size() == 0) - return createStringError( - std::make_error_code(std::errc::invalid_argument), - "no relocation for offset %llu in debug_addr section", StartOffset); - - return relocate(Relocs[0]); -} - bool linkDwarf(raw_fd_ostream &OutFile, BinaryHolder &BinHolder, const DebugMap &DM, LinkOptions Options) { DwarfLinkerForBinary Linker(OutFile, BinHolder, std::move(Options)); diff --git a/llvm/tools/dsymutil/DwarfLinkerForBinary.h b/llvm/tools/dsymutil/DwarfLinkerForBinary.h index 623441c..1f7422f 100644 --- a/llvm/tools/dsymutil/DwarfLinkerForBinary.h +++ b/llvm/tools/dsymutil/DwarfLinkerForBinary.h @@ -177,9 +177,6 @@ private: bool applyValidRelocs(MutableArrayRef Data, uint64_t BaseOffset, bool IsLittleEndian) override; - llvm::Expected relocateIndexedAddr(uint64_t StartOffset, - uint64_t EndOffset) override; - RangesTy &getValidAddressRanges() override { return AddressRanges; } void clear() override { diff --git a/llvm/tools/llvm-dwarfutil/DebugInfoLinker.cpp b/llvm/tools/llvm-dwarfutil/DebugInfoLinker.cpp index ef222f8..5d2352e 100644 --- a/llvm/tools/llvm-dwarfutil/DebugInfoLinker.cpp +++ b/llvm/tools/llvm-dwarfutil/DebugInfoLinker.cpp @@ -41,7 +41,7 @@ class ObjFileAddressMap : public AddressesMap { public: ObjFileAddressMap(DWARFContext &Context, const Options &Options, object::ObjectFile &ObjFile) - : Opts(Options), Context(Context) { + : Opts(Options) { // Remember addresses of existing text sections. for (const object::SectionRef &Sect : ObjFile.sections()) { if (!Sect.isText()) @@ -138,30 +138,6 @@ public: void clear() override { DWARFAddressRanges.clear(); } - llvm::Expected relocateIndexedAddr(uint64_t StartOffset, - uint64_t EndOffset) override { - // No relocations in linked binary. Return just address value. - - const char *AddrPtr = - Context.getDWARFObj().getAddrSection().Data.data() + StartOffset; - support::endianness Endianess = - Context.getDWARFObj().isLittleEndian() ? support::little : support::big; - - assert(EndOffset > StartOffset); - switch (EndOffset - StartOffset) { - case 1: - return *AddrPtr; - case 2: - return support::endian::read16(AddrPtr, Endianess); - case 4: - return support::endian::read32(AddrPtr, Endianess); - case 8: - return support::endian::read64(AddrPtr, Endianess); - } - - llvm_unreachable("relocateIndexedAddr unhandled case!"); - } - protected: // returns true if specified address range is inside address ranges // of executable sections. @@ -231,7 +207,6 @@ private: RangesTy DWARFAddressRanges; AddressRanges TextAddressRanges; const Options &Opts; - DWARFContext &Context; }; static bool knownByDWARFUtil(StringRef SecName) { -- 2.7.4