From a2639dcbe613bb2e219a50171f322d7ac1dc8de1 Mon Sep 17 00:00:00 2001 From: Ben Langmuir Date: Tue, 2 Nov 2021 10:49:08 -0700 Subject: [PATCH] [ORC] Add a utility for adding missing "self" relocations to a Symbol If a tool wants to introduce new indirections via stubs at link-time in ORC, it can cause fidelity issues around the address of the function if some references to the function do not have relocations. This is known to happen inside the body of the function itself on x86_64 for example, where a PC-relative address is formed, but without a relocation. ``` _foo: leaq -7(%rip), %rax ## form pointer to '_foo' without relocation _bar: leaq (%rip), %rax ## uses X86_64_RELOC_SIGNED to '_foo' ``` The consequence of introducing a stub for such a function at link time is that if it forms a pointer to itself without relocation, it will not have the same value as a pointer from outside the function. If the function pointer is used as a key, this can cause problems. This utility provides best-effort support for adding such missing relocations using MCDisassembler and MCInstrAnalysis to identify the problematic instructions. Currently it is only implemented for x86_64. Note: the related issue with call/jump instructions is not handled here, only forming function pointers. rdar://83514317 Differential revision: https://reviews.llvm.org/D113038 --- .../llvm/ExecutionEngine/Orc/IndirectionUtils.h | 34 +++++++++ llvm/include/llvm/MC/MCInstrAnalysis.h | 5 ++ llvm/lib/ExecutionEngine/Orc/CMakeLists.txt | 1 + llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp | 77 ++++++++++++++++++++ llvm/lib/MC/MCInstrAnalysis.cpp | 6 ++ .../Target/X86/MCTargetDesc/X86MCTargetDesc.cpp | 27 ++++++++ .../JITLink/X86/Inputs/x86-64_self_relocation.s | 35 ++++++++++ .../JITLink/X86/MachO_x86-64_self_relocation.test | 34 +++++++++ .../X86/MachO_x86-64_self_relocation_exec.test | 8 +++ llvm/tools/llvm-jitlink/llvm-jitlink.cpp | 81 +++++++++++++++++++--- 10 files changed, 297 insertions(+), 11 deletions(-) create mode 100644 llvm/test/ExecutionEngine/JITLink/X86/Inputs/x86-64_self_relocation.s create mode 100644 llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_self_relocation.test create mode 100644 llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_self_relocation_exec.test diff --git a/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h index 78e3cee..4d6d465 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h @@ -45,6 +45,13 @@ class PointerType; class Triple; class Twine; class Value; +class MCDisassembler; +class MCInstrAnalysis; + +namespace jitlink { +class LinkGraph; +class Symbol; +} // namespace jitlink namespace orc { @@ -557,6 +564,33 @@ GlobalAlias *cloneGlobalAliasDecl(Module &Dst, const GlobalAlias &OrigA, void cloneModuleFlagsMetadata(Module &Dst, const Module &Src, ValueToValueMapTy &VMap); +/// Introduce relocations to \p Sym in its own definition if there are any +/// pointers formed via PC-relative address that do not already have a +/// relocation. +/// +/// This is useful when introducing indirection via a stub function at link time +/// without compiler support. If a function pointer is formed without a +/// relocation, e.g. in the definition of \c foo +/// +/// \code +/// _foo: +/// leaq -7(%rip), rax # form pointer to _foo without relocation +/// _bar: +/// leaq (%rip), %rax # uses X86_64_RELOC_SIGNED to '_foo' +/// \endcode +/// +/// the pointer to \c _foo computed by \c _foo and \c _bar may differ if we +/// introduce a stub for _foo. If the pointer is used as a key, this may be +/// observable to the program. This pass will attempt to introduce the missing +/// "self-relocation" on the leaq instruction. +/// +/// This is based on disassembly and should be considered "best effort". It may +/// silently fail to add relocations. +Error addFunctionPointerRelocationsToCurrentSymbol(jitlink::Symbol &Sym, + jitlink::LinkGraph &G, + MCDisassembler &Disassembler, + MCInstrAnalysis &MIA); + } // end namespace orc } // end namespace llvm diff --git a/llvm/include/llvm/MC/MCInstrAnalysis.h b/llvm/include/llvm/MC/MCInstrAnalysis.h index 68506d1..632a7d8 100644 --- a/llvm/include/llvm/MC/MCInstrAnalysis.h +++ b/llvm/include/llvm/MC/MCInstrAnalysis.h @@ -158,6 +158,11 @@ public: evaluateMemoryOperandAddress(const MCInst &Inst, const MCSubtargetInfo *STI, uint64_t Addr, uint64_t Size) const; + /// Given an instruction with a memory operand that could require relocation, + /// returns the offset within the instruction of that relocation. + virtual Optional + getMemoryOperandRelocationOffset(const MCInst &Inst, uint64_t Size) const; + /// Returns (PLT virtual address, GOT virtual address) pairs for PLT entries. virtual std::vector> findPltEntries(uint64_t PltSectionVA, ArrayRef PltContents, diff --git a/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt b/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt index 7e9180c..60e7751 100644 --- a/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt +++ b/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt @@ -51,6 +51,7 @@ add_llvm_component_library(LLVMOrcJIT OrcShared OrcTargetProcess MC + MCDisassembler Passes RuntimeDyld Support diff --git a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp index e8dd1bb..ee1630a 100644 --- a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp @@ -9,12 +9,17 @@ #include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Triple.h" +#include "llvm/ExecutionEngine/JITLink/x86_64.h" #include "llvm/ExecutionEngine/Orc/OrcABISupport.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/MC/MCInstrAnalysis.h" #include "llvm/Support/Format.h" #include "llvm/Transforms/Utils/Cloning.h" #include +#define DEBUG_TYPE "orc" + using namespace llvm; using namespace llvm::orc; @@ -372,5 +377,77 @@ void cloneModuleFlagsMetadata(Module &Dst, const Module &Src, Dst.addModuleFlag(MapMetadata(MF, VMap)); } +Error addFunctionPointerRelocationsToCurrentSymbol(jitlink::Symbol &Sym, + jitlink::LinkGraph &G, + MCDisassembler &Disassembler, + MCInstrAnalysis &MIA) { + // AArch64 appears to already come with the necessary relocations. Among other + // architectures, only x86_64 is currently implemented here. + if (G.getTargetTriple().getArch() != Triple::x86_64) + return Error::success(); + + raw_null_ostream CommentStream; + auto &STI = Disassembler.getSubtargetInfo(); + + // Determine the function bounds + auto &B = Sym.getBlock(); + assert(!B.isZeroFill() && "expected content block"); + auto SymAddress = Sym.getAddress(); + auto SymStartInBlock = + (const uint8_t *)B.getContent().data() + Sym.getOffset(); + auto SymSize = Sym.getSize() ? Sym.getSize() : B.getSize() - Sym.getOffset(); + auto Content = makeArrayRef(SymStartInBlock, SymSize); + + LLVM_DEBUG(dbgs() << "Adding self-relocations to " << Sym.getName() << "\n"); + + SmallDenseSet ExistingRelocations; + for (auto &E : B.edges()) { + if (E.isRelocation()) + ExistingRelocations.insert(E.getOffset()); + } + + size_t I = 0; + while (I < Content.size()) { + MCInst Instr; + uint64_t InstrSize = 0; + uint64_t InstrStart = SymAddress + I; + auto DecodeStatus = Disassembler.getInstruction( + Instr, InstrSize, Content.drop_front(I), InstrStart, CommentStream); + if (DecodeStatus != MCDisassembler::Success) { + LLVM_DEBUG(dbgs() << "Aborting due to disassembly failure at address " + << InstrStart); + return make_error( + formatv("failed to disassemble at address {0:x16}", InstrStart), + inconvertibleErrorCode()); + } + // Advance to the next instruction. + I += InstrSize; + + // Check for a PC-relative address equal to the symbol itself. + auto PCRelAddr = + MIA.evaluateMemoryOperandAddress(Instr, &STI, InstrStart, InstrSize); + if (!PCRelAddr.hasValue() || PCRelAddr.getValue() != SymAddress) + continue; + + auto RelocOffInInstr = + MIA.getMemoryOperandRelocationOffset(Instr, InstrSize); + if (!RelocOffInInstr.hasValue() || + InstrSize - RelocOffInInstr.getValue() != 4) { + LLVM_DEBUG(dbgs() << "Skipping unknown self-relocation at " + << InstrStart); + continue; + } + + auto RelocOffInBlock = + InstrStart + *RelocOffInInstr - SymAddress + Sym.getOffset(); + if (ExistingRelocations.contains(RelocOffInBlock)) + continue; + + LLVM_DEBUG(dbgs() << "Adding delta32 self-relocation at " << InstrStart); + B.addEdge(jitlink::x86_64::Delta32, RelocOffInBlock, Sym, /*Addend=*/-4); + } + return Error::success(); +} + } // End namespace orc. } // End namespace llvm. diff --git a/llvm/lib/MC/MCInstrAnalysis.cpp b/llvm/lib/MC/MCInstrAnalysis.cpp index ef5516c..52b5918 100644 --- a/llvm/lib/MC/MCInstrAnalysis.cpp +++ b/llvm/lib/MC/MCInstrAnalysis.cpp @@ -34,3 +34,9 @@ Optional MCInstrAnalysis::evaluateMemoryOperandAddress( uint64_t Size) const { return None; } + +Optional +MCInstrAnalysis::getMemoryOperandRelocationOffset(const MCInst &Inst, + uint64_t Size) const { + return None; +} \ No newline at end of file diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index aeea0aa..9da0a81 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -408,6 +408,9 @@ public: const MCSubtargetInfo *STI, uint64_t Addr, uint64_t Size) const override; + Optional + getMemoryOperandRelocationOffset(const MCInst &Inst, + uint64_t Size) const override; }; #define GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS @@ -557,6 +560,30 @@ Optional X86MCInstrAnalysis::evaluateMemoryOperandAddress( return None; } +Optional +X86MCInstrAnalysis::getMemoryOperandRelocationOffset(const MCInst &Inst, + uint64_t Size) const { + if (Inst.getOpcode() != X86::LEA64r) + return None; + const MCInstrDesc &MCID = Info->get(Inst.getOpcode()); + int MemOpStart = X86II::getMemoryOperandNo(MCID.TSFlags); + if (MemOpStart == -1) + return None; + MemOpStart += X86II::getOperandBias(MCID); + const MCOperand &SegReg = Inst.getOperand(MemOpStart + X86::AddrSegmentReg); + const MCOperand &BaseReg = Inst.getOperand(MemOpStart + X86::AddrBaseReg); + const MCOperand &IndexReg = Inst.getOperand(MemOpStart + X86::AddrIndexReg); + const MCOperand &ScaleAmt = Inst.getOperand(MemOpStart + X86::AddrScaleAmt); + const MCOperand &Disp = Inst.getOperand(MemOpStart + X86::AddrDisp); + // Must be a simple rip-relative address. + if (BaseReg.getReg() != X86::RIP || SegReg.getReg() != 0 || + IndexReg.getReg() != 0 || ScaleAmt.getImm() != 1 || !Disp.isImm()) + return None; + // rip-relative ModR/M immediate is 32 bits. + assert(Size > 4 && "invalid instruction size for rip-relative lea"); + return Size - 4; +} + } // end of namespace X86_MC } // end of namespace llvm diff --git a/llvm/test/ExecutionEngine/JITLink/X86/Inputs/x86-64_self_relocation.s b/llvm/test/ExecutionEngine/JITLink/X86/Inputs/x86-64_self_relocation.s new file mode 100644 index 0000000..3738440 --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/X86/Inputs/x86-64_self_relocation.s @@ -0,0 +1,35 @@ + .section __TEXT,__text,regular,pure_instructions + + .globl _form_func_ptr +_form_func_ptr: + leaq _form_func_ptr(%rip), %rax + leaq _other(%rip), %rax + leaq _form_func_ptr(%rip), %rax + nop + leaq _form_func_ptr(%rip), %rax + retq + + .globl _other +_other: + leaq _form_func_ptr(%rip), %rax + retq + +# Return 0 if the pointers formed inside and outside the function are the same. + .globl _main +_main: + pushq %rbp + movq %rsp, %rbp + subq $32, %rsp + movl $0, -4(%rbp) + callq _form_func_ptr + movq %rax, -16(%rbp) + callq _other + movq %rax, -24(%rbp) + movq -16(%rbp), %rax + cmpq -24(%rbp), %rax + setne %al + andb $1, %al + movzbl %al, %eax + addq $32, %rsp + popq %rbp + retq \ No newline at end of file diff --git a/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_self_relocation.test b/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_self_relocation.test new file mode 100644 index 0000000..54f8a8a --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_self_relocation.test @@ -0,0 +1,34 @@ +# RUN: llvm-mc -triple=x86_64-apple-macosx10.9 -filetype=obj -o %t %S/Inputs/x86-64_self_relocation.s +# RUN: llvm-jitlink -noexec -show-graph %t 2>&1 | \ +# RUN: FileCheck -check-prefix=WITHOUT %s +# RUN: llvm-jitlink -noexec -show-graph -add-self-relocations %t 2>&1 | \ +# RUN: FileCheck -check-prefix=CHECK %s + +# WITHOUT: block +# WITHOUT-NEXT: symbols +# WITHOUT-NEXT: _form_func_ptr +# WITHOUT-NEXT: edges +# WITHOUT-NEXT: (block + 0x0000000a), addend = -0x00000004, kind = Delta32, target = _other +# WITHOUT-NOT: kind = + +# CHECK: block +# CHECK-NEXT: symbols +# CHECK-NEXT: _form_func_ptr +# CHECK-NEXT: edges +# CHECK-NEXT: (block + 0x00000003), addend = -0x00000004, kind = Delta32, target = _form_func_ptr +# CHECK-NEXT: (block + 0x0000000a), addend = -0x00000004, kind = Delta32, target = _other +# CHECK-NEXT: (block + 0x00000011), addend = -0x00000004, kind = Delta32, target = _form_func_ptr +# CHECK-NEXT: (block + 0x00000019), addend = -0x00000004, kind = Delta32, target = _form_func_ptr +# CHECK-NOT: kind = + +# WITHOUT: block +# WITHOUT-NEXT: symbols +# WITHOUT-NEXT: _other +# WITHOUT-NEXT: edges +# WITHOUT-NEXT: kind = Delta32, target = _form_func_ptr + +# CHECK: block +# CHECK-NEXT: symbols +# CHECK-NEXT: _other +# CHECK-NEXT: edges +# CHECK-NEXT: kind = Delta32, target = _form_func_ptr diff --git a/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_self_relocation_exec.test b/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_self_relocation_exec.test new file mode 100644 index 0000000..49a1c00 --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_self_relocation_exec.test @@ -0,0 +1,8 @@ +# RUN: llvm-mc -triple=x86_64-apple-macosx10.9 -filetype=obj -o %t %S/Inputs/x86-64_self_relocation.s +# RUN: llvm-jitlink -show-graph %t +# RUN: llvm-jitlink -show-graph -add-self-relocations %t + +# Ensure that the added relocation does not create an incorrect pointer. + +# Execution test +# REQUIRES: system-darwin && native && target-x86_64 diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp index 91908ea..b9faf3d 100644 --- a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp +++ b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp @@ -21,6 +21,7 @@ #include "llvm/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.h" #include "llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h" #include "llvm/ExecutionEngine/Orc/ExecutionUtils.h" +#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" #include "llvm/ExecutionEngine/Orc/MachOPlatform.h" #include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h" #include "llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h" @@ -28,6 +29,7 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" @@ -178,6 +180,11 @@ static cl::opt OrcRuntime("orc-runtime", cl::desc("Use ORC runtime from given path"), cl::init(""), cl::cat(JITLinkCategory)); +static cl::opt AddSelfRelocations( + "add-self-relocations", + cl::desc("Add relocations to function pointers to the current function"), + cl::init(false), cl::cat(JITLinkCategory)); + ExitOnError ExitOnErr; LLVM_ATTRIBUTE_USED void linkComponents() { @@ -194,6 +201,8 @@ extern "C" void llvm_jitlink_setTestResultOverride(int64_t Value) { UseTestResultOverride = true; } +static Error addSelfRelocations(LinkGraph &G); + namespace llvm { static raw_ostream & @@ -1086,6 +1095,9 @@ void Session::modifyPassConfig(const Triple &TT, dumpSectionContents(outs(), G); return Error::success(); }); + + if (AddSelfRelocations) + PassConfig.PostPrunePasses.push_back(addSelfRelocations); } Expected Session::findFileInfo(StringRef FileName) { @@ -1374,14 +1386,21 @@ static Error loadObjects(Session &S) { return Error::success(); } -static Error runChecks(Session &S) { - const auto &TT = S.ES.getExecutorProcessControl().getTargetTriple(); - - if (CheckFiles.empty()) - return Error::success(); - - LLVM_DEBUG(dbgs() << "Running checks...\n"); +namespace { +struct TargetInfo { + const Target *TheTarget; + std::unique_ptr STI; + std::unique_ptr MRI; + std::unique_ptr MAI; + std::unique_ptr Ctx; + std::unique_ptr Disassembler; + std::unique_ptr MII; + std::unique_ptr MIA; + std::unique_ptr InstPrinter; +}; +} // anonymous namespace +static TargetInfo getTargetInfo(const Triple &TT) { auto TripleName = TT.str(); std::string ErrorStr; const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, ErrorStr); @@ -1412,19 +1431,49 @@ static Error runChecks(Session &S) { TripleName, inconvertibleErrorCode())); - MCContext Ctx(Triple(TripleName), MAI.get(), MRI.get(), STI.get()); + auto Ctx = std::make_unique(Triple(TripleName), MAI.get(), + MRI.get(), STI.get()); std::unique_ptr Disassembler( - TheTarget->createMCDisassembler(*STI, Ctx)); + TheTarget->createMCDisassembler(*STI, *Ctx)); if (!Disassembler) ExitOnErr(make_error("Unable to create disassembler for " + TripleName, inconvertibleErrorCode())); std::unique_ptr MII(TheTarget->createMCInstrInfo()); + if (!MII) + ExitOnErr(make_error("Unable to create instruction info for" + + TripleName, + inconvertibleErrorCode())); + + std::unique_ptr MIA( + TheTarget->createMCInstrAnalysis(MII.get())); + if (!MIA) + ExitOnErr(make_error( + "Unable to create instruction analysis for" + TripleName, + inconvertibleErrorCode())); std::unique_ptr InstPrinter( TheTarget->createMCInstPrinter(Triple(TripleName), 0, *MAI, *MII, *MRI)); + if (!InstPrinter) + ExitOnErr(make_error( + "Unable to create instruction printer for" + TripleName, + inconvertibleErrorCode())); + return {TheTarget, std::move(STI), std::move(MRI), + std::move(MAI), std::move(Ctx), std::move(Disassembler), + std::move(MII), std::move(MIA), std::move(InstPrinter)}; +} + +static Error runChecks(Session &S) { + const auto &TT = S.ES.getExecutorProcessControl().getTargetTriple(); + + if (CheckFiles.empty()) + return Error::success(); + + LLVM_DEBUG(dbgs() << "Running checks...\n"); + + auto TI = getTargetInfo(TT); auto IsSymbolValid = [&S](StringRef Symbol) { return S.isSymbolRegistered(Symbol); @@ -1448,8 +1497,8 @@ static Error runChecks(Session &S) { RuntimeDyldChecker Checker( IsSymbolValid, GetSymbolInfo, GetSectionInfo, GetStubInfo, GetGOTInfo, - TT.isLittleEndian() ? support::little : support::big, Disassembler.get(), - InstPrinter.get(), dbgs()); + TT.isLittleEndian() ? support::little : support::big, + TI.Disassembler.get(), TI.InstPrinter.get(), dbgs()); std::string CheckLineStart = "# " + CheckName + ":"; for (auto &CheckFile : CheckFiles) { @@ -1463,6 +1512,16 @@ static Error runChecks(Session &S) { return Error::success(); } +static Error addSelfRelocations(LinkGraph &G) { + auto TI = getTargetInfo(G.getTargetTriple()); + for (auto *Sym : G.defined_symbols()) + if (Sym->isCallable()) + if (auto Err = addFunctionPointerRelocationsToCurrentSymbol( + *Sym, G, *TI.Disassembler, *TI.MIA)) + return Err; + return Error::success(); +} + static void dumpSessionStats(Session &S) { if (!ShowSizes) return; -- 2.7.4