From 573c7e6b3c79c7ce80a2221e000fab7dd20c0bb4 Mon Sep 17 00:00:00 2001
From: Daniel Bertalan
Date: Thu, 30 Jun 2022 11:01:18 +0200
Subject: [PATCH] [lld-macho] Handle LOH_ARM64_ADRP_LDR linker optimization hints

This linker optimization hint transforms a pair of adrp+ldr (immediate)
instructions into an ldr (literal) load from a PC-relative address if it
is 4-byte aligned and within +/- 1 MiB, as ldr can encode a signed
19-bit offset that gets multiplied by 4.

In the wild, only a small number of these hints are applicable because
not many loads end up close enough to the data segment. However, the
added helper functions will be useful in implementing the rest of the
LOH types.

Differential Revision: https://reviews.llvm.org/D128942
---
 lld/MachO/Arch/ARM64.cpp      | 130 +++++++++++++++++++++++++++++++++++++
 lld/test/MachO/loh-adrp-ldr.s | 149 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 279 insertions(+)
 create mode 100644 lld/test/MachO/loh-adrp-ldr.s

diff --git a/lld/MachO/Arch/ARM64.cpp b/lld/MachO/Arch/ARM64.cpp
index 8c0c2d7..93941f4 100644
--- a/lld/MachO/Arch/ARM64.cpp
+++ b/lld/MachO/Arch/ARM64.cpp
@@ -163,6 +163,21 @@ struct Add {
   uint32_t addend;
 };
 
+// How a loaded value narrower than the destination register is extended.
+// Sign64 and Sign32 have the values of the `opc` field (bits 23:22) of the
+// LDRS* (immediate) encodings, so parseLdr() can cast `opc` directly.
+enum ExtendType { ZeroExtend = 1, Sign64 = 2, Sign32 = 3 };
+
+// Decoded fields of a load instruction with an unsigned immediate offset.
+struct Ldr {
+  uint8_t destRegister;
+  uint8_t baseRegister;
+  uint8_t size; // Access size in bytes.
+  bool isFloat; // True for the SIMD&FP forms.
+  ExtendType extendType;
+  uint64_t offset; // Byte offset, i.e. the imm12 field scaled by `size`.
+};
+
 struct PerformedReloc {
   const Reloc &rel;
   uint64_t referentVA;
@@ -177,6 +192,7 @@ public:
 
   void applyAdrpAdd(const OptimizationHint &);
   void applyAdrpAdrp(const OptimizationHint &);
+  void applyAdrpLdr(const OptimizationHint &);
 
 private:
   uint8_t *buf;
@@ -207,6 +223,48 @@ static bool parseAdd(uint32_t insn, Add &add) {
   return true;
 }
 
+// Decodes `insn` as a load with an unsigned immediate offset (LDR, LDRB, LDRH,
+// LDRSB, LDRSH, LDRSW or LDR (SIMD&FP)) into `ldr`. Returns false if `insn` is
+// not such a load or is a SIMD&FP load of fewer than 4 bytes.
+static bool parseLdr(uint32_t insn, Ldr &ldr) {
+  ldr.destRegister = insn & 0x1f;
+  ldr.baseRegister = (insn >> 5) & 0x1f;
+  uint8_t size = insn >> 30;
+  uint8_t opc = (insn >> 22) & 3;
+
+  if ((insn & 0x3fc00000) == 0x39400000) {
+    // LDR (immediate), LDRB (immediate), LDRH (immediate)
+    ldr.size = 1 << size;
+    ldr.extendType = ZeroExtend;
+    ldr.isFloat = false;
+  } else if ((insn & 0x3f800000) == 0x39800000) {
+    // LDRSB (immediate), LDRSH (immediate), LDRSW (immediate)
+    // The same opcode space holds PRFM (size == 3, opc == 2) and unallocated
+    // encodings (size >= 2, opc == 3), none of which is a load.
+    if (size == 3 || (size == 2 && opc == 3))
+      return false;
+    ldr.size = 1 << size;
+    ldr.extendType = static_cast<ExtendType>(opc);
+    ldr.isFloat = false;
+  } else if ((insn & 0x3f400000) == 0x3d400000) {
+    // LDR (immediate, SIMD&FP)
+    ldr.extendType = ZeroExtend;
+    ldr.isFloat = true;
+    if (size == 2 && opc == 1)
+      ldr.size = 4;
+    else if (size == 3 && opc == 1)
+      ldr.size = 8;
+    else if (size == 0 && opc == 3)
+      ldr.size = 16;
+    else
+      return false;
+  } else {
+    return false;
+  }
+  ldr.offset = ((insn >> 10) & 0xfff) * ldr.size;
+  return true;
+}
+
 static void writeAdr(void *loc, uint32_t dest, int32_t delta) {
   uint32_t opcode = 0x10000000;
   uint32_t immHi = (delta & 0x001ffffc) << 3;
@@ -216,6 +274,33 @@ static void writeAdr(void *loc, uint32_t dest, int32_t delta) {
 
 static void writeNop(void *loc) { write32le(loc, 0xd503201f); }
 
+// Writes an LDR (literal) instruction to `loc` that loads from `delta` bytes
+// relative to the instruction, keeping the destination register, access size
+// and extension of `original`. Only 4-, 8- and 16-byte loads are encodable.
+static void writeLiteralLdr(void *loc, Ldr original, int32_t delta) {
+  // imm19 is delta / 4 placed at bits [23:5]. Shift as unsigned: `delta` may
+  // be negative, and left-shifting a negative value is undefined before C++20.
+  uint32_t imm19 = (static_cast<uint32_t>(delta) << 3) & 0x00ffffe0;
+  uint32_t opcode = 0;
+  switch (original.size) {
+  case 4:
+    if (original.isFloat)
+      opcode = 0x1c000000;
+    else
+      opcode = original.extendType == Sign64 ? 0x98000000 : 0x18000000;
+    break;
+  case 8:
+    opcode = original.isFloat ? 0x5c000000 : 0x58000000;
+    break;
+  case 16:
+    opcode = 0x9c000000;
+    break;
+  default:
+    assert(false && "Invalid size for literal ldr");
+  }
+  write32le(loc, opcode | imm19 | original.destRegister);
+}
+
 uint64_t OptimizationHintContext::getRelocTarget(const Reloc &reloc) {
   size_t relocIdx = &reloc - isec->relocs.data();
   return relocTargets[relocIdx];
@@ -316,6 +401,49 @@ void OptimizationHintContext::applyAdrpAdrp(const OptimizationHint &hint) {
   writeNop(buf + hint.offset0 + hint.delta[0]);
 }
 
+// Transforms a pair of adrp+ldr (immediate) instructions into an ldr (literal)
+// load from a PC-relative address if it is 4-byte aligned and within +/- 1 MiB,
+// as ldr can encode a signed 19-bit offset that gets multiplied by 4.
+//
+//   adrp xN, _foo@PAGE
+//   ldr  xM, [xN, _foo@PAGEOFF]
+// ->
+//   nop
+//   ldr  xM, _foo
+void OptimizationHintContext::applyAdrpLdr(const OptimizationHint &hint) {
+  uint32_t ins1 = read32le(buf + hint.offset0);
+  uint32_t ins2 = read32le(buf + hint.offset0 + hint.delta[0]);
+  Adrp adrp;
+  if (!parseAdrp(ins1, adrp))
+    return;
+  Ldr ldr;
+  if (!parseLdr(ins2, ldr))
+    return;
+  if (adrp.destRegister != ldr.baseRegister)
+    return;
+
+  Optional<PerformedReloc> rel1 = findPrimaryReloc(hint.offset0);
+  Optional<PerformedReloc> rel2 = findReloc(hint.offset0 + hint.delta[0]);
+  if (!rel1 || !rel2)
+    return;
+  // The ldr's immediate must be the page offset of the adrp's target.
+  if (ldr.offset != (rel1->referentVA & 0xfff))
+    return;
+  // A literal load can only address 4-byte aligned locations.
+  if ((rel1->referentVA & 3) != 0)
+    return;
+  // There is no literal form of the byte and halfword loads.
+  if (ldr.size == 1 || ldr.size == 2)
+    return;
+  // Displacement from the ldr (section VA + reloc offset) to the target.
+  int64_t delta = rel1->referentVA - rel2->rel.offset - isec->getVA();
+  if (delta >= (1 << 20) || delta < -(1 << 20))
+    return;
+
+  writeNop(buf + hint.offset0);
+  writeLiteralLdr(buf + hint.offset0 + hint.delta[0], ldr, delta);
+}
+
 void ARM64::applyOptimizationHints(uint8_t *buf, const ConcatInputSection *isec,
                                    ArrayRef<uint64_t> relocTargets) const {
   assert(isec);
@@ -332,6 +460,8 @@ void ARM64::applyOptimizationHints(uint8_t *buf, const ConcatInputSection *isec,
       // might cause its targets to be turned into NOPs.
       break;
     case LOH_ARM64_ADRP_LDR:
+      ctx1.applyAdrpLdr(hint);
+      break;
     case LOH_ARM64_ADRP_ADD_LDR:
     case LOH_ARM64_ADRP_LDR_GOT_LDR:
     case LOH_ARM64_ADRP_ADD_STR:
diff --git a/lld/test/MachO/loh-adrp-ldr.s b/lld/test/MachO/loh-adrp-ldr.s
new file mode 100644
index 0000000..46e8e3a
--- /dev/null
+++ b/lld/test/MachO/loh-adrp-ldr.s
@@ -0,0 +1,149 @@
+# REQUIRES: aarch64
+
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t.o
+# RUN: %lld -arch arm64 %t.o -o %t
+# RUN: llvm-objdump -d --macho %t | FileCheck %s
+
+.text
+.align 2
+_before_far:
+  .space 1048576
+
+.align 2
+_before_near:
+  .quad 0
+
+.globl _main
+# CHECK-LABEL: _main:
+_main:
+## Out of range, before
+L1: adrp x0, _before_far@PAGE
+L2: ldr x0, [x0, _before_far@PAGEOFF]
+# CHECK-NEXT: adrp x0
+# CHECK-NEXT: ldr x0
+
+## In range, before
+L3: adrp x1, _before_near@PAGE
+L4: ldr x1, [x1, _before_near@PAGEOFF]
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr x1, #-20
+
+## Registers don't match (invalid input)
+L5: adrp x2, _before_near@PAGE
+L6: ldr x3, [x3, _before_near@PAGEOFF]
+# CHECK-NEXT: adrp x2
+# CHECK-NEXT: ldr x3
+
+## Targets don't match (invalid input)
+L7: adrp x4, _before_near@PAGE
+L8: ldr x4, [x4, _after_near@PAGEOFF]
+# CHECK-NEXT: adrp x4
+# CHECK-NEXT: ldr x4
+
+## Not an adrp instruction
+L9: udf 0
+L10: ldr x5, [x5, _after_near@PAGEOFF]
+# CHECK-NEXT: udf
+# CHECK-NEXT: ldr x5
+
+## Not an ldr with an immediate offset
+L11: adrp x6, _after_near@PAGE
+L12: ldr x6, 0
+# CHECK-NEXT: adrp x6
+# CHECK-NEXT: ldr x6, #0
+
+## Target is not aligned to 4 bytes
+L13: adrp x7, _after_unaligned@PAGE
+L14: ldr x7, [x7, _after_unaligned@PAGEOFF]
+# CHECK-NEXT: adrp x7
+# CHECK-NEXT: ldr x7
+
+## Byte load, unsupported
+L15: adrp x8, _after_near@PAGE
+L16: ldr b8, [x8, _after_near@PAGEOFF]
+# CHECK-NEXT: adrp x8
+# CHECK-NEXT: ldr b8
+
+## Halfword load, unsupported
+L17: adrp x9, _after_near@PAGE
+L18: ldr h9, [x9, _after_near@PAGEOFF]
+# CHECK-NEXT: adrp x9
+# CHECK-NEXT: ldr h9
+
+## Word load
+L19: adrp x10, _after_near@PAGE
+L20: ldr w10, [x10, _after_near@PAGEOFF]
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr w10, _after_near
+
+## With addend
+L21: adrp x11, _after_near@PAGE + 8
+L22: ldr x11, [x11, _after_near@PAGEOFF + 8]
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr x11
+
+## Signed 32-bit read from 8-bit value, unsupported
+L23: adrp x12, _after_near@PAGE
+L24: ldrsb w12, [x12, _after_near@PAGEOFF]
+# CHECK-NEXT: adrp x12
+# CHECK-NEXT: ldrsb w12
+
+## 64-bit load from signed 32-bit value
+L25: adrp x13, _after_near@PAGE
+L26: ldrsw x13, [x13, _after_near@PAGEOFF]
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldrsw x13, _after_near
+
+## Single precision FP read
+L27: adrp x14, _after_near@PAGE
+L28: ldr s0, [x14, _after_near@PAGEOFF]
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr s0, _after_near
+
+## Double precision FP read
+L29: adrp x15, _after_near@PAGE
+L30: ldr d0, [x15, _after_near@PAGEOFF]
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr d0, _after_near
+
+## Quad precision FP read
+L31: adrp x16, _after_near@PAGE
+L32: ldr q0, [x16, _after_near@PAGEOFF]
+# CHECK-NEXT: nop
+# CHECK-NEXT: ldr q0, _after_near
+
+## Out of range, after
+L33: adrp x17, _after_far@PAGE
+L34: ldr x17, [x17, _after_far@PAGEOFF]
+# CHECK-NEXT: adrp x17
+# CHECK-NEXT: ldr x17
+
+.data
+.align 4
+_after_near:
+  .quad 0
+  .quad 0
+  .byte 0
+_after_unaligned:
+.space 1048575
+
+_after_far:
+  .quad 0
+
+.loh AdrpLdr L1, L2
+.loh AdrpLdr L3, L4
+.loh AdrpLdr L5, L6
+.loh AdrpLdr L7, L8
+.loh AdrpLdr L9, L10
+.loh AdrpLdr L11, L12
+.loh AdrpLdr L13, L14
+.loh AdrpLdr L15, L16
+.loh AdrpLdr L17, L18
+.loh AdrpLdr L19, L20
+.loh AdrpLdr L21, L22
+.loh AdrpLdr L23, L24
+.loh AdrpLdr L25, L26
+.loh AdrpLdr L27, L28
+.loh AdrpLdr L29, L30
+.loh AdrpLdr L31, L32
+.loh AdrpLdr L33, L34
+-- 
+2.7.4