From 22a2209433a40508c2866ce8f547fcf319f83186 Mon Sep 17 00:00:00 2001 From: David Green Date: Tue, 17 Sep 2019 15:23:09 +0000 Subject: [PATCH] [ARM] Reserve an emergency spill slot for fp16 addressing modes that need it Similar to D67327, but this time for the FP16 VLDR and VSTR instructions that use the AddrMode5FP16 addressing mode. We need to reserve an emergency spill slot for instructions that will be out of range to use sp directly. AddrMode5FP16 is 8 bits with a scale of 2. Differential Revision: https://reviews.llvm.org/D67483 llvm-svn: 372132 --- llvm/lib/Target/ARM/ARMFrameLowering.cpp | 15 ++++- llvm/test/CodeGen/Thumb2/fp16-stacksplot.mir | 95 ++++++++++++++++++++++++++++ 2 files changed, 109 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/Thumb2/fp16-stacksplot.mir diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp index b3413ec02..03681d5 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -1512,6 +1512,8 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF, unsigned Limit = (1 << 12) - 1; for (auto &MBB : MF) { for (auto &MI : MBB) { + if (MI.isDebugInstr()) + continue; for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { if (!MI.getOperand(i).isFI()) continue; @@ -1522,6 +1524,10 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF, Limit = std::min(Limit, (1U << 8) - 1); break; } + // t2ADDri will not require an extra register, it can reuse the + // destination. + if (MI.getOpcode() == ARM::t2ADDri || MI.getOpcode() == ARM::t2ADDri12) + break; const MCInstrDesc &MCID = MI.getDesc(); const TargetRegisterClass *RegClass = TII.getRegClass(MCID, i, TRI, MF); @@ -1530,10 +1536,17 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF, // Otherwise check the addressing mode. switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) { + case ARMII::AddrMode_i12: + case ARMII::AddrMode2: + // Default 12 bit limit. + break; case ARMII::AddrMode3: case ARMII::AddrModeT2_i8: Limit = std::min(Limit, (1U << 8) - 1); break; + case ARMII::AddrMode5FP16: + Limit = std::min(Limit, ((1U << 8) - 1) * 2); + break; case ARMII::AddrMode5: case ARMII::AddrModeT2_i8s4: case ARMII::AddrModeT2_ldrex: @@ -1560,7 +1573,7 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF, Limit = std::min(Limit, ((1U << 7) - 1) * 4); break; default: - break; + llvm_unreachable("Unhandled addressing mode in stack size limit calculation"); } break; // At most one FI per instruction } diff --git a/llvm/test/CodeGen/Thumb2/fp16-stacksplot.mir b/llvm/test/CodeGen/Thumb2/fp16-stacksplot.mir new file mode 100644 index 0000000..856f307 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/fp16-stacksplot.mir @@ -0,0 +1,95 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -o - %s -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+fullfp16 -run-pass=stack-protector -run-pass=prologepilog | FileCheck %s +--- +name: func0 +tracksRegLiveness: true +stack: + - { id: 0, name: '', type: default, offset: 0, size: 2, alignment: 2, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + local-offset: -1200, debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } + - { id: 1, name: '', type: default, offset: 0, size: 1200, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + local-offset: -2, debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } +body: | + bb.0: + ; CHECK-LABEL: name: func0 + ; CHECK: liveins: $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11, $lr + ; CHECK: $sp = frame-setup t2STMDB_UPD $sp, 14, $noreg, killed $r4, killed $r5, killed $r6, killed $r7, killed $r8, killed $r9, killed $r10, killed $r11, killed $lr + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 36 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r11, -8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r10, -12 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r9, -16 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r8, -20 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -24 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r6, -28 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r5, -32 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -36 + ; CHECK: $sp = frame-setup t2SUBri killed $sp, 1208, 14, $noreg, $noreg + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 1244 + ; CHECK: $r0 = IMPLICIT_DEF + ; CHECK: $r1 = IMPLICIT_DEF + ; CHECK: $r2 = IMPLICIT_DEF + ; CHECK: $r3 = IMPLICIT_DEF + ; CHECK: $r4 = IMPLICIT_DEF + ; CHECK: $r5 = IMPLICIT_DEF + ; CHECK: $r6 = IMPLICIT_DEF + ; CHECK: $r7 = IMPLICIT_DEF + ; CHECK: $r8 = IMPLICIT_DEF + ; CHECK: $r9 = IMPLICIT_DEF + ; CHECK: $r10 = IMPLICIT_DEF + ; CHECK: $r11 = IMPLICIT_DEF + ; CHECK: $r12 = IMPLICIT_DEF + ; CHECK: $lr = IMPLICIT_DEF + ; CHECK: t2STRi12 killed $r0, $sp, 0, 14, $noreg :: (store 4 into %stack.2) + ; CHECK: $r0 = t2ADDri killed $sp, 1024, 14, $noreg, $noreg + ; CHECK: renamable $s4 = VLDRH killed $r0, 91, 14, $noreg :: (dereferenceable load 2 from %stack.0) + ; CHECK: $r0 = t2LDRi12 $sp, 0, 14, $noreg :: (load 4 from %stack.2) + ; CHECK: KILL $r0 + ; CHECK: KILL $r1 + ; CHECK: KILL $r2 + ; CHECK: KILL $r3 + ; CHECK: KILL $r4 + ; CHECK: KILL $r5 + ; CHECK: KILL $r6 + ; CHECK: KILL $r7 + ; CHECK: KILL $r8 + ; CHECK: KILL $r9 + ; CHECK: KILL $r10 + ; CHECK: KILL $r11 + ; CHECK: KILL $r12 + ; CHECK: KILL $lr + $r0 = IMPLICIT_DEF + $r1 = IMPLICIT_DEF + $r2 = IMPLICIT_DEF + $r3 = IMPLICIT_DEF + $r4 = IMPLICIT_DEF + $r5 = IMPLICIT_DEF + $r6 = IMPLICIT_DEF + $r7 = IMPLICIT_DEF + $r8 = IMPLICIT_DEF + $r9 = IMPLICIT_DEF + $r10 = IMPLICIT_DEF + $r11 = IMPLICIT_DEF + $r12 = IMPLICIT_DEF + $lr = IMPLICIT_DEF + + renamable $s4 = VLDRH %stack.0, 0, 14, $noreg :: (dereferenceable load 2 from %stack.0) + + KILL $r0 + KILL $r1 + KILL $r2 + KILL $r3 + KILL $r4 + KILL $r5 + KILL $r6 + KILL $r7 + KILL $r8 + KILL $r9 + KILL $r10 + KILL $r11 + KILL $r12 + KILL $lr +... -- 2.7.4