From 01dc10774eb87bc2f114a0e2d77083dbec62226b Mon Sep 17 00:00:00 2001 From: Keith Walker Date: Thu, 26 Mar 2020 09:40:28 +0000 Subject: [PATCH] [ARM] unwinding .pad instructions missing in execute-only prologue If the stack pointer is altered for local variables and we are generating Thumb2 execute-only code, the .pad directive is missing. Usually the size of the adjustment is stored in a PC-relative location and loaded into a register which is then added to the stack pointer. However, when we are generating execute-only code, the size of the adjustment is instead generated using the MOVW/MOVT instruction pair. As a by-product of handling the execute-only case, this also fixes an existing issue that in the non-execute-only case the .pad directive was generated against the load of the constant to a register instruction, instead of the instruction which adds the register to the stack pointer. Differential Revision: https://reviews.llvm.org/D76849 --- llvm/lib/Target/ARM/ARMAsmPrinter.cpp | 85 ++++++++++++++-------- llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp | 7 ++ llvm/lib/Target/ARM/ARMMachineFunctionInfo.h | 1 + llvm/lib/Target/ARM/Thumb1FrameLowering.cpp | 6 +- llvm/test/CodeGen/Thumb/emergency-spill-slot.ll | 6 +- .../CodeGen/Thumb2/thumb2-execute-only-prologue.ll | 38 ++++++++++ 6 files changed, 109 insertions(+), 34 deletions(-) create mode 100644 llvm/test/CodeGen/Thumb2/thumb2-execute-only-prologue.ll diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index 1342f2b..d0adb24 100644 --- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -1084,16 +1084,26 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { unsigned Opc = MI->getOpcode(); unsigned SrcReg, DstReg; - if (Opc == ARM::tPUSH || Opc == ARM::tLDRpci) { - // Two special cases: - // 1) tPUSH does not have src/dst regs. - // 2) for Thumb1 code we sometimes materialize the constant via constpool - // load. 
Yes, this is pretty fragile, but for now I don't see better - // way... :( + switch (Opc) { + case ARM::tPUSH: + // special case: tPUSH does not have src/dst regs. SrcReg = DstReg = ARM::SP; - } else { + break; + case ARM::tLDRpci: + case ARM::t2MOVi16: + case ARM::t2MOVTi16: + // special cases: + // 1) for Thumb1 code we sometimes materialize the constant via constpool + // load. + // 2) for Thumb2 execute only code we materialize the constant via + // immediate constants in 2 seperate instructions (MOVW/MOVT). + SrcReg = ~0U; + DstReg = MI->getOperand(0).getReg(); + break; + default: SrcReg = MI->getOperand(1).getReg(); DstReg = MI->getOperand(0).getReg(); + break; } // Try to figure out the unwinding opcode out of src / dst regs. @@ -1197,23 +1207,11 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { case ARM::tADDrSPi: Offset = -MI->getOperand(2).getImm()*4; break; - case ARM::tLDRpci: { - // Grab the constpool index and check, whether it corresponds to - // original or cloned constpool entry. - unsigned CPI = MI->getOperand(1).getIndex(); - const MachineConstantPool *MCP = MF.getConstantPool(); - if (CPI >= MCP->getConstants().size()) - CPI = AFI->getOriginalCPIdx(CPI); - assert(CPI != -1U && "Invalid constpool index"); - - // Derive the actual offset. - const MachineConstantPoolEntry &CPE = MCP->getConstants()[CPI]; - assert(!CPE.isMachineConstantPoolEntry() && "Invalid constpool entry"); - // FIXME: Check for user, it should be "add" instruction! 
- Offset = -cast(CPE.Val.ConstVal)->getSExtValue(); + case ARM::tADDhirr: + Offset = + -AFI->EHPrologueOffsetInRegs.lookup(MI->getOperand(2).getReg()); break; } - } if (MAI->getExceptionHandlingType() == ExceptionHandling::ARM) { if (DstReg == FramePtr && FramePtr != ARM::SP) @@ -1233,14 +1231,43 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { } else if (DstReg == ARM::SP) { MI->print(errs()); llvm_unreachable("Unsupported opcode for unwinding information"); - } else if (Opc == ARM::tMOVr) { - // If a Thumb1 function spills r8-r11, we copy the values to low - // registers before pushing them. Record the copy so we can emit the - // correct ".save" later. - AFI->EHPrologueRemappedRegs[DstReg] = SrcReg; } else { - MI->print(errs()); - llvm_unreachable("Unsupported opcode for unwinding information"); + int64_t Offset = 0; + switch (Opc) { + case ARM::tMOVr: + // If a Thumb1 function spills r8-r11, we copy the values to low + // registers before pushing them. Record the copy so we can emit the + // correct ".save" later. + AFI->EHPrologueRemappedRegs[DstReg] = SrcReg; + break; + case ARM::tLDRpci: { + // Grab the constpool index and check, whether it corresponds to + // original or cloned constpool entry. + unsigned CPI = MI->getOperand(1).getIndex(); + const MachineConstantPool *MCP = MF.getConstantPool(); + if (CPI >= MCP->getConstants().size()) + CPI = AFI->getOriginalCPIdx(CPI); + assert(CPI != -1U && "Invalid constpool index"); + + // Derive the actual offset. 
+ const MachineConstantPoolEntry &CPE = MCP->getConstants()[CPI]; + assert(!CPE.isMachineConstantPoolEntry() && "Invalid constpool entry"); + Offset = cast(CPE.Val.ConstVal)->getSExtValue(); + AFI->EHPrologueOffsetInRegs[DstReg] = Offset; + break; + } + case ARM::t2MOVi16: + Offset = MI->getOperand(1).getImm(); + AFI->EHPrologueOffsetInRegs[DstReg] = Offset; + break; + case ARM::t2MOVTi16: + Offset = MI->getOperand(2).getImm(); + AFI->EHPrologueOffsetInRegs[DstReg] |= (Offset << 16); + break; + default: + MI->print(errs()); + llvm_unreachable("Unsupported opcode for unwinding information"); + } } } } diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 9fc5667..4afa3a4 100644 --- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -852,10 +852,13 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, unsigned ImmVal = (unsigned)MO.getImm(); unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal); unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal); + unsigned MIFlags = MI.getFlags(); LO16 = LO16.addImm(SOImmValV1); HI16 = HI16.addImm(SOImmValV2); LO16.cloneMemRefs(MI); HI16.cloneMemRefs(MI); + LO16.setMIFlags(MIFlags); + HI16.setMIFlags(MIFlags); LO16.addImm(Pred).addReg(PredReg).add(condCodeOp()); HI16.addImm(Pred).addReg(PredReg).add(condCodeOp()); if (isCC) @@ -867,6 +870,7 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, unsigned LO16Opc = 0; unsigned HI16Opc = 0; + unsigned MIFlags = MI.getFlags(); if (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm) { LO16Opc = ARM::t2MOVi16; HI16Opc = ARM::t2MOVTi16; @@ -880,6 +884,9 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) .addReg(DstReg); + LO16.setMIFlags(MIFlags); + HI16.setMIFlags(MIFlags); + switch (MO.getType()) { case MachineOperand::MO_Immediate: { unsigned Imm = 
MO.getImm(); diff --git a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h index 7adf52e..85c6837 100644 --- a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -245,6 +245,7 @@ public: } DenseMap EHPrologueRemappedRegs; + DenseMap EHPrologueOffsetInRegs; void setPreservesR0() { PreservesR0 = true; } bool getPreservesR0() const { return PreservesR0; } diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp index c5ca64b..5676c4f 100644 --- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -88,8 +88,10 @@ emitPrologueEpilogueSPUpdate(MachineBasicBlock &MBB, 0, MIFlags); } BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDhirr), ARM::SP) - .addReg(ARM::SP).addReg(ScratchReg, RegState::Kill) - .add(predOps(ARMCC::AL)); + .addReg(ARM::SP) + .addReg(ScratchReg, RegState::Kill) + .add(predOps(ARMCC::AL)) + .setMIFlags(MIFlags); return; } // FIXME: This is assuming the heuristics in emitThumbRegPlusImmediate diff --git a/llvm/test/CodeGen/Thumb/emergency-spill-slot.ll b/llvm/test/CodeGen/Thumb/emergency-spill-slot.ll index 13d2851..e11c5bb 100644 --- a/llvm/test/CodeGen/Thumb/emergency-spill-slot.ll +++ b/llvm/test/CodeGen/Thumb/emergency-spill-slot.ll @@ -9,8 +9,8 @@ define void @vla_emergency_spill(i32 %n) { ; CHECK-NEXT: push {r4, r5, r6, r7, lr} ; CHECK-NEXT: .setfp r7, sp, #12 ; CHECK-NEXT: add r7, sp, #12 -; CHECK-NEXT: .pad #4100 ; CHECK-NEXT: ldr r6, .LCPI0_0 +; CHECK-NEXT: .pad #4100 ; CHECK-NEXT: add sp, r6 ; CHECK-NEXT: mov r6, sp ; CHECK-NEXT: adds r0, r0, #7 @@ -59,8 +59,8 @@ define void @simple_emergency_spill(i32 %n) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r6, r7, lr} ; CHECK-NEXT: push {r4, r5, r6, r7, lr} -; CHECK-NEXT: .pad #8196 ; CHECK-NEXT: ldr r7, .LCPI1_0 +; CHECK-NEXT: .pad #8196 ; CHECK-NEXT: add sp, r7 ; CHECK-NEXT: add r0, sp, #4 ; CHECK-NEXT: ldr r1, 
.LCPI1_2 @@ -119,8 +119,8 @@ define void @simple_emergency_spill_nor7(i32 %n) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r6, lr} ; CHECK-NEXT: push {r4, r5, r6, lr} -; CHECK-NEXT: .pad #8196 ; CHECK-NEXT: ldr r6, .LCPI2_0 +; CHECK-NEXT: .pad #8196 ; CHECK-NEXT: add sp, r6 ; CHECK-NEXT: add r0, sp, #4 ; CHECK-NEXT: ldr r1, .LCPI2_2 diff --git a/llvm/test/CodeGen/Thumb2/thumb2-execute-only-prologue.ll b/llvm/test/CodeGen/Thumb2/thumb2-execute-only-prologue.ll new file mode 100644 index 0000000..a8f4275 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/thumb2-execute-only-prologue.ll @@ -0,0 +1,38 @@ +; RUN: llc < %s -mtriple=thumbv8m.base-arm-none-eabi | FileCheck %s + +define void @fn() { +entry: +; CHECK-LABEL: fn: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r5, r6, lr} +; CHECK-NEXT: push {r4, r5, r6, lr} +; CHECK-NEXT: ldr r6, .LCPI0_0 +; CHECK-NEXT: .pad #1600 +; CHECK-NEXT: add sp, r6 +; CHECK: .LCPI0_0: +; CHECK_NEXT: long 4294963196 + %a = alloca [400 x i32], align 4 + %arraydecay = getelementptr inbounds [400 x i32], [400 x i32]* %a, i32 0, i32 0 + call void @bar(i32* %arraydecay) + ret void +} + +define void @execute_only_fn() #0 { +entry: +; CHECK-LABEL: execute_only_fn: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r5, r6, lr} +; CHECK-NEXT: push {r4, r5, r6, lr} +; CHECK-NEXT: movw r6, #63936 +; CHECK-NEXT: movt r6, #65535 +; CHECK-NEXT: .pad #1600 +; CHECK-NEXT: add sp, r6 + %a = alloca [400 x i32], align 4 + %arraydecay = getelementptr inbounds [400 x i32], [400 x i32]* %a, i32 0, i32 0 + call void @bar(i32* %arraydecay) + ret void +} + +declare dso_local void @bar(i32*) + +attributes #0 = { noinline optnone "target-features"="+armv8-m.base,+execute-only,+thumb-mode" } -- 2.7.4