From: Momchil Velikov
Date: Thu, 14 May 2020 15:43:20 +0000 (+0100)
Subject: Re-commit: [ARM] CMSE code generation
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=bc2e572f51dac4aed8ef86b2f09427109f0cabb8;p=platform%2Fupstream%2Fllvm.git

Re-commit: [ARM] CMSE code generation

This patch implements the final bits of CMSE code generation:

* emit special linker symbols

* restrict parameter passing to not use memory

* emit BXNS and BLXNS instructions for returns from non-secure entry
  functions, and non-secure function calls, respectively

* emit code to save/restore secure floating-point state around calls
  to non-secure functions

* emit code to save/restore non-secure floating-point state upon entry
  to a non-secure entry function, and return to non-secure state

* emit code to clobber registers not used for arguments and returns
  when switching to non-secure state

Patch by Momchil Velikov, Bradley Smith, Javed Absar, David Green,
possibly others.

Differential Revision: https://reviews.llvm.org/D76518
---

diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index d0adb24..d1f3573 100644
--- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -73,6 +73,16 @@ void ARMAsmPrinter::emitFunctionEntryLabel() {
   } else {
     OutStreamer->emitAssemblerFlag(MCAF_Code32);
   }
+
+  // Emit symbol for CMSE non-secure entry point
+  if (AFI->isCmseNSEntryFunction()) {
+    MCSymbol *S =
+        OutContext.getOrCreateSymbol("__acle_se_" + CurrentFnSym->getName());
+    emitLinkage(&MF->getFunction(), S);
+    OutStreamer->emitSymbolAttribute(S, MCSA_ELF_TypeFunction);
+    OutStreamer->emitLabel(S);
+  }
+
   OutStreamer->emitLabel(CurrentFnSym);
 }

diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 2812a76..f908f88 100644
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -71,6 +71,35 @@ namespace {
                            unsigned Opc, bool IsExt);
     void ExpandMOV32BitImm(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator &MBBI);
+    void CMSEClearGPRegs(MachineBasicBlock &MBB,
+                         MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
+                         const SmallVectorImpl<unsigned> &ClearRegs,
+                         unsigned ClobberReg);
+    MachineBasicBlock &CMSEClearFPRegs(MachineBasicBlock &MBB,
+                                       MachineBasicBlock::iterator MBBI);
+    MachineBasicBlock &CMSEClearFPRegsV8(MachineBasicBlock &MBB,
+                                         MachineBasicBlock::iterator MBBI,
+                                         const BitVector &ClearRegs);
+    MachineBasicBlock &CMSEClearFPRegsV81(MachineBasicBlock &MBB,
+                                          MachineBasicBlock::iterator MBBI,
+                                          const BitVector &ClearRegs);
+    void CMSESaveClearFPRegs(MachineBasicBlock &MBB,
+                             MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+                             SmallVectorImpl<unsigned> &AvailableRegs);
+    void CMSESaveClearFPRegsV8(MachineBasicBlock &MBB,
+                               MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+                               SmallVectorImpl<unsigned> &ScratchRegs);
+    void CMSESaveClearFPRegsV81(MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator MBBI, DebugLoc &DL);
+    void CMSERestoreFPRegs(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+                           SmallVectorImpl<unsigned> &AvailableRegs);
+    void CMSERestoreFPRegsV8(MachineBasicBlock &MBB,
+                             MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+                             SmallVectorImpl<unsigned> &AvailableRegs);
+    void CMSERestoreFPRegsV81(MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+                              SmallVectorImpl<unsigned> &AvailableRegs);
     bool ExpandCMP_SWAP(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI, unsigned LdrexOp,
                         unsigned StrexOp, unsigned UxtOp,
@@ -927,6 +956,573 @@
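// For reference, the source-level feature being implemented (ACLE CMSE,
// compiled with -mcmse). This is a minimal sketch, not part of the patch;
// the function names are illustrative only:
//
//   int __attribute__((cmse_nonsecure_entry)) entry_fn(int x) {
//     return x + 1; // returns via BXNS after clearing secret register state
//   }
//
//   typedef void __attribute__((cmse_nonsecure_call)) nsfunc(void);
//   void call_ns(nsfunc *fn) {
//     fn();         // calls via BLXNS, saving/clearing/restoring FP state
//   }
void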
ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, LLVM_DEBUG(dbgs() << "And: "; HI16.getInstr()->dump();); } +// The size of the area, accessed by that VLSTM/VLLDM +// S0-S31 + FPSCR + 8 more bytes (VPR + pad, or just pad) +static const int CMSE_FP_SAVE_SIZE = 136; + +void determineGPRegsToClear(const MachineInstr &MI, + const std::initializer_list &Regs, + SmallVectorImpl &ClearRegs) { + SmallVector OpRegs; + for (const MachineOperand &Op : MI.operands()) { + if (!Op.isReg() || !Op.isUse()) + continue; + OpRegs.push_back(Op.getReg()); + } + llvm::sort(OpRegs); + + std::set_difference(Regs.begin(), Regs.end(), OpRegs.begin(), OpRegs.end(), + std::back_inserter(ClearRegs)); +} + +void ARMExpandPseudo::CMSEClearGPRegs( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, const SmallVectorImpl &ClearRegs, + unsigned ClobberReg) { + + if (STI->hasV8_1MMainlineOps()) { + // Clear the registers using the CLRM instruction. + MachineInstrBuilder CLRM = + BuildMI(MBB, MBBI, DL, TII->get(ARM::t2CLRM)).add(predOps(ARMCC::AL)); + for (unsigned R : ClearRegs) + CLRM.addReg(R, RegState::Define); + CLRM.addReg(ARM::APSR, RegState::Define); + CLRM.addReg(ARM::CPSR, RegState::Define | RegState::Implicit); + } else { + // Clear the registers and flags by copying ClobberReg into them. + // (Baseline can't do a high register clear in one instruction). + for (unsigned Reg : ClearRegs) { + if (Reg == ClobberReg) + continue; + BuildMI(MBB, MBBI, DL, TII->get(ARM::tMOVr), Reg) + .addReg(ClobberReg) + .add(predOps(ARMCC::AL)); + } + + BuildMI(MBB, MBBI, DL, TII->get(ARM::t2MSR_M)) + .addImm(STI->hasDSP() ? 0xc00 : 0x800) + .addReg(ClobberReg) + .add(predOps(ARMCC::AL)); + } +} + +// Find which FP registers need to be cleared. The parameter `ClearRegs` is +// initialised with all elements set to true, and this function resets all the +// bits, which correspond to register uses. Returns true if any floating point +// register is defined, false otherwise. +static bool determineFPRegsToClear(const MachineInstr &MI, + BitVector &ClearRegs) { + bool DefFP = false; + for (const MachineOperand &Op : MI.operands()) { + if (!Op.isReg()) + continue; + + unsigned Reg = Op.getReg(); + if (Op.isDef()) { + if ((Reg >= ARM::Q0 && Reg <= ARM::Q7) || + (Reg >= ARM::D0 && Reg <= ARM::D15) || + (Reg >= ARM::S0 && Reg <= ARM::S31)) + DefFP = true; + continue; + } + + if (Reg >= ARM::Q0 && Reg <= ARM::Q7) { + int R = Reg - ARM::Q0; + ClearRegs.reset(R * 4, (R + 1) * 4); + } else if (Reg >= ARM::D0 && Reg <= ARM::D15) { + int R = Reg - ARM::D0; + ClearRegs.reset(R * 2, (R + 1) * 2); + } else if (Reg >= ARM::S0 && Reg <= ARM::S31) { + ClearRegs[Reg - ARM::S0] = false; + } + } + return DefFP; +} + +MachineBasicBlock & +ARMExpandPseudo::CMSEClearFPRegs(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) { + BitVector ClearRegs(16, true); + (void)determineFPRegsToClear(*MBBI, ClearRegs); + + if (STI->hasV8_1MMainlineOps()) + return CMSEClearFPRegsV81(MBB, MBBI, ClearRegs); + else + return CMSEClearFPRegsV8(MBB, MBBI, ClearRegs); +} + +// Clear the FP registers for v8.0-M, by copying over the content +// of LR. Uses R12 as a scratch register. +MachineBasicBlock & +ARMExpandPseudo::CMSEClearFPRegsV8(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const BitVector &ClearRegs) { + if (!STI->hasFPRegs()) + return MBB; + + auto &RetI = *MBBI; + const DebugLoc &DL = RetI.getDebugLoc(); + + // If optimising for minimum size, clear FP registers unconditionally. 
+ // Otherwise, check the CONTROL.SFPA (Secure Floating-Point Active) bit and + // don't clear them if they belong to the non-secure state. + MachineBasicBlock *ClearBB, *DoneBB; + if (STI->hasMinSize()) { + ClearBB = DoneBB = &MBB; + } else { + MachineFunction *MF = MBB.getParent(); + ClearBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + + MF->insert(++MBB.getIterator(), ClearBB); + MF->insert(++ClearBB->getIterator(), DoneBB); + + DoneBB->splice(DoneBB->end(), &MBB, MBBI, MBB.end()); + DoneBB->transferSuccessors(&MBB); + MBB.addSuccessor(ClearBB); + MBB.addSuccessor(DoneBB); + ClearBB->addSuccessor(DoneBB); + + // At the new basic blocks we need to have live-in the registers, used + // for the return value as well as LR, used to clear registers. + for (const MachineOperand &Op : RetI.operands()) { + if (!Op.isReg()) + continue; + Register Reg = Op.getReg(); + if (Reg == ARM::NoRegister || Reg == ARM::LR) + continue; + assert(Register::isPhysicalRegister(Reg) && "Unallocated register"); + ClearBB->addLiveIn(Reg); + DoneBB->addLiveIn(Reg); + } + ClearBB->addLiveIn(ARM::LR); + DoneBB->addLiveIn(ARM::LR); + + // Read the CONTROL register. + BuildMI(MBB, MBB.end(), DL, TII->get(ARM::t2MRS_M), ARM::R12) + .addImm(20) + .add(predOps(ARMCC::AL)); + // Check bit 3 (SFPA). + BuildMI(MBB, MBB.end(), DL, TII->get(ARM::t2TSTri)) + .addReg(ARM::R12) + .addImm(8) + .add(predOps(ARMCC::AL)); + // If SFPA is clear, jump over ClearBB to DoneBB. + BuildMI(MBB, MBB.end(), DL, TII->get(ARM::tBcc)) + .addMBB(DoneBB) + .addImm(ARMCC::EQ) + .addReg(ARM::CPSR, RegState::Kill); + } + + // Emit the clearing sequence + for (unsigned D = 0; D < 8; D++) { + // Attempt to clear as double + if (ClearRegs[D * 2 + 0] && ClearRegs[D * 2 + 1]) { + unsigned Reg = ARM::D0 + D; + BuildMI(ClearBB, DL, TII->get(ARM::VMOVDRR), Reg) + .addReg(ARM::LR) + .addReg(ARM::LR) + .add(predOps(ARMCC::AL)); + } else { + // Clear first part as single + if (ClearRegs[D * 2 + 0]) { + unsigned Reg = ARM::S0 + D * 2; + BuildMI(ClearBB, DL, TII->get(ARM::VMOVSR), Reg) + .addReg(ARM::LR) + .add(predOps(ARMCC::AL)); + } + // Clear second part as single + if (ClearRegs[D * 2 + 1]) { + unsigned Reg = ARM::S0 + D * 2 + 1; + BuildMI(ClearBB, DL, TII->get(ARM::VMOVSR), Reg) + .addReg(ARM::LR) + .add(predOps(ARMCC::AL)); + } + } + } + + // Clear FPSCR bits 0-4, 7, 28-31 + // The other bits are program global according to the AAPCS + BuildMI(ClearBB, DL, TII->get(ARM::VMRS), ARM::R12) + .add(predOps(ARMCC::AL)); + BuildMI(ClearBB, DL, TII->get(ARM::t2BICri), ARM::R12) + .addReg(ARM::R12) + .addImm(0x0000009F) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); + BuildMI(ClearBB, DL, TII->get(ARM::t2BICri), ARM::R12) + .addReg(ARM::R12) + .addImm(0xF0000000) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); + BuildMI(ClearBB, DL, TII->get(ARM::VMSR)) + .addReg(ARM::R12) + .add(predOps(ARMCC::AL)); + + return *DoneBB; +} + +MachineBasicBlock & +ARMExpandPseudo::CMSEClearFPRegsV81(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const BitVector &ClearRegs) { + auto &RetI = *MBBI; + + // Emit a sequence of VSCCLRM instructions, one instruction for + // each contiguous sequence of S-registers. + int Start = -1, End = -1; + for (int S = 0, E = ClearRegs.size(); S != E; ++S) { + if (ClearRegs[S] && S == End + 1) { + End = S; // extend range + continue; + } + // Emit current range. 
+ if (Start < End) { + MachineInstrBuilder VSCCLRM = + BuildMI(MBB, MBBI, RetI.getDebugLoc(), TII->get(ARM::VSCCLRMS)) + .add(predOps(ARMCC::AL)); + while (++Start <= End) + VSCCLRM.addReg(ARM::S0 + Start, RegState::Define); + VSCCLRM.addReg(ARM::VPR, RegState::Define); + } + Start = End = S; + } + // Emit last range. + if (Start < End) { + MachineInstrBuilder VSCCLRM = + BuildMI(MBB, MBBI, RetI.getDebugLoc(), TII->get(ARM::VSCCLRMS)) + .add(predOps(ARMCC::AL)); + while (++Start <= End) + VSCCLRM.addReg(ARM::S0 + Start, RegState::Define); + VSCCLRM.addReg(ARM::VPR, RegState::Define); + } + + return MBB; +} + +void ARMExpandPseudo::CMSESaveClearFPRegs( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, + SmallVectorImpl &ScratchRegs) { + if (STI->hasV8_1MMainlineOps()) + CMSESaveClearFPRegsV81(MBB, MBBI, DL); + else + CMSESaveClearFPRegsV8(MBB, MBBI, DL, ScratchRegs); +} + +// Save and clear FP registers if present +void ARMExpandPseudo::CMSESaveClearFPRegsV8( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, + SmallVectorImpl &ScratchRegs) { + if (!STI->hasFPRegs()) + return; + + // Store an available register for FPSCR clearing + assert(!ScratchRegs.empty()); + unsigned SpareReg = ScratchRegs.front(); + + // save space on stack for VLSTM + BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBspi), ARM::SP) + .addReg(ARM::SP) + .addImm(CMSE_FP_SAVE_SIZE >> 2) + .add(predOps(ARMCC::AL)); + + // Use ScratchRegs to store the fp regs + std::vector> ClearedFPRegs; + std::vector NonclearedFPRegs; + for (const MachineOperand &Op : MBBI->operands()) { + if (Op.isReg() && Op.isUse()) { + unsigned Reg = Op.getReg(); + assert(!ARM::DPRRegClass.contains(Reg) || + ARM::DPR_VFP2RegClass.contains(Reg)); + assert(!ARM::QPRRegClass.contains(Reg)); + if (ARM::DPR_VFP2RegClass.contains(Reg)) { + if (ScratchRegs.size() >= 2) { + unsigned SaveReg2 = ScratchRegs.pop_back_val(); + unsigned SaveReg1 = ScratchRegs.pop_back_val(); + ClearedFPRegs.emplace_back(Reg, SaveReg1, SaveReg2); + + // Save the fp register to the normal registers + BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRRD)) + .addReg(SaveReg1, RegState::Define) + .addReg(SaveReg2, RegState::Define) + .addReg(Reg) + .add(predOps(ARMCC::AL)); + } else { + NonclearedFPRegs.push_back(Reg); + } + } else if (ARM::SPRRegClass.contains(Reg)) { + if (ScratchRegs.size() >= 1) { + unsigned SaveReg = ScratchRegs.pop_back_val(); + ClearedFPRegs.emplace_back(Reg, SaveReg, 0); + + // Save the fp register to the normal registers + BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRS), SaveReg) + .addReg(Reg) + .add(predOps(ARMCC::AL)); + } else { + NonclearedFPRegs.push_back(Reg); + } + } + } + } + + bool passesFPReg = (!NonclearedFPRegs.empty() || !ClearedFPRegs.empty()); + + // Lazy store all fp registers to the stack + MachineInstrBuilder VLSTM = BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM)) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)); + + // Restore all arguments + for (const auto &Regs : ClearedFPRegs) { + unsigned Reg, SaveReg1, SaveReg2; + std::tie(Reg, SaveReg1, SaveReg2) = Regs; + if (ARM::DPR_VFP2RegClass.contains(Reg)) + BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVDRR), Reg) + .addReg(SaveReg1) + .addReg(SaveReg2) + .add(predOps(ARMCC::AL)); + else if (ARM::SPRRegClass.contains(Reg)) + BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVSR), Reg) + .addReg(SaveReg1) + .add(predOps(ARMCC::AL)); + } + + for (unsigned Reg : NonclearedFPRegs) { + if (ARM::DPR_VFP2RegClass.contains(Reg)) { + if (STI->isLittle()) { + BuildMI(MBB, MBBI, DL, 
TII->get(ARM::VLDRD), Reg) + .addReg(ARM::SP) + .addImm((Reg - ARM::D0) * 2) + .add(predOps(ARMCC::AL)); + } else { + // For big-endian targets we need to load the two subregisters of Reg + // manually because VLDRD would load them in wrong order + unsigned SReg0 = TRI->getSubReg(Reg, ARM::ssub_0); + BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRS), SReg0) + .addReg(ARM::SP) + .addImm((Reg - ARM::D0) * 2) + .add(predOps(ARMCC::AL)); + BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRS), SReg0 + 1) + .addReg(ARM::SP) + .addImm((Reg - ARM::D0) * 2 + 1) + .add(predOps(ARMCC::AL)); + } + } else if (ARM::SPRRegClass.contains(Reg)) { + BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRS), Reg) + .addReg(ARM::SP) + .addImm(Reg - ARM::S0) + .add(predOps(ARMCC::AL)); + } + } + // restore FPSCR from stack and clear bits 0-4, 7, 28-31 + // The other bits are program global according to the AAPCS + if (passesFPReg) { + BuildMI(MBB, MBBI, DL, TII->get(ARM::t2LDRi8), SpareReg) + .addReg(ARM::SP) + .addImm(0x40) + .add(predOps(ARMCC::AL)); + BuildMI(MBB, MBBI, DL, TII->get(ARM::t2BICri), SpareReg) + .addReg(SpareReg) + .addImm(0x0000009F) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); + BuildMI(MBB, MBBI, DL, TII->get(ARM::t2BICri), SpareReg) + .addReg(SpareReg) + .addImm(0xF0000000) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); + BuildMI(MBB, MBBI, DL, TII->get(ARM::VMSR)) + .addReg(SpareReg) + .add(predOps(ARMCC::AL)); + // The ldr must happen after a floating point instruction. To prevent the + // post-ra scheduler to mess with the order, we create a bundle. + finalizeBundle(MBB, VLSTM->getIterator(), MBBI->getIterator()); + } +} + +void ARMExpandPseudo::CMSESaveClearFPRegsV81(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc &DL) { + BitVector ClearRegs(32, true); + bool DefFP = determineFPRegsToClear(*MBBI, ClearRegs); + + // If the instruction does not write to a FP register and no elements were + // removed from the set, then no FP registers were used to pass + // arguments/returns. + if (!DefFP && ClearRegs.count() == ClearRegs.size()) { + // save space on stack for VLSTM + BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBspi), ARM::SP) + .addReg(ARM::SP) + .addImm(CMSE_FP_SAVE_SIZE >> 2) + .add(predOps(ARMCC::AL)); + + // Lazy store all FP registers to the stack + BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM)) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)); + } else { + // Push all the callee-saved registers (s16-s31). + MachineInstrBuilder VPUSH = + BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTMSDB_UPD), ARM::SP) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)); + for (int Reg = ARM::S16; Reg <= ARM::S31; ++Reg) + VPUSH.addReg(Reg); + + // Clear FP registers with a VSCCLRM. + (void)CMSEClearFPRegsV81(MBB, MBBI, ClearRegs); + + // Save floating-point context. 
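+    // FPCXTS names the secure floating-point context (FPSCR together with
+    // associated security state). The pre-indexed store built below is
+    // expected to assemble as
+    //   vstr fpcxts, [sp, #-8]!
+    // allocating the 8-byte slot and saving the context in one instruction.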
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTR_FPCXTS_pre), ARM::SP) + .addReg(ARM::SP) + .addImm(-8) + .add(predOps(ARMCC::AL)); + } +} + +// Restore FP registers if present +void ARMExpandPseudo::CMSERestoreFPRegs( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, + SmallVectorImpl &AvailableRegs) { + if (STI->hasV8_1MMainlineOps()) + CMSERestoreFPRegsV81(MBB, MBBI, DL, AvailableRegs); + else + CMSERestoreFPRegsV8(MBB, MBBI, DL, AvailableRegs); +} + +void ARMExpandPseudo::CMSERestoreFPRegsV8( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, + SmallVectorImpl &AvailableRegs) { + if (!STI->hasFPRegs()) + return; + + // Use AvailableRegs to store the fp regs + std::vector> ClearedFPRegs; + std::vector NonclearedFPRegs; + for (const MachineOperand &Op : MBBI->operands()) { + if (Op.isReg() && Op.isDef()) { + unsigned Reg = Op.getReg(); + assert(!ARM::DPRRegClass.contains(Reg) || + ARM::DPR_VFP2RegClass.contains(Reg)); + assert(!ARM::QPRRegClass.contains(Reg)); + if (ARM::DPR_VFP2RegClass.contains(Reg)) { + if (AvailableRegs.size() >= 2) { + unsigned SaveReg2 = AvailableRegs.pop_back_val(); + unsigned SaveReg1 = AvailableRegs.pop_back_val(); + ClearedFPRegs.emplace_back(Reg, SaveReg1, SaveReg2); + + // Save the fp register to the normal registers + BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRRD)) + .addReg(SaveReg1, RegState::Define) + .addReg(SaveReg2, RegState::Define) + .addReg(Reg) + .add(predOps(ARMCC::AL)); + } else { + NonclearedFPRegs.push_back(Reg); + } + } else if (ARM::SPRRegClass.contains(Reg)) { + if (AvailableRegs.size() >= 1) { + unsigned SaveReg = AvailableRegs.pop_back_val(); + ClearedFPRegs.emplace_back(Reg, SaveReg, 0); + + // Save the fp register to the normal registers + BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRS), SaveReg) + .addReg(Reg) + .add(predOps(ARMCC::AL)); + } else { + NonclearedFPRegs.push_back(Reg); + } + } + } + } + + // Push FP regs that cannot be restored via normal registers on the stack + for (unsigned Reg : NonclearedFPRegs) { + if (ARM::DPR_VFP2RegClass.contains(Reg)) + BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTRD), Reg) + .addReg(ARM::SP) + .addImm((Reg - ARM::D0) * 2) + .add(predOps(ARMCC::AL)); + else if (ARM::SPRRegClass.contains(Reg)) + BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTRS), Reg) + .addReg(ARM::SP) + .addImm(Reg - ARM::S0) + .add(predOps(ARMCC::AL)); + } + + // Lazy load fp regs from stack + BuildMI(MBB, MBBI, DL, TII->get(ARM::VLLDM)) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)); + + // Restore all FP registers via normal registers + for (const auto &Regs : ClearedFPRegs) { + unsigned Reg, SaveReg1, SaveReg2; + std::tie(Reg, SaveReg1, SaveReg2) = Regs; + if (ARM::DPR_VFP2RegClass.contains(Reg)) + BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVDRR), Reg) + .addReg(SaveReg1) + .addReg(SaveReg2) + .add(predOps(ARMCC::AL)); + else if (ARM::SPRRegClass.contains(Reg)) + BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVSR), Reg) + .addReg(SaveReg1) + .add(predOps(ARMCC::AL)); + } + + // Pop the stack space + BuildMI(MBB, MBBI, DL, TII->get(ARM::tADDspi), ARM::SP) + .addReg(ARM::SP) + .addImm(CMSE_FP_SAVE_SIZE >> 2) + .add(predOps(ARMCC::AL)); +} + +static bool definesOrUsesFPReg(const MachineInstr &MI) { + for (const MachineOperand &Op : MI.operands()) { + if (!Op.isReg()) + continue; + unsigned Reg = Op.getReg(); + if ((Reg >= ARM::Q0 && Reg <= ARM::Q7) || + (Reg >= ARM::D0 && Reg <= ARM::D15) || + (Reg >= ARM::S0 && Reg <= ARM::S31)) + return true; + } + return false; +} + +void ARMExpandPseudo::CMSERestoreFPRegsV81( 
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+    SmallVectorImpl<unsigned> &AvailableRegs) {
+  if (!definesOrUsesFPReg(*MBBI)) {
+    // Load FP registers from stack.
+    BuildMI(MBB, MBBI, DL, TII->get(ARM::VLLDM))
+        .addReg(ARM::SP)
+        .add(predOps(ARMCC::AL));
+
+    // Pop the stack space
+    BuildMI(MBB, MBBI, DL, TII->get(ARM::tADDspi), ARM::SP)
+        .addReg(ARM::SP)
+        .addImm(CMSE_FP_SAVE_SIZE >> 2)
+        .add(predOps(ARMCC::AL));
+  } else {
+    // Restore the floating point context.
+    BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::VLDR_FPCXTS_post),
+            ARM::SP)
+        .addReg(ARM::SP)
+        .addImm(8)
+        .add(predOps(ARMCC::AL));
+
+    // Pop all the callee-saved registers (s16-s31).
+    MachineInstrBuilder VPOP =
+        BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDMSIA_UPD), ARM::SP)
+            .addReg(ARM::SP)
+            .add(predOps(ARMCC::AL));
+    for (int Reg = ARM::S16; Reg <= ARM::S31; ++Reg)
+      VPOP.addReg(Reg, RegState::Define);
+  }
+}
+
 /// Expand a CMP_SWAP pseudo-inst to an ldrex/strex loop as simply as
 /// possible. This only gets used at -O0 so we don't care about efficiency of
 /// the generated code.
@@ -1155,6 +1751,89 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
   return true;
 }

+static void CMSEPushCalleeSaves(const TargetInstrInfo &TII,
+                                MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator MBBI, int JumpReg,
+                                bool Thumb1Only) {
+  const DebugLoc &DL = MBBI->getDebugLoc();
+  if (Thumb1Only) { // push Lo and Hi regs separately
+    MachineInstrBuilder PushMIB =
+        BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
+    for (int Reg = ARM::R4; Reg < ARM::R8; ++Reg)
+      PushMIB.addReg(Reg, Reg != JumpReg ? RegState::Undef : 0);
+
+    // Thumb1 can only tPUSH low regs, so we copy the high regs to the low
+    // regs that we just saved and push the low regs again, taking care to
+    // not clobber JumpReg. If JumpReg is one of the low registers, push first
+    // the values of r9-r11, and then r8. That would leave them ordered in
+    // memory, and allow us to later pop them with a single instruction.
+    // FIXME: Could also use any of r0-r3 that are free (including in the
+    // first PUSH above).
+    for (int LoReg = ARM::R7, HiReg = ARM::R11; LoReg >= ARM::R4; --LoReg) {
+      if (JumpReg == LoReg)
+        continue;
+      BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), LoReg)
+          .addReg(HiReg, RegState::Undef)
+          .add(predOps(ARMCC::AL));
+      --HiReg;
+    }
+    MachineInstrBuilder PushMIB2 =
+        BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
+    for (int Reg = ARM::R4; Reg < ARM::R8; ++Reg) {
+      if (Reg == JumpReg)
+        continue;
+      PushMIB2.addReg(Reg, RegState::Kill);
+    }
+
+    // If we couldn't use a low register for temporary storage (because it was
+    // the JumpReg), use r4 or r5, whichever is not JumpReg. It has already
+    // been saved.
+    if (JumpReg >= ARM::R4 && JumpReg <= ARM::R7) {
+      int LoReg = JumpReg == ARM::R4 ? ARM::R5 : ARM::R4;
+      BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), LoReg)
+          .addReg(ARM::R8)
+          .add(predOps(ARMCC::AL));
+      BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH))
+          .add(predOps(ARMCC::AL))
+          .addReg(LoReg);
+    }
+  } else { // push Lo and Hi registers with a single instruction
+    MachineInstrBuilder PushMIB =
+        BuildMI(MBB, MBBI, DL, TII.get(ARM::t2STMDB_UPD), ARM::SP)
+            .addReg(ARM::SP)
+            .add(predOps(ARMCC::AL));
+    for (int Reg = ARM::R4; Reg < ARM::R12; ++Reg)
+      PushMIB.addReg(Reg, Reg != JumpReg ? RegState::Undef : 0);
+  }
+}
+
+static void CMSEPopCalleeSaves(const TargetInstrInfo &TII,
+                               MachineBasicBlock &MBB,
+                               MachineBasicBlock::iterator MBBI, int JumpReg,
+                               bool Thumb1Only) {
+  const DebugLoc &DL = MBBI->getDebugLoc();
+  if (Thumb1Only) {
+    MachineInstrBuilder PopMIB =
+        BuildMI(MBB, MBBI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
+    for (int R = 0; R < 4; ++R) {
+      PopMIB.addReg(ARM::R4 + R, RegState::Define);
+      BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), ARM::R8 + R)
+          .addReg(ARM::R4 + R, RegState::Kill)
+          .add(predOps(ARMCC::AL));
+    }
+    MachineInstrBuilder PopMIB2 =
+        BuildMI(MBB, MBBI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
+    for (int R = 0; R < 4; ++R)
+      PopMIB2.addReg(ARM::R4 + R, RegState::Define);
+  } else { // pop Lo and Hi registers with a single instruction
+    MachineInstrBuilder PopMIB =
+        BuildMI(MBB, MBBI, DL, TII.get(ARM::t2LDMIA_UPD), ARM::SP)
+            .addReg(ARM::SP)
+            .add(predOps(ARMCC::AL));
+    for (int Reg = ARM::R4; Reg < ARM::R12; ++Reg)
+      PopMIB.addReg(Reg, RegState::Define);
+  }
+}

 bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
@@ -1220,6 +1899,99 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
     MBBI = NewMI;
     return true;
   }
+  case ARM::tBXNS_RET: {
+    MachineBasicBlock &AfterBB = CMSEClearFPRegs(MBB, MBBI);
+
+    if (STI->hasV8_1MMainlineOps()) {
+      // Restore the non-secure floating point context.
+      BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
+              TII->get(ARM::VLDR_FPCXTNS_post), ARM::SP)
+          .addReg(ARM::SP)
+          .addImm(4)
+          .add(predOps(ARMCC::AL));
+    }
+
+    // Clear all GPR that are not a use of the return instruction.
+    assert(llvm::all_of(MBBI->operands(), [](const MachineOperand &Op) {
+      return !Op.isReg() || Op.getReg() != ARM::R12;
+    }));
+    SmallVector<unsigned, 4> ClearRegs;
+    determineGPRegsToClear(
+        *MBBI, {ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R12}, ClearRegs);
+    CMSEClearGPRegs(AfterBB, AfterBB.end(), MBBI->getDebugLoc(), ClearRegs,
+                    ARM::LR);
+
+    MachineInstrBuilder NewMI =
+        BuildMI(AfterBB, AfterBB.end(), MBBI->getDebugLoc(),
+                TII->get(ARM::tBXNS))
+            .addReg(ARM::LR)
+            .add(predOps(ARMCC::AL));
+    for (const MachineOperand &Op : MI.operands())
+      NewMI->addOperand(Op);
+    MI.eraseFromParent();
+    return true;
+  }
+  case ARM::tBLXNS_CALL: {
+    DebugLoc DL = MBBI->getDebugLoc();
+    unsigned JumpReg = MBBI->getOperand(0).getReg();
+    CMSEPushCalleeSaves(*TII, MBB, MBBI, JumpReg,
+                        AFI->isThumb1OnlyFunction());
+
+    SmallVector<unsigned, 16> ClearRegs;
+    determineGPRegsToClear(*MBBI,
+                           {ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4,
+                            ARM::R5, ARM::R6, ARM::R7, ARM::R8, ARM::R9,
+                            ARM::R10, ARM::R11, ARM::R12},
+                           ClearRegs);
+    auto OriginalClearRegs = ClearRegs;
+
+    // Get the first cleared register as a scratch (to use later with tBIC).
+    // We need to use the first so we can ensure it is a low register.
+    unsigned ScratchReg = ClearRegs.front();
+
+    // Clear LSB of JumpReg
+    if (AFI->isThumb2Function()) {
+      BuildMI(MBB, MBBI, DL, TII->get(ARM::t2BICri), JumpReg)
+          .addReg(JumpReg)
+          .addImm(1)
+          .add(predOps(ARMCC::AL))
+          .add(condCodeOp());
+    } else {
+      // We need to use an extra register to cope with 8M Baseline;
+      // since we have saved all of the registers, we are OK to trash a
+      // non-argument register here.
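+      // Bit zero of the target address selects BLXNS's destination security
+      // state: a clear bit[0] requests a transition to the Non-secure state.
+      // As a sketch (register numbers illustrative only), the two
+      // instructions built below correspond to:
+      //   movs rS, #1   @ rS stands for ScratchReg
+      //   bics rJ, rS   @ rJ stands for JumpReg; clears bit 0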
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::tMOVi8), ScratchReg) + .add(condCodeOp()) + .addImm(1) + .add(predOps(ARMCC::AL)); + BuildMI(MBB, MBBI, DL, TII->get(ARM::tBIC), JumpReg) + .addReg(ARM::CPSR, RegState::Define) + .addReg(JumpReg) + .addReg(ScratchReg) + .add(predOps(ARMCC::AL)); + } + + CMSESaveClearFPRegs(MBB, MBBI, DL, + ClearRegs); // save+clear FP regs with ClearRegs + CMSEClearGPRegs(MBB, MBBI, DL, ClearRegs, JumpReg); + + const MachineInstrBuilder NewCall = + BuildMI(MBB, MBBI, DL, TII->get(ARM::tBLXNSr)) + .add(predOps(ARMCC::AL)) + .addReg(JumpReg, RegState::Kill); + + for (int I = 1, E = MI.getNumOperands(); I != E; ++I) + NewCall->addOperand(MI.getOperand(I)); + if (MI.isCandidateForCallSiteEntry()) + MI.getMF()->moveCallSiteInfo(&MI, NewCall.getInstr()); + + CMSERestoreFPRegs(MBB, MBBI, DL, OriginalClearRegs); // restore FP registers + + CMSEPopCalleeSaves(*TII, MBB, MBBI, JumpReg, AFI->isThumb1OnlyFunction()); + + MI.eraseFromParent(); + return true; + } case ARM::VMOVHcc: case ARM::VMOVScc: case ARM::VMOVDcc: { diff --git a/llvm/lib/Target/ARM/ARMFastISel.cpp b/llvm/lib/Target/ARM/ARMFastISel.cpp index d5400178..f47e9ff 100644 --- a/llvm/lib/Target/ARM/ARMFastISel.cpp +++ b/llvm/lib/Target/ARM/ARMFastISel.cpp @@ -2077,6 +2077,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl &UsedRegs, bool ARMFastISel::SelectRet(const Instruction *I) { const ReturnInst *Ret = cast(I); const Function &F = *I->getParent()->getParent(); + const bool IsCmseNSEntry = F.hasFnAttribute("cmse_nonsecure_entry"); if (!FuncInfo.CanLowerReturn) return false; @@ -2153,8 +2154,17 @@ bool ARMFastISel::SelectRet(const Instruction *I) { RetRegs.push_back(VA.getLocReg()); } + unsigned RetOpc; + if (IsCmseNSEntry) + if (isThumb2) + RetOpc = ARM::tBXNS_RET; + else + llvm_unreachable("CMSE not valid for non-Thumb targets"); + else + RetOpc = Subtarget->getReturnOpcode(); + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(Subtarget->getReturnOpcode())); + TII.get(RetOpc)); AddOptionalDefs(MIB); for (unsigned R : RetRegs) MIB.addReg(R, RegState::Implicit); diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp index 00dad53..00b310a 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -322,14 +322,15 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI, /// Unfortunately we cannot determine this value in determineCalleeSaves() yet /// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use /// this to produce a conservative estimate that we check in an assert() later. -static int getMaxFPOffset(const Function &F, const ARMFunctionInfo &AFI) { +static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI) { // For Thumb1, push.w isn't available, so the first push will always push // r7 and lr onto the stack first. if (AFI.isThumb1OnlyFunction()) return -AFI.getArgRegsSaveSize() - (2 * 4); // This is a conservative estimation: Assume the frame pointer being r7 and // pc("r15") up to r8 getting spilled before (= 8 registers). - return -AFI.getArgRegsSaveSize() - (8 * 4); + int FPCXTSaveSize = (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 
4 : 0; + return - FPCXTSaveSize - AFI.getArgRegsSaveSize() - (8 * 4); } void ARMFrameLowering::emitPrologue(MachineFunction &MF, @@ -350,6 +351,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); unsigned NumBytes = MFI.getStackSize(); const std::vector &CSI = MFI.getCalleeSavedInfo(); + int FPCXTSaveSize = 0; // Debug location must be unknown since the first debug location is used // to determine the end of the prologue. @@ -418,6 +420,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, FramePtrSpillFI = FI; GPRCS1Size += 4; break; + case ARM::FPCXTNS: + FPCXTSaveSize = 4; + break; default: // This is a DPR. Exclude the aligned DPRCS2 spills. if (Reg == ARM::D8) @@ -427,26 +432,35 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, } } - // Move past area 1. + // Move past FPCXT area. MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push; + if (FPCXTSaveSize > 0) { + LastPush = MBBI++; + DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, true); + } + + // Move past area 1. if (GPRCS1Size > 0) { GPRCS1Push = LastPush = MBBI++; DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true); } // Determine starting offsets of spill areas. - unsigned GPRCS1Offset = NumBytes - ArgRegsSaveSize - GPRCS1Size; + unsigned FPCXTOffset = NumBytes - ArgRegsSaveSize - FPCXTSaveSize; + unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size; unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size; Align DPRAlign = DPRCSSize ? std::min(Align(8), Alignment) : Align(4); unsigned DPRGapSize = - (GPRCS1Size + GPRCS2Size + ArgRegsSaveSize) % DPRAlign.value(); + (GPRCS1Size + GPRCS2Size + FPCXTSaveSize + ArgRegsSaveSize) % + DPRAlign.value(); + unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize; int FramePtrOffsetInPush = 0; if (HasFP) { int FPOffset = MFI.getObjectOffset(FramePtrSpillFI); - assert(getMaxFPOffset(MF.getFunction(), *AFI) <= FPOffset && + assert(getMaxFPOffset(STI, *AFI) <= FPOffset && "Max FP estimation is wrong"); - FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize; + FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize; AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) + NumBytes); } @@ -581,7 +595,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, if (FramePtrOffsetInPush + PushSize != 0) { unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa( nullptr, MRI->getDwarfRegNum(FramePtr, true), - -(ArgRegsSaveSize - FramePtrOffsetInPush))); + -(FPCXTSaveSize + ArgRegsSaveSize - FramePtrOffsetInPush))); BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); @@ -687,6 +701,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() - AFI->getFramePtrSpillOffset()); + AFI->setFPCXTSaveAreaSize(FPCXTSaveSize); AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); AFI->setDPRCalleeSavedGapSize(DPRGapSize); @@ -788,6 +803,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, // Move SP to start of FP callee save spill area. 
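  // (Assumed layout of the callee-save region after this patch, from higher
  // to lower addresses: argument-register save area, FPCXT_NS save slot,
  // GPR area 1, GPR area 2, DPR alignment gap, DPR area. The subtraction
  // below steps over all of them.)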
   NumBytes -= (ArgRegsSaveSize +
+               AFI->getFPCXTSaveAreaSize() +
                AFI->getGPRCalleeSavedArea1Size() +
                AFI->getGPRCalleeSavedArea2Size() +
                AFI->getDPRCalleeSavedGapSize() +
@@ -855,6 +871,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
       if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
       if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
+      if (AFI->getFPCXTSaveAreaSize()) MBBI++;
     }

     if (ArgRegsSaveSize)
@@ -1045,6 +1062,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
   bool isTailCall = false;
   bool isInterrupt = false;
   bool isTrap = false;
+  bool isCmseEntry = false;
   if (MBB.end() != MI) {
     DL = MI->getDebugLoc();
     unsigned RetOpcode = MI->getOpcode();
@@ -1054,6 +1072,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
     isTrap =
         RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl ||
         RetOpcode == ARM::tTRAP;
+    isCmseEntry = (RetOpcode == ARM::tBXNS || RetOpcode == ARM::tBXNS_RET);
   }

   SmallVector<unsigned, 4> Regs;
@@ -1071,7 +1090,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
       continue;

     if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
-        !isTrap && STI.hasV5TOps()) {
+        !isCmseEntry && !isTrap && STI.hasV5TOps()) {
       if (MBB.succ_empty()) {
         Reg = ARM::PC;
         // Fold the return instruction into the LDM.
@@ -1423,6 +1442,16 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(
                                              ARM::t2STR_PRE : ARM::STR_PRE_IMM;
   unsigned FltOpc = ARM::VSTMDDB_UPD;
   unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
+  // Save the non-secure floating point context.
+  if (llvm::any_of(CSI, [](const CalleeSavedInfo &C) {
+        return C.getReg() == ARM::FPCXTNS;
+      })) {
+    BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::VSTR_FPCXTNS_pre),
+            ARM::SP)
+        .addReg(ARM::SP)
+        .addImm(-4)
+        .add(predOps(ARMCC::AL));
+  }
   emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0,
                MachineInstr::FrameSetup);
   emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0,
@@ -1615,6 +1644,16 @@ checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
     SavedRegs.set(ARM::R4);
 }

+bool ARMFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
+  // For CMSE entry functions, we want to save the FPCXT_NS immediately
+  // upon function entry (resp. restore it immediately before return)
+  if (STI.hasV8_1MMainlineOps() &&
+      MF.getInfo<ARMFunctionInfo>()->isCmseNSEntryFunction())
+    return false;
+
+  return true;
+}
+
 void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                             BitVector &SavedRegs,
                                             RegScavenger *RS) const {
@@ -1684,6 +1723,10 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
   if (RegInfo->hasBasePointer(MF))
     SavedRegs.set(RegInfo->getBaseRegister());

+  // On v8.1-M.Main CMSE entry functions save/restore FPCXT.
+  if (STI.hasV8_1MMainlineOps() && AFI->isCmseNSEntryFunction())
+    CanEliminateFrame = false;
+
   // Don't spill FP if the frame can be eliminated. This is determined
   // by scanning the callee-save registers to see if any is modified.
   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
@@ -1842,7 +1885,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
   //
   // We could do slightly better on Thumb1; in some cases, an sp-relative
   // offset would be legal even though an fp-relative offset is not.
- int MaxFPOffset = getMaxFPOffset(MF.getFunction(), *AFI); + int MaxFPOffset = getMaxFPOffset(STI, *AFI); bool HasLargeArgumentList = HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit; @@ -2124,6 +2167,27 @@ void ARMFrameLowering::getCalleeSaves(const MachineFunction &MF, SavedRegs.set(ARM::R0); } +bool ARMFrameLowering::assignCalleeSavedSpillSlots( + MachineFunction &MF, const TargetRegisterInfo *TRI, + std::vector &CSI) const { + // For CMSE entry functions, handle floating-point context as if it was a + // callee-saved register. + if (STI.hasV8_1MMainlineOps() && + MF.getInfo()->isCmseNSEntryFunction()) { + CSI.emplace_back(ARM::FPCXTNS); + CSI.back().setRestored(false); + } + + return false; +} + +const TargetFrameLowering::SpillSlot * +ARMFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const { + static const SpillSlot FixedSpillOffsets[] = {{ARM::FPCXTNS, -4}}; + NumEntries = array_lengthof(FixedSpillOffsets); + return FixedSpillOffsets; +} + MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr( MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.h b/llvm/lib/Target/ARM/ARMFrameLowering.h index e46a873..dd4c0ca 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.h +++ b/llvm/lib/Target/ARM/ARMFrameLowering.h @@ -63,15 +63,22 @@ public: MachineBasicBlock &MBB) const override; /// Returns true if the target will correctly handle shrink wrapping. - bool enableShrinkWrapping(const MachineFunction &MF) const override { - return true; - } + bool enableShrinkWrapping(const MachineFunction &MF) const override; + bool isProfitableForNoCSROpt(const Function &F) const override { // The no-CSR optimisation is bad for code size on ARM, because we can save // many registers with a single PUSH/POP pair. return false; } + bool + assignCalleeSavedSpillSlots(MachineFunction &MF, + const TargetRegisterInfo *TRI, + std::vector &CSI) const override; + + const SpillSlot * + getCalleeSavedSpillSlots(unsigned &NumEntries) const override; + private: void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef CSI, unsigned StmOpc, diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 2c6d124..80336f7 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1562,10 +1562,12 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::CALL: return "ARMISD::CALL"; case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED"; case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK"; + case ARMISD::tSECALL: return "ARMISD::tSECALL"; case ARMISD::BRCOND: return "ARMISD::BRCOND"; case ARMISD::BR_JT: return "ARMISD::BR_JT"; case ARMISD::BR2_JT: return "ARMISD::BR2_JT"; case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG"; + case ARMISD::SERET_FLAG: return "ARMISD::SERET_FLAG"; case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG"; case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD"; case ARMISD::CMP: return "ARMISD::CMP"; @@ -2129,15 +2131,27 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool isVarArg = CLI.IsVarArg; MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *AFI = MF.getInfo(); MachineFunction::CallSiteInfo CSInfo; bool isStructRet = (Outs.empty()) ? 
                                         false : Outs[0].Flags.isSRet();
   bool isThisReturn = false;
+  bool isCmseNSCall  = false;
   bool PreferIndirect = false;

+  // Determine whether this is a non-secure function call.
+  if (CLI.CB && CLI.CB->getAttributes().hasFnAttribute("cmse_nonsecure_call"))
+    isCmseNSCall = true;
+
   // Disable tail calls if they're not supported.
   if (!Subtarget->supportsTailCall())
     isTailCall = false;

+  // For both the non-secure calls and the returns from a CMSE entry function,
+  // the function needs to do some extra work after the call, or before the
+  // return, respectively, thus it cannot end with a tail call
+  if (isCmseNSCall || AFI->isCmseNSEntryFunction())
+    isTailCall = false;
+
   if (isa<GlobalAddressSDNode>(Callee)) {
     // If we're optimizing for minimum size and the function is called three or
     // more times in this block, we can improve codesize by calling indirectly
@@ -2343,7 +2357,6 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
   bool isLocalARMFunc = false;
-  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   auto PtrVt = getPointerTy(DAG.getDataLayout());

   if (Subtarget->genLongCalls()) {
@@ -2437,10 +2450,31 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     }
   }

+  if (isCmseNSCall) {
+    assert(!isARMFunc && !isDirect &&
+           "Cannot handle call to ARM function or direct call");
+    if (NumBytes > 0) {
+      DiagnosticInfoUnsupported Diag(DAG.getMachineFunction().getFunction(),
+                                     "call to non-secure function would "
+                                     "require passing arguments on stack",
+                                     dl.getDebugLoc());
+      DAG.getContext()->diagnose(Diag);
+    }
+    if (isStructRet) {
+      DiagnosticInfoUnsupported Diag(
+          DAG.getMachineFunction().getFunction(),
+          "call to non-secure function would return value through pointer",
+          dl.getDebugLoc());
+      DAG.getContext()->diagnose(Diag);
+    }
+  }
+
   // FIXME: handle tail calls differently.
   unsigned CallOpc;
   if (Subtarget->isThumb()) {
-    if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
+    if (isCmseNSCall)
+      CallOpc = ARMISD::tSECALL;
+    else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
       CallOpc = ARMISD::CALL_NOLINK;
     else
       CallOpc = ARMISD::CALL;
@@ -2811,6 +2845,17 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   AFI->setReturnRegsCount(RVLocs.size());

+  // Report error if cmse entry function returns structure through first ptr arg.
+  if (AFI->isCmseNSEntryFunction() && MF.getFunction().hasStructRetAttr()) {
+    // Note: using an empty SDLoc(), as the first line of the function is a
+    // better place to report than the last line.
+    DiagnosticInfoUnsupported Diag(
+        DAG.getMachineFunction().getFunction(),
+        "secure entry function would return value through pointer",
+        SDLoc().getDebugLoc());
+    DAG.getContext()->diagnose(Diag);
+  }
+
   // Copy the result values into the output registers.
   for (unsigned i = 0, realRVLocIdx = 0;
        i != RVLocs.size();
@@ -2932,7 +2977,9 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
       return LowerInterruptReturn(RetOps, dl, DAG);
   }

-  return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps);
+  ARMISD::NodeType RetNode = AFI->isCmseNSEntryFunction() ?
ARMISD::SERET_FLAG : + ARMISD::RET_FLAG; + return DAG.getNode(RetNode, dl, MVT::Other, RetOps); } bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 5e9b077..c5f7183 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -68,10 +68,12 @@ class VectorType; CALL, // Function call. CALL_PRED, // Function call that's predicable. CALL_NOLINK, // Function call with branch not branch-and-link. + tSECALL, // CMSE non-secure function call. BRCOND, // Conditional branch. BR_JT, // Jumptable branch. BR2_JT, // Jumptable branch (2 level - jumptable entry is a jump). RET_FLAG, // Return with a flag operand. + SERET_FLAG, // CMSE Entry function return with a flag operand. INTRET_FLAG, // Interrupt return with an LR-offset and a flag operand. PIC_ADD, // Add with a PC operand and a PIC label. diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index 8f88538..6b990a5 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -159,6 +159,8 @@ def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall, def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def ARMseretflag : SDNode<"ARMISD::SERET_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def ARMintretflag : SDNode<"ARMISD::INTRET_FLAG", SDT_ARMcall, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov, diff --git a/llvm/lib/Target/ARM/ARMInstrThumb.td b/llvm/lib/Target/ARM/ARMInstrThumb.td index 2616422..7fae321 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb.td @@ -14,6 +14,10 @@ // Thumb specific DAG Nodes. // +def ARMtsecall : SDNode<"ARMISD::tSECALL", SDT_ARMcall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; + def imm_sr_XFORM: SDNodeXFormgetZExtValue(); return CurDAG->getTargetConstant((Imm == 32 ? 0 : Imm), SDLoc(N), MVT::i32); @@ -499,6 +503,10 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in { def tBX_RET : tPseudoExpand<(outs), (ins pred:$p), 2, IIC_Br, [(ARMretflag)], (tBX LR, pred:$p)>, Sched<[WriteBr]>; + // alternative return for CMSE entry functions + def tBXNS_RET : tPseudoInst<(outs), (ins), 2, IIC_Br, + [(ARMseretflag)]>, Sched<[WriteBr]>; + // Alternative return instruction used by vararg functions. 
def tBX_RET_vararg : tPseudoExpand<(outs), (ins tGPR:$Rm, pred:$p), 2, IIC_Br, [], @@ -560,6 +568,10 @@ let isCall = 1, let Unpredictable{1-0} = 0b11; } + def tBLXNS_CALL : PseudoInst<(outs), (ins GPRnopc:$func), IIC_Br, + [(ARMtsecall GPRnopc:$func)]>, + Requires<[IsThumb, Has8MSecExt]>, Sched<[WriteBr]>; + // ARMv4T def tBX_CALL : tPseudoInst<(outs), (ins tGPR:$func), 4, IIC_Br, diff --git a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp index 3b676ca..507c3e6 100644 --- a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp +++ b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp @@ -15,4 +15,6 @@ void ARMFunctionInfo::anchor() {} ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF) : isThumb(MF.getSubtarget().isThumb()), - hasThumb2(MF.getSubtarget().hasThumb2()) {} + hasThumb2(MF.getSubtarget().hasThumb2()), + IsCmseNSEntry(MF.getFunction().hasFnAttribute("cmse_nonsecure_entry")), + IsCmseNSCall(MF.getFunction().hasFnAttribute("cmse_nonsecure_call")) {} diff --git a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h index 85c6837..298c8a2 100644 --- a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -83,6 +83,7 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// GPRCS1Size, GPRCS2Size, DPRCSSize - Sizes of callee saved register spills /// areas. + unsigned FPCXTSaveSize = 0; unsigned GPRCS1Size = 0; unsigned GPRCS2Size = 0; unsigned DPRCSAlignGapSize = 0; @@ -105,6 +106,10 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// HasITBlocks - True if IT blocks have been inserted. bool HasITBlocks = false; + // Security Extensions + bool IsCmseNSEntry; + bool IsCmseNSCall; + /// CPEClones - Track constant pool entries clones created by Constant Island /// pass. DenseMap CPEClones; @@ -140,6 +145,9 @@ public: bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; } bool isThumb2Function() const { return isThumb && hasThumb2; } + bool isCmseNSEntryFunction() const { return IsCmseNSEntry; } + bool isCmseNSCallFunction() const { return IsCmseNSCall; } + unsigned getStoredByValParamsPadding() const { return StByValParamsPadding; } void setStoredByValParamsPadding(unsigned p) { StByValParamsPadding = p; } @@ -172,11 +180,13 @@ public: void setGPRCalleeSavedArea2Offset(unsigned o) { GPRCS2Offset = o; } void setDPRCalleeSavedAreaOffset(unsigned o) { DPRCSOffset = o; } + unsigned getFPCXTSaveAreaSize() const { return FPCXTSaveSize; } unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; } unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; } unsigned getDPRCalleeSavedGapSize() const { return DPRCSAlignGapSize; } unsigned getDPRCalleeSavedAreaSize() const { return DPRCSSize; } + void setFPCXTSaveAreaSize(unsigned s) { FPCXTSaveSize = s; } void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; } void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; } void setDPRCalleeSavedGapSize(unsigned s) { DPRCSAlignGapSize = s; } diff --git a/llvm/lib/Target/ARM/ARMRegisterInfo.td b/llvm/lib/Target/ARM/ARMRegisterInfo.td index 3b260f9..39cdb68 100644 --- a/llvm/lib/Target/ARM/ARMRegisterInfo.td +++ b/llvm/lib/Target/ARM/ARMRegisterInfo.td @@ -588,3 +588,6 @@ def Tuples4DSpc : RegisterTuples<[dsub_0, dsub_2, dsub_4, dsub_6], // Spaced quads of D registers. 
def DQuadSpc : RegisterClass<"ARM", [v4i64], 64, (add Tuples3DSpc)>; + +// FP context payload +def FPCXTRegs : RegisterClass<"ARM", [i32], 32, (add FPCXTNS)>; diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp index 5676c4f..8222ebc 100644 --- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -1047,6 +1047,10 @@ bool Thumb1FrameLowering::restoreCalleeSavedRegisters( if (!STI.hasV5TOps()) continue; + // CMSE entry functions must return via BXNS, see emitEpilogue. + if (AFI->isCmseNSEntryFunction()) + continue; + // Pop LR into PC. Reg = ARM::PC; (*MIB).setDesc(TII.get(ARM::tPOP_RET)); diff --git a/llvm/test/CodeGen/ARM/cmse-clear-float-bigend.mir b/llvm/test/CodeGen/ARM/cmse-clear-float-bigend.mir new file mode 100644 index 0000000..5c743d5 --- /dev/null +++ b/llvm/test/CodeGen/ARM/cmse-clear-float-bigend.mir @@ -0,0 +1,101 @@ +# RUN: llc -mcpu=cortex-m33 -run-pass=arm-pseudo %s -o - | FileCheck %s +--- | + target datalayout = "E-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbebv8m.main-arm-none-eabi" + + ; Function Attrs: cmse_nonsecure_entry nounwind + define hidden arm_aapcs_vfpcc void @secure_foo(void (double, double, double, double, double, double, double, double)* %fptr) local_unnamed_addr #0 { + entry: + %0 = ptrtoint void (double, double, double, double, double, double, double, double)* %fptr to i32 + %and = and i32 %0, -2 + %1 = inttoptr i32 %and to void (double, double, double, double, double, double, double, double)* + call arm_aapcs_vfpcc void %1(double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00) #2 + ret void + } + + ; Function Attrs: nounwind + declare void @llvm.stackprotector(i8*, i8**) #1 + + attributes #0 = { "cmse_nonsecure_entry" nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+8msecext,+armv8-m.main,-d32,-fp64,+fp-armv8,+hwdiv,+thumb-mode,-crypto,-fullfp16,-neon" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #1 = { nounwind } + attributes #2 = { "cmse_nonsecure_call" nounwind } + + !llvm.module.flags = !{!0, !1, !2, !3} + + !0 = !{i32 1, !"wchar_size", i32 4} + !1 = !{i32 1, !"static_rwdata", i32 1} + !2 = !{i32 1, !"enumsize_buildattr", i32 2} + !3 = !{i32 1, !"armlib_unavailable", i32 0} + +... 
+--- +name: secure_foo +alignment: 2 +tracksRegLiveness: true +liveins: + - { reg: '$r0' } +frameInfo: + stackSize: 8 + maxAlignment: 4 + adjustsStack: true + hasCalls: true + maxCallFrameSize: 0 +stack: + - { id: 0, type: spill-slot, offset: -4, size: 4, alignment: 4, stack-id: default, + callee-saved-register: '$lr' } + - { id: 1, type: spill-slot, offset: -8, size: 4, alignment: 4, stack-id: default, + callee-saved-register: '$r7' } +constants: + - id: 0 + value: 'double 0.000000e+00' + alignment: 8 + - id: 1 + value: 'double 1.000000e+00' + alignment: 8 + - id: 2 + value: 'double 2.000000e+00' + alignment: 8 + - id: 3 + value: 'double 3.000000e+00' + alignment: 8 + - id: 4 + value: 'double 4.000000e+00' + alignment: 8 + - id: 5 + value: 'double 5.000000e+00' + alignment: 8 + - id: 6 + value: 'double 6.000000e+00' + alignment: 8 + - id: 7 + value: 'double 7.000000e+00' + alignment: 8 +body: | + bb.0.entry: + liveins: $r0, $r7, $lr + + $sp = frame-setup t2STMDB_UPD $sp, 14, $noreg, killed $r7, killed $lr + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + renamable $d0 = VLDRD %const.0, 0, 14, $noreg :: (load 8 from constant-pool) + renamable $d1 = VLDRD %const.1, 0, 14, $noreg :: (load 8 from constant-pool) + renamable $d2 = VLDRD %const.2, 0, 14, $noreg :: (load 8 from constant-pool) + renamable $d3 = VLDRD %const.3, 0, 14, $noreg :: (load 8 from constant-pool) + renamable $d4 = VLDRD %const.4, 0, 14, $noreg :: (load 8 from constant-pool) + renamable $d5 = VLDRD %const.5, 0, 14, $noreg :: (load 8 from constant-pool) + renamable $d6 = VLDRD %const.6, 0, 14, $noreg :: (load 8 from constant-pool) + renamable $d7 = VLDRD %const.7, 0, 14, $noreg :: (load 8 from constant-pool) + renamable $r0 = t2BICri killed renamable $r0, 1, 14, $noreg, $noreg + tBLXNS_CALL killed renamable $r0, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit $d0, implicit $d1, implicit $d2, implicit $d3, implicit $d4, implicit $d5, implicit $d6, implicit $d7, implicit-def $sp + $sp = t2LDMIA_UPD $sp, 14, $noreg, def $r7, def $lr + tBXNS_RET + +... 
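+# The checks below exercise the big-endian path of CMSESaveClearFPRegsV8:
+# after the lazy save (VLSTM), the D-register arguments that could not be
+# parked in core registers are reloaded as pairs of single-precision VLDRS,
+# since a VLDRD reload would swap the two words on a big-endian target.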
+ +# CHECK: VLSTM +# CHECK-DAG: $s12 = VLDRS $sp, 12, 14 /* CC::al */, $noreg +# CHECK-DAG: $s13 = VLDRS $sp, 13, 14 /* CC::al */, $noreg +# CHECK-DAG: $s14 = VLDRS $sp, 14, 14 /* CC::al */, $noreg +# CHECK-DAG: $s15 = VLDRS $sp, 15, 14 /* CC::al */, $noreg +# CHECK: tBLXNSr diff --git a/llvm/test/CodeGen/ARM/cmse-clear-float-hard.ll b/llvm/test/CodeGen/ARM/cmse-clear-float-hard.ll new file mode 100644 index 0000000..1975b8f --- /dev/null +++ b/llvm/test/CodeGen/ARM/cmse-clear-float-hard.ll @@ -0,0 +1,811 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc %s -o - -mtriple=thumbv8m.main -mattr=+fp-armv8d16sp,+dsp -float-abi=hard | \ +; RUN: FileCheck %s --check-prefix=CHECK-8M --check-prefix=CHECK-8M-LE +; RUN: llc %s -o - -mtriple=thumbebv8m.main -mattr=+fp-armv8d16sp,+dsp -float-abi=hard | \ +; RUN: FileCheck %s --check-prefix=CHECK-8M --check-prefix=CHECK-8M-BE +; RUN: llc %s -o - -mtriple=thumbv8.1m.main -mattr=+fp-armv8d16sp,+dsp -float-abi=hard | \ +; RUN: FileCheck %s --check-prefix=CHECK-81M --check-prefix=CHECK-81M-LE +; RUN: llc %s -o - -mtriple=thumbebv8.1m.main -mattr=+fp-armv8d16sp,+dsp -float-abi=hard | \ +; RUN: FileCheck %s --check-prefix=CHECK-81M --check-prefix=CHECK-81M-BE +; RUN: llc %s -o - -mtriple=thumbv8.1m.main -mattr=+mve.fp -float-abi=hard | \ +; RUN: FileCheck %s --check-prefix=CHECK-81M --check-prefix=CHECK-81M-LE +; RUN: llc %s -o - -mtriple=thumbebv8.1m.main -mattr=+mve.fp -float-abi=hard | \ +; RUN: FileCheck %s --check-prefix=CHECK-81M --check-prefix=CHECK-81M-BE + +define float @f1(float (float)* nocapture %fptr) #0 { +; CHECK-8M-LABEL: f1: +; CHECK-8M: @ %bb.0: @ %entry +; CHECK-8M-NEXT: push {r7, lr} +; CHECK-8M-NEXT: vmov.f32 s0, #1.000000e+01 +; CHECK-8M-NEXT: blx r0 +; CHECK-8M-NEXT: pop.w {r7, lr} +; CHECK-8M-NEXT: mrs r12, control +; CHECK-8M-NEXT: tst.w r12, #8 +; CHECK-8M-NEXT: beq .LBB0_2 +; CHECK-8M-NEXT: @ %bb.1: @ %entry +; CHECK-8M-NEXT: vmrs r12, fpscr +; CHECK-8M-NEXT: vmov s1, lr +; CHECK-8M-NEXT: vmov d1, lr, lr +; CHECK-8M-NEXT: vmov d2, lr, lr +; CHECK-8M-NEXT: vmov d3, lr, lr +; CHECK-8M-NEXT: vmov d4, lr, lr +; CHECK-8M-NEXT: vmov d5, lr, lr +; CHECK-8M-NEXT: vmov d6, lr, lr +; CHECK-8M-NEXT: vmov d7, lr, lr +; CHECK-8M-NEXT: bic r12, r12, #159 +; CHECK-8M-NEXT: bic r12, r12, #4026531840 +; CHECK-8M-NEXT: vmsr fpscr, r12 +; CHECK-8M-NEXT: .LBB0_2: @ %entry +; CHECK-8M-NEXT: mov r0, lr +; CHECK-8M-NEXT: mov r1, lr +; CHECK-8M-NEXT: mov r2, lr +; CHECK-8M-NEXT: mov r3, lr +; CHECK-8M-NEXT: mov r12, lr +; CHECK-8M-NEXT: msr apsr_nzcvqg, lr +; CHECK-8M-NEXT: bxns lr +; +; CHECK-81M-LABEL: f1: +; CHECK-81M: @ %bb.0: @ %entry +; CHECK-81M-NEXT: vstr fpcxtns, [sp, #-4]! 
+; CHECK-81M-NEXT: push {r7, lr} +; CHECK-81M-NEXT: sub sp, #4 +; CHECK-81M-NEXT: vmov.f32 s0, #1.000000e+01 +; CHECK-81M-NEXT: blx r0 +; CHECK-81M-NEXT: add sp, #4 +; CHECK-81M-NEXT: pop.w {r7, lr} +; CHECK-81M-NEXT: vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} +; CHECK-81M-NEXT: vldr fpcxtns, [sp], #4 +; CHECK-81M-NEXT: clrm {r0, r1, r2, r3, r12, apsr} +; CHECK-81M-NEXT: bxns lr +entry: + %call = call float %fptr(float 10.0) #1 + ret float %call +} + +attributes #0 = { "cmse_nonsecure_entry" nounwind } +attributes #1 = { nounwind } + +define double @d1(double (double)* nocapture %fptr) #0 { +; CHECK-8M-LE-LABEL: d1: +; CHECK-8M-LE: @ %bb.0: @ %entry +; CHECK-8M-LE-NEXT: push {r7, lr} +; CHECK-8M-LE-NEXT: vldr d0, .LCPI1_0 +; CHECK-8M-LE-NEXT: blx r0 +; CHECK-8M-LE-NEXT: pop.w {r7, lr} +; CHECK-8M-LE-NEXT: mrs r12, control +; CHECK-8M-LE-NEXT: tst.w r12, #8 +; CHECK-8M-LE-NEXT: beq .LBB1_2 +; CHECK-8M-LE-NEXT: @ %bb.1: @ %entry +; CHECK-8M-LE-NEXT: vmrs r12, fpscr +; CHECK-8M-LE-NEXT: vmov d1, lr, lr +; CHECK-8M-LE-NEXT: vmov d2, lr, lr +; CHECK-8M-LE-NEXT: vmov d3, lr, lr +; CHECK-8M-LE-NEXT: vmov d4, lr, lr +; CHECK-8M-LE-NEXT: vmov d5, lr, lr +; CHECK-8M-LE-NEXT: vmov d6, lr, lr +; CHECK-8M-LE-NEXT: vmov d7, lr, lr +; CHECK-8M-LE-NEXT: bic r12, r12, #159 +; CHECK-8M-LE-NEXT: bic r12, r12, #4026531840 +; CHECK-8M-LE-NEXT: vmsr fpscr, r12 +; CHECK-8M-LE-NEXT: .LBB1_2: @ %entry +; CHECK-8M-LE-NEXT: mov r0, lr +; CHECK-8M-LE-NEXT: mov r1, lr +; CHECK-8M-LE-NEXT: mov r2, lr +; CHECK-8M-LE-NEXT: mov r3, lr +; CHECK-8M-LE-NEXT: mov r12, lr +; CHECK-8M-LE-NEXT: msr apsr_nzcvqg, lr +; CHECK-8M-LE-NEXT: bxns lr +; CHECK-8M-LE-NEXT: .p2align 3 +; CHECK-8M-LE-NEXT: @ %bb.3: +; CHECK-8M-LE-NEXT: .LCPI1_0: +; CHECK-8M-LE-NEXT: .long 0 @ double 10 +; CHECK-8M-LE-NEXT: .long 1076101120 +; +; CHECK-8M-BE-LABEL: d1: +; CHECK-8M-BE: @ %bb.0: @ %entry +; CHECK-8M-BE-NEXT: push {r7, lr} +; CHECK-8M-BE-NEXT: vldr d0, .LCPI1_0 +; CHECK-8M-BE-NEXT: blx r0 +; CHECK-8M-BE-NEXT: pop.w {r7, lr} +; CHECK-8M-BE-NEXT: mrs r12, control +; CHECK-8M-BE-NEXT: tst.w r12, #8 +; CHECK-8M-BE-NEXT: beq .LBB1_2 +; CHECK-8M-BE-NEXT: @ %bb.1: @ %entry +; CHECK-8M-BE-NEXT: vmrs r12, fpscr +; CHECK-8M-BE-NEXT: vmov d1, lr, lr +; CHECK-8M-BE-NEXT: vmov d2, lr, lr +; CHECK-8M-BE-NEXT: vmov d3, lr, lr +; CHECK-8M-BE-NEXT: vmov d4, lr, lr +; CHECK-8M-BE-NEXT: vmov d5, lr, lr +; CHECK-8M-BE-NEXT: vmov d6, lr, lr +; CHECK-8M-BE-NEXT: vmov d7, lr, lr +; CHECK-8M-BE-NEXT: bic r12, r12, #159 +; CHECK-8M-BE-NEXT: bic r12, r12, #4026531840 +; CHECK-8M-BE-NEXT: vmsr fpscr, r12 +; CHECK-8M-BE-NEXT: .LBB1_2: @ %entry +; CHECK-8M-BE-NEXT: mov r0, lr +; CHECK-8M-BE-NEXT: mov r1, lr +; CHECK-8M-BE-NEXT: mov r2, lr +; CHECK-8M-BE-NEXT: mov r3, lr +; CHECK-8M-BE-NEXT: mov r12, lr +; CHECK-8M-BE-NEXT: msr apsr_nzcvqg, lr +; CHECK-8M-BE-NEXT: bxns lr +; CHECK-8M-BE-NEXT: .p2align 3 +; CHECK-8M-BE-NEXT: @ %bb.3: +; CHECK-8M-BE-NEXT: .LCPI1_0: +; CHECK-8M-BE-NEXT: .long 1076101120 @ double 10 +; CHECK-8M-BE-NEXT: .long 0 +; +; CHECK-81M-LE-LABEL: d1: +; CHECK-81M-LE: @ %bb.0: @ %entry +; CHECK-81M-LE-NEXT: vstr fpcxtns, [sp, #-4]! 
+; CHECK-81M-LE-NEXT: push {r7, lr} +; CHECK-81M-LE-NEXT: sub sp, #4 +; CHECK-81M-LE-NEXT: vldr d0, .LCPI1_0 +; CHECK-81M-LE-NEXT: blx r0 +; CHECK-81M-LE-NEXT: add sp, #4 +; CHECK-81M-LE-NEXT: pop.w {r7, lr} +; CHECK-81M-LE-NEXT: vscclrm {s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} +; CHECK-81M-LE-NEXT: vldr fpcxtns, [sp], #4 +; CHECK-81M-LE-NEXT: clrm {r0, r1, r2, r3, r12, apsr} +; CHECK-81M-LE-NEXT: bxns lr +; CHECK-81M-LE-NEXT: .p2align 3 +; CHECK-81M-LE-NEXT: @ %bb.1: +; CHECK-81M-LE-NEXT: .LCPI1_0: +; CHECK-81M-LE-NEXT: .long 0 @ double 10 +; CHECK-81M-LE-NEXT: .long 1076101120 +; +; CHECK-81M-BE-LABEL: d1: +; CHECK-81M-BE: @ %bb.0: @ %entry +; CHECK-81M-BE-NEXT: vstr fpcxtns, [sp, #-4]! +; CHECK-81M-BE-NEXT: push {r7, lr} +; CHECK-81M-BE-NEXT: sub sp, #4 +; CHECK-81M-BE-NEXT: vldr d0, .LCPI1_0 +; CHECK-81M-BE-NEXT: blx r0 +; CHECK-81M-BE-NEXT: add sp, #4 +; CHECK-81M-BE-NEXT: pop.w {r7, lr} +; CHECK-81M-BE-NEXT: vscclrm {s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} +; CHECK-81M-BE-NEXT: vldr fpcxtns, [sp], #4 +; CHECK-81M-BE-NEXT: clrm {r0, r1, r2, r3, r12, apsr} +; CHECK-81M-BE-NEXT: bxns lr +; CHECK-81M-BE-NEXT: .p2align 3 +; CHECK-81M-BE-NEXT: @ %bb.1: +; CHECK-81M-BE-NEXT: .LCPI1_0: +; CHECK-81M-BE-NEXT: .long 1076101120 @ double 10 +; CHECK-81M-BE-NEXT: .long 0 +entry: + %call = call double %fptr(double 10.0) #1 + ret double %call +} + +define float @f2(float (float)* nocapture %fptr) #2 { +; CHECK-8M-LABEL: f2: +; CHECK-8M: @ %bb.0: @ %entry +; CHECK-8M-NEXT: push {r7, lr} +; CHECK-8M-NEXT: vmov.f32 s0, #1.000000e+01 +; CHECK-8M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-NEXT: bic r0, r0, #1 +; CHECK-8M-NEXT: sub sp, #136 +; CHECK-8M-NEXT: vmov r12, s0 +; CHECK-8M-NEXT: vlstm sp +; CHECK-8M-NEXT: vmov s0, r12 +; CHECK-8M-NEXT: ldr r1, [sp, #64] +; CHECK-8M-NEXT: bic r1, r1, #159 +; CHECK-8M-NEXT: bic r1, r1, #4026531840 +; CHECK-8M-NEXT: vmsr fpscr, r1 +; CHECK-8M-NEXT: mov r1, r0 +; CHECK-8M-NEXT: mov r2, r0 +; CHECK-8M-NEXT: mov r3, r0 +; CHECK-8M-NEXT: mov r4, r0 +; CHECK-8M-NEXT: mov r5, r0 +; CHECK-8M-NEXT: mov r6, r0 +; CHECK-8M-NEXT: mov r7, r0 +; CHECK-8M-NEXT: mov r8, r0 +; CHECK-8M-NEXT: mov r9, r0 +; CHECK-8M-NEXT: mov r10, r0 +; CHECK-8M-NEXT: mov r11, r0 +; CHECK-8M-NEXT: msr apsr_nzcvqg, r0 +; CHECK-8M-NEXT: blxns r0 +; CHECK-8M-NEXT: vmov r12, s0 +; CHECK-8M-NEXT: vlldm sp +; CHECK-8M-NEXT: vmov s0, r12 +; CHECK-8M-NEXT: add sp, #136 +; CHECK-8M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-NEXT: pop {r7, pc} +; +; CHECK-81M-LABEL: f2: +; CHECK-81M: @ %bb.0: @ %entry +; CHECK-81M-NEXT: push {r7, lr} +; CHECK-81M-NEXT: vmov.f32 s0, #1.000000e+01 +; CHECK-81M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-NEXT: bic r0, r0, #1 +; CHECK-81M-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} +; CHECK-81M-NEXT: vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr} +; CHECK-81M-NEXT: vstr fpcxts, [sp, #-8]! 
+; CHECK-81M-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; CHECK-81M-NEXT: blxns r0 +; CHECK-81M-NEXT: vldr fpcxts, [sp], #8 +; CHECK-81M-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} +; CHECK-81M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-NEXT: pop {r7, pc} +entry: + %call = call float %fptr(float 10.0) #3 + ret float %call +} + +attributes #2 = { nounwind } +attributes #3 = { "cmse_nonsecure_call" nounwind } + +define double @d2(double (double)* nocapture %fptr) #2 { +; CHECK-8M-LE-LABEL: d2: +; CHECK-8M-LE: @ %bb.0: @ %entry +; CHECK-8M-LE-NEXT: push {r7, lr} +; CHECK-8M-LE-NEXT: vldr d0, .LCPI3_0 +; CHECK-8M-LE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-LE-NEXT: bic r0, r0, #1 +; CHECK-8M-LE-NEXT: sub sp, #136 +; CHECK-8M-LE-NEXT: vmov r11, r12, d0 +; CHECK-8M-LE-NEXT: vlstm sp +; CHECK-8M-LE-NEXT: vmov d0, r11, r12 +; CHECK-8M-LE-NEXT: ldr r1, [sp, #64] +; CHECK-8M-LE-NEXT: bic r1, r1, #159 +; CHECK-8M-LE-NEXT: bic r1, r1, #4026531840 +; CHECK-8M-LE-NEXT: vmsr fpscr, r1 +; CHECK-8M-LE-NEXT: mov r1, r0 +; CHECK-8M-LE-NEXT: mov r2, r0 +; CHECK-8M-LE-NEXT: mov r3, r0 +; CHECK-8M-LE-NEXT: mov r4, r0 +; CHECK-8M-LE-NEXT: mov r5, r0 +; CHECK-8M-LE-NEXT: mov r6, r0 +; CHECK-8M-LE-NEXT: mov r7, r0 +; CHECK-8M-LE-NEXT: mov r8, r0 +; CHECK-8M-LE-NEXT: mov r9, r0 +; CHECK-8M-LE-NEXT: mov r10, r0 +; CHECK-8M-LE-NEXT: msr apsr_nzcvqg, r0 +; CHECK-8M-LE-NEXT: blxns r0 +; CHECK-8M-LE-NEXT: vmov r11, r12, d0 +; CHECK-8M-LE-NEXT: vlldm sp +; CHECK-8M-LE-NEXT: vmov d0, r11, r12 +; CHECK-8M-LE-NEXT: add sp, #136 +; CHECK-8M-LE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-LE-NEXT: pop {r7, pc} +; CHECK-8M-LE-NEXT: .p2align 3 +; CHECK-8M-LE-NEXT: @ %bb.1: +; CHECK-8M-LE-NEXT: .LCPI3_0: +; CHECK-8M-LE-NEXT: .long 0 @ double 10 +; CHECK-8M-LE-NEXT: .long 1076101120 +; +; CHECK-8M-BE-LABEL: d2: +; CHECK-8M-BE: @ %bb.0: @ %entry +; CHECK-8M-BE-NEXT: push {r7, lr} +; CHECK-8M-BE-NEXT: vldr d0, .LCPI3_0 +; CHECK-8M-BE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-BE-NEXT: bic r0, r0, #1 +; CHECK-8M-BE-NEXT: sub sp, #136 +; CHECK-8M-BE-NEXT: vmov r11, r12, d0 +; CHECK-8M-BE-NEXT: vlstm sp +; CHECK-8M-BE-NEXT: vmov d0, r11, r12 +; CHECK-8M-BE-NEXT: ldr r1, [sp, #64] +; CHECK-8M-BE-NEXT: bic r1, r1, #159 +; CHECK-8M-BE-NEXT: bic r1, r1, #4026531840 +; CHECK-8M-BE-NEXT: vmsr fpscr, r1 +; CHECK-8M-BE-NEXT: mov r1, r0 +; CHECK-8M-BE-NEXT: mov r2, r0 +; CHECK-8M-BE-NEXT: mov r3, r0 +; CHECK-8M-BE-NEXT: mov r4, r0 +; CHECK-8M-BE-NEXT: mov r5, r0 +; CHECK-8M-BE-NEXT: mov r6, r0 +; CHECK-8M-BE-NEXT: mov r7, r0 +; CHECK-8M-BE-NEXT: mov r8, r0 +; CHECK-8M-BE-NEXT: mov r9, r0 +; CHECK-8M-BE-NEXT: mov r10, r0 +; CHECK-8M-BE-NEXT: msr apsr_nzcvqg, r0 +; CHECK-8M-BE-NEXT: blxns r0 +; CHECK-8M-BE-NEXT: vmov r11, r12, d0 +; CHECK-8M-BE-NEXT: vlldm sp +; CHECK-8M-BE-NEXT: vmov d0, r11, r12 +; CHECK-8M-BE-NEXT: add sp, #136 +; CHECK-8M-BE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-BE-NEXT: pop {r7, pc} +; CHECK-8M-BE-NEXT: .p2align 3 +; CHECK-8M-BE-NEXT: @ %bb.1: +; CHECK-8M-BE-NEXT: .LCPI3_0: +; CHECK-8M-BE-NEXT: .long 1076101120 @ double 10 +; CHECK-8M-BE-NEXT: .long 0 +; +; CHECK-81M-LE-LABEL: d2: +; CHECK-81M-LE: @ %bb.0: @ %entry +; CHECK-81M-LE-NEXT: push {r7, lr} +; CHECK-81M-LE-NEXT: vldr d0, .LCPI3_0 +; CHECK-81M-LE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-LE-NEXT: bic r0, r0, #1 +; CHECK-81M-LE-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, 
s25, s26, s27, s28, s29, s30, s31} +; CHECK-81M-LE-NEXT: vscclrm {s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr} +; CHECK-81M-LE-NEXT: vstr fpcxts, [sp, #-8]! +; CHECK-81M-LE-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; CHECK-81M-LE-NEXT: blxns r0 +; CHECK-81M-LE-NEXT: vldr fpcxts, [sp], #8 +; CHECK-81M-LE-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} +; CHECK-81M-LE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-LE-NEXT: pop {r7, pc} +; CHECK-81M-LE-NEXT: .p2align 3 +; CHECK-81M-LE-NEXT: @ %bb.1: +; CHECK-81M-LE-NEXT: .LCPI3_0: +; CHECK-81M-LE-NEXT: .long 0 @ double 10 +; CHECK-81M-LE-NEXT: .long 1076101120 +; +; CHECK-81M-BE-LABEL: d2: +; CHECK-81M-BE: @ %bb.0: @ %entry +; CHECK-81M-BE-NEXT: push {r7, lr} +; CHECK-81M-BE-NEXT: vldr d0, .LCPI3_0 +; CHECK-81M-BE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-BE-NEXT: bic r0, r0, #1 +; CHECK-81M-BE-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} +; CHECK-81M-BE-NEXT: vscclrm {s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr} +; CHECK-81M-BE-NEXT: vstr fpcxts, [sp, #-8]! +; CHECK-81M-BE-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; CHECK-81M-BE-NEXT: blxns r0 +; CHECK-81M-BE-NEXT: vldr fpcxts, [sp], #8 +; CHECK-81M-BE-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} +; CHECK-81M-BE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-BE-NEXT: pop {r7, pc} +; CHECK-81M-BE-NEXT: .p2align 3 +; CHECK-81M-BE-NEXT: @ %bb.1: +; CHECK-81M-BE-NEXT: .LCPI3_0: +; CHECK-81M-BE-NEXT: .long 1076101120 @ double 10 +; CHECK-81M-BE-NEXT: .long 0 +entry: + %call = call double %fptr(double 10.0) #3 + ret double %call +} + +define float @f3(float (float)* nocapture %fptr) #4 { +; CHECK-8M-LABEL: f3: +; CHECK-8M: @ %bb.0: @ %entry +; CHECK-8M-NEXT: push {r7, lr} +; CHECK-8M-NEXT: vmov.f32 s0, #1.000000e+01 +; CHECK-8M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-NEXT: bic r0, r0, #1 +; CHECK-8M-NEXT: sub sp, #136 +; CHECK-8M-NEXT: vmov r12, s0 +; CHECK-8M-NEXT: vlstm sp +; CHECK-8M-NEXT: vmov s0, r12 +; CHECK-8M-NEXT: ldr r1, [sp, #64] +; CHECK-8M-NEXT: bic r1, r1, #159 +; CHECK-8M-NEXT: bic r1, r1, #4026531840 +; CHECK-8M-NEXT: vmsr fpscr, r1 +; CHECK-8M-NEXT: mov r1, r0 +; CHECK-8M-NEXT: mov r2, r0 +; CHECK-8M-NEXT: mov r3, r0 +; CHECK-8M-NEXT: mov r4, r0 +; CHECK-8M-NEXT: mov r5, r0 +; CHECK-8M-NEXT: mov r6, r0 +; CHECK-8M-NEXT: mov r7, r0 +; CHECK-8M-NEXT: mov r8, r0 +; CHECK-8M-NEXT: mov r9, r0 +; CHECK-8M-NEXT: mov r10, r0 +; CHECK-8M-NEXT: mov r11, r0 +; CHECK-8M-NEXT: msr apsr_nzcvqg, r0 +; CHECK-8M-NEXT: blxns r0 +; CHECK-8M-NEXT: vmov r12, s0 +; CHECK-8M-NEXT: vlldm sp +; CHECK-8M-NEXT: vmov s0, r12 +; CHECK-8M-NEXT: add sp, #136 +; CHECK-8M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-NEXT: pop {r7, pc} +; +; CHECK-81M-LABEL: f3: +; CHECK-81M: @ %bb.0: @ %entry +; CHECK-81M-NEXT: push {r7, lr} +; CHECK-81M-NEXT: vmov.f32 s0, #1.000000e+01 +; CHECK-81M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-NEXT: bic r0, r0, #1 +; CHECK-81M-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} +; CHECK-81M-NEXT: vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, 
s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr} +; CHECK-81M-NEXT: vstr fpcxts, [sp, #-8]! +; CHECK-81M-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; CHECK-81M-NEXT: blxns r0 +; CHECK-81M-NEXT: vldr fpcxts, [sp], #8 +; CHECK-81M-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} +; CHECK-81M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-NEXT: pop {r7, pc} +entry: + %call = tail call float %fptr(float 10.0) #5 + ret float %call +} + +attributes #4 = { nounwind } +attributes #5 = { "cmse_nonsecure_call" nounwind } + +define double @d3(double (double)* nocapture %fptr) #4 { +; CHECK-8M-LE-LABEL: d3: +; CHECK-8M-LE: @ %bb.0: @ %entry +; CHECK-8M-LE-NEXT: push {r7, lr} +; CHECK-8M-LE-NEXT: vldr d0, .LCPI5_0 +; CHECK-8M-LE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-LE-NEXT: bic r0, r0, #1 +; CHECK-8M-LE-NEXT: sub sp, #136 +; CHECK-8M-LE-NEXT: vmov r11, r12, d0 +; CHECK-8M-LE-NEXT: vlstm sp +; CHECK-8M-LE-NEXT: vmov d0, r11, r12 +; CHECK-8M-LE-NEXT: ldr r1, [sp, #64] +; CHECK-8M-LE-NEXT: bic r1, r1, #159 +; CHECK-8M-LE-NEXT: bic r1, r1, #4026531840 +; CHECK-8M-LE-NEXT: vmsr fpscr, r1 +; CHECK-8M-LE-NEXT: mov r1, r0 +; CHECK-8M-LE-NEXT: mov r2, r0 +; CHECK-8M-LE-NEXT: mov r3, r0 +; CHECK-8M-LE-NEXT: mov r4, r0 +; CHECK-8M-LE-NEXT: mov r5, r0 +; CHECK-8M-LE-NEXT: mov r6, r0 +; CHECK-8M-LE-NEXT: mov r7, r0 +; CHECK-8M-LE-NEXT: mov r8, r0 +; CHECK-8M-LE-NEXT: mov r9, r0 +; CHECK-8M-LE-NEXT: mov r10, r0 +; CHECK-8M-LE-NEXT: msr apsr_nzcvqg, r0 +; CHECK-8M-LE-NEXT: blxns r0 +; CHECK-8M-LE-NEXT: vmov r11, r12, d0 +; CHECK-8M-LE-NEXT: vlldm sp +; CHECK-8M-LE-NEXT: vmov d0, r11, r12 +; CHECK-8M-LE-NEXT: add sp, #136 +; CHECK-8M-LE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-LE-NEXT: pop {r7, pc} +; CHECK-8M-LE-NEXT: .p2align 3 +; CHECK-8M-LE-NEXT: @ %bb.1: +; CHECK-8M-LE-NEXT: .LCPI5_0: +; CHECK-8M-LE-NEXT: .long 0 @ double 10 +; CHECK-8M-LE-NEXT: .long 1076101120 +; +; CHECK-8M-BE-LABEL: d3: +; CHECK-8M-BE: @ %bb.0: @ %entry +; CHECK-8M-BE-NEXT: push {r7, lr} +; CHECK-8M-BE-NEXT: vldr d0, .LCPI5_0 +; CHECK-8M-BE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-BE-NEXT: bic r0, r0, #1 +; CHECK-8M-BE-NEXT: sub sp, #136 +; CHECK-8M-BE-NEXT: vmov r11, r12, d0 +; CHECK-8M-BE-NEXT: vlstm sp +; CHECK-8M-BE-NEXT: vmov d0, r11, r12 +; CHECK-8M-BE-NEXT: ldr r1, [sp, #64] +; CHECK-8M-BE-NEXT: bic r1, r1, #159 +; CHECK-8M-BE-NEXT: bic r1, r1, #4026531840 +; CHECK-8M-BE-NEXT: vmsr fpscr, r1 +; CHECK-8M-BE-NEXT: mov r1, r0 +; CHECK-8M-BE-NEXT: mov r2, r0 +; CHECK-8M-BE-NEXT: mov r3, r0 +; CHECK-8M-BE-NEXT: mov r4, r0 +; CHECK-8M-BE-NEXT: mov r5, r0 +; CHECK-8M-BE-NEXT: mov r6, r0 +; CHECK-8M-BE-NEXT: mov r7, r0 +; CHECK-8M-BE-NEXT: mov r8, r0 +; CHECK-8M-BE-NEXT: mov r9, r0 +; CHECK-8M-BE-NEXT: mov r10, r0 +; CHECK-8M-BE-NEXT: msr apsr_nzcvqg, r0 +; CHECK-8M-BE-NEXT: blxns r0 +; CHECK-8M-BE-NEXT: vmov r11, r12, d0 +; CHECK-8M-BE-NEXT: vlldm sp +; CHECK-8M-BE-NEXT: vmov d0, r11, r12 +; CHECK-8M-BE-NEXT: add sp, #136 +; CHECK-8M-BE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-BE-NEXT: pop {r7, pc} +; CHECK-8M-BE-NEXT: .p2align 3 +; CHECK-8M-BE-NEXT: @ %bb.1: +; CHECK-8M-BE-NEXT: .LCPI5_0: +; CHECK-8M-BE-NEXT: .long 1076101120 @ double 10 +; CHECK-8M-BE-NEXT: .long 0 +; +; CHECK-81M-LE-LABEL: d3: +; CHECK-81M-LE: @ %bb.0: @ %entry +; CHECK-81M-LE-NEXT: push {r7, lr} +; CHECK-81M-LE-NEXT: vldr d0, .LCPI5_0 +; CHECK-81M-LE-NEXT: push.w 
{r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-LE-NEXT: bic r0, r0, #1 +; CHECK-81M-LE-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} +; CHECK-81M-LE-NEXT: vscclrm {s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr} +; CHECK-81M-LE-NEXT: vstr fpcxts, [sp, #-8]! +; CHECK-81M-LE-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; CHECK-81M-LE-NEXT: blxns r0 +; CHECK-81M-LE-NEXT: vldr fpcxts, [sp], #8 +; CHECK-81M-LE-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} +; CHECK-81M-LE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-LE-NEXT: pop {r7, pc} +; CHECK-81M-LE-NEXT: .p2align 3 +; CHECK-81M-LE-NEXT: @ %bb.1: +; CHECK-81M-LE-NEXT: .LCPI5_0: +; CHECK-81M-LE-NEXT: .long 0 @ double 10 +; CHECK-81M-LE-NEXT: .long 1076101120 +; +; CHECK-81M-BE-LABEL: d3: +; CHECK-81M-BE: @ %bb.0: @ %entry +; CHECK-81M-BE-NEXT: push {r7, lr} +; CHECK-81M-BE-NEXT: vldr d0, .LCPI5_0 +; CHECK-81M-BE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-BE-NEXT: bic r0, r0, #1 +; CHECK-81M-BE-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} +; CHECK-81M-BE-NEXT: vscclrm {s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr} +; CHECK-81M-BE-NEXT: vstr fpcxts, [sp, #-8]! +; CHECK-81M-BE-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; CHECK-81M-BE-NEXT: blxns r0 +; CHECK-81M-BE-NEXT: vldr fpcxts, [sp], #8 +; CHECK-81M-BE-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} +; CHECK-81M-BE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-BE-NEXT: pop {r7, pc} +; CHECK-81M-BE-NEXT: .p2align 3 +; CHECK-81M-BE-NEXT: @ %bb.1: +; CHECK-81M-BE-NEXT: .LCPI5_0: +; CHECK-81M-BE-NEXT: .long 1076101120 @ double 10 +; CHECK-81M-BE-NEXT: .long 0 +entry: + %call = tail call double %fptr(double 10.0) #5 + ret double %call +} + +define float @f4(float ()* nocapture %fptr) #6 { +; CHECK-8M-LABEL: f4: +; CHECK-8M: @ %bb.0: @ %entry +; CHECK-8M-NEXT: push {r7, lr} +; CHECK-8M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-NEXT: bic r0, r0, #1 +; CHECK-8M-NEXT: sub sp, #136 +; CHECK-8M-NEXT: vlstm sp +; CHECK-8M-NEXT: mov r1, r0 +; CHECK-8M-NEXT: mov r2, r0 +; CHECK-8M-NEXT: mov r3, r0 +; CHECK-8M-NEXT: mov r4, r0 +; CHECK-8M-NEXT: mov r5, r0 +; CHECK-8M-NEXT: mov r6, r0 +; CHECK-8M-NEXT: mov r7, r0 +; CHECK-8M-NEXT: mov r8, r0 +; CHECK-8M-NEXT: mov r9, r0 +; CHECK-8M-NEXT: mov r10, r0 +; CHECK-8M-NEXT: mov r11, r0 +; CHECK-8M-NEXT: mov r12, r0 +; CHECK-8M-NEXT: msr apsr_nzcvqg, r0 +; CHECK-8M-NEXT: blxns r0 +; CHECK-8M-NEXT: vmov r12, s0 +; CHECK-8M-NEXT: vlldm sp +; CHECK-8M-NEXT: vmov s0, r12 +; CHECK-8M-NEXT: add sp, #136 +; CHECK-8M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-NEXT: pop {r7, pc} +; +; CHECK-81M-LABEL: f4: +; CHECK-81M: @ %bb.0: @ %entry +; CHECK-81M-NEXT: push {r7, lr} +; CHECK-81M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-NEXT: bic r0, r0, #1 +; CHECK-81M-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} +; CHECK-81M-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr} +; CHECK-81M-NEXT: 
vstr fpcxts, [sp, #-8]! +; CHECK-81M-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; CHECK-81M-NEXT: blxns r0 +; CHECK-81M-NEXT: vldr fpcxts, [sp], #8 +; CHECK-81M-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} +; CHECK-81M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-NEXT: pop {r7, pc} +entry: + %call = call float %fptr() #7 + ret float %call +} + +attributes #6 = { nounwind } +attributes #7 = { "cmse_nonsecure_call" nounwind } + +define double @d4(double ()* nocapture %fptr) #6 { +; CHECK-8M-LABEL: d4: +; CHECK-8M: @ %bb.0: @ %entry +; CHECK-8M-NEXT: push {r7, lr} +; CHECK-8M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-NEXT: bic r0, r0, #1 +; CHECK-8M-NEXT: sub sp, #136 +; CHECK-8M-NEXT: vlstm sp +; CHECK-8M-NEXT: mov r1, r0 +; CHECK-8M-NEXT: mov r2, r0 +; CHECK-8M-NEXT: mov r3, r0 +; CHECK-8M-NEXT: mov r4, r0 +; CHECK-8M-NEXT: mov r5, r0 +; CHECK-8M-NEXT: mov r6, r0 +; CHECK-8M-NEXT: mov r7, r0 +; CHECK-8M-NEXT: mov r8, r0 +; CHECK-8M-NEXT: mov r9, r0 +; CHECK-8M-NEXT: mov r10, r0 +; CHECK-8M-NEXT: mov r11, r0 +; CHECK-8M-NEXT: mov r12, r0 +; CHECK-8M-NEXT: msr apsr_nzcvqg, r0 +; CHECK-8M-NEXT: blxns r0 +; CHECK-8M-NEXT: vmov r11, r12, d0 +; CHECK-8M-NEXT: vlldm sp +; CHECK-8M-NEXT: vmov d0, r11, r12 +; CHECK-8M-NEXT: add sp, #136 +; CHECK-8M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-NEXT: pop {r7, pc} +; +; CHECK-81M-LABEL: d4: +; CHECK-81M: @ %bb.0: @ %entry +; CHECK-81M-NEXT: push {r7, lr} +; CHECK-81M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-NEXT: bic r0, r0, #1 +; CHECK-81M-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} +; CHECK-81M-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr} +; CHECK-81M-NEXT: vstr fpcxts, [sp, #-8]! 
+; CHECK-81M-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; CHECK-81M-NEXT: blxns r0 +; CHECK-81M-NEXT: vldr fpcxts, [sp], #8 +; CHECK-81M-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} +; CHECK-81M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-NEXT: pop {r7, pc} +entry: + %call = call double %fptr() #7 + ret double %call +} + +define void @fd(void (float, double)* %f, float %a, double %b) #8 { +; CHECK-8M-LABEL: fd: +; CHECK-8M: @ %bb.0: @ %entry +; CHECK-8M-NEXT: push {r7, lr} +; CHECK-8M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-NEXT: bic r0, r0, #1 +; CHECK-8M-NEXT: sub sp, #136 +; CHECK-8M-NEXT: vmov r12, s0 +; CHECK-8M-NEXT: mov r2, r0 +; CHECK-8M-NEXT: vmov r10, r11, d1 +; CHECK-8M-NEXT: vlstm sp +; CHECK-8M-NEXT: vmov s0, r12 +; CHECK-8M-NEXT: vmov d1, r10, r11 +; CHECK-8M-NEXT: ldr r1, [sp, #64] +; CHECK-8M-NEXT: bic r1, r1, #159 +; CHECK-8M-NEXT: bic r1, r1, #4026531840 +; CHECK-8M-NEXT: vmsr fpscr, r1 +; CHECK-8M-NEXT: mov r1, r0 +; CHECK-8M-NEXT: mov r3, r0 +; CHECK-8M-NEXT: mov r4, r0 +; CHECK-8M-NEXT: mov r5, r0 +; CHECK-8M-NEXT: mov r6, r0 +; CHECK-8M-NEXT: mov r7, r0 +; CHECK-8M-NEXT: mov r8, r0 +; CHECK-8M-NEXT: mov r9, r0 +; CHECK-8M-NEXT: msr apsr_nzcvqg, r0 +; CHECK-8M-NEXT: blxns r0 +; CHECK-8M-NEXT: vlldm sp +; CHECK-8M-NEXT: add sp, #136 +; CHECK-8M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-NEXT: pop {r7, pc} +; +; CHECK-81M-LABEL: fd: +; CHECK-81M: @ %bb.0: @ %entry +; CHECK-81M-NEXT: push {r7, lr} +; CHECK-81M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-NEXT: bic r0, r0, #1 +; CHECK-81M-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} +; CHECK-81M-NEXT: vscclrm {s1, vpr} +; CHECK-81M-NEXT: vscclrm {s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr} +; CHECK-81M-NEXT: vstr fpcxts, [sp, #-8]! 
+; CHECK-81M-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; CHECK-81M-NEXT: blxns r0 +; CHECK-81M-NEXT: vldr fpcxts, [sp], #8 +; CHECK-81M-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} +; CHECK-81M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-NEXT: pop {r7, pc} +entry: + call void %f(float %a, double %b) #9 + ret void +} + +attributes #8 = { nounwind } +attributes #9 = { "cmse_nonsecure_call" nounwind } + +define void @fdff(void (float, double, float, float)* %f, float %a, double %b, float %c, float %d) #8 { +; CHECK-8M-LABEL: fdff: +; CHECK-8M: @ %bb.0: @ %entry +; CHECK-8M-NEXT: push {r7, lr} +; CHECK-8M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-NEXT: bic r0, r0, #1 +; CHECK-8M-NEXT: sub sp, #136 +; CHECK-8M-NEXT: vmov r12, s0 +; CHECK-8M-NEXT: mov r2, r0 +; CHECK-8M-NEXT: vmov r10, r11, d1 +; CHECK-8M-NEXT: mov r3, r0 +; CHECK-8M-NEXT: vmov r9, s1 +; CHECK-8M-NEXT: mov r4, r0 +; CHECK-8M-NEXT: vmov r8, s4 +; CHECK-8M-NEXT: vlstm sp +; CHECK-8M-NEXT: vmov s0, r12 +; CHECK-8M-NEXT: vmov d1, r10, r11 +; CHECK-8M-NEXT: vmov s1, r9 +; CHECK-8M-NEXT: vmov s4, r8 +; CHECK-8M-NEXT: ldr r1, [sp, #64] +; CHECK-8M-NEXT: bic r1, r1, #159 +; CHECK-8M-NEXT: bic r1, r1, #4026531840 +; CHECK-8M-NEXT: vmsr fpscr, r1 +; CHECK-8M-NEXT: mov r1, r0 +; CHECK-8M-NEXT: mov r5, r0 +; CHECK-8M-NEXT: mov r6, r0 +; CHECK-8M-NEXT: mov r7, r0 +; CHECK-8M-NEXT: msr apsr_nzcvqg, r0 +; CHECK-8M-NEXT: blxns r0 +; CHECK-8M-NEXT: vlldm sp +; CHECK-8M-NEXT: add sp, #136 +; CHECK-8M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-NEXT: pop {r7, pc} +; +; CHECK-81M-LABEL: fdff: +; CHECK-81M: @ %bb.0: @ %entry +; CHECK-81M-NEXT: push {r7, lr} +; CHECK-81M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-NEXT: bic r0, r0, #1 +; CHECK-81M-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} +; CHECK-81M-NEXT: vscclrm {s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr} +; CHECK-81M-NEXT: vstr fpcxts, [sp, #-8]! 
+; CHECK-81M-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; CHECK-81M-NEXT: blxns r0 +; CHECK-81M-NEXT: vldr fpcxts, [sp], #8 +; CHECK-81M-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} +; CHECK-81M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-NEXT: pop {r7, pc} +entry: + call void %f(float %a, double %b, float %c, float %d) #9 + ret void +} + +define void @fidififid(void (float, i32, double, i32, float, i32, float, i32, double)* %fu, float %a, i32 %b, double %c, i32 %d, float %e, i32 %f, float %g, i32 %h, double %i) #8 { +; CHECK-8M-LABEL: fidififid: +; CHECK-8M: @ %bb.0: @ %entry +; CHECK-8M-NEXT: push {r7, lr} +; CHECK-8M-NEXT: mov lr, r3 +; CHECK-8M-NEXT: mov r12, r0 +; CHECK-8M-NEXT: mov r0, r1 +; CHECK-8M-NEXT: mov r1, r2 +; CHECK-8M-NEXT: ldr r3, [sp, #8] +; CHECK-8M-NEXT: mov r2, lr +; CHECK-8M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-NEXT: bic r12, r12, #1 +; CHECK-8M-NEXT: sub sp, #136 +; CHECK-8M-NEXT: vmov r11, s0 +; CHECK-8M-NEXT: vmov r9, r10, d1 +; CHECK-8M-NEXT: vmov r8, s1 +; CHECK-8M-NEXT: vmov r7, s4 +; CHECK-8M-NEXT: vmov r5, r6, d3 +; CHECK-8M-NEXT: vlstm sp +; CHECK-8M-NEXT: vmov s0, r11 +; CHECK-8M-NEXT: vmov d1, r9, r10 +; CHECK-8M-NEXT: vmov s1, r8 +; CHECK-8M-NEXT: vmov s4, r7 +; CHECK-8M-NEXT: vmov d3, r5, r6 +; CHECK-8M-NEXT: ldr r4, [sp, #64] +; CHECK-8M-NEXT: bic r4, r4, #159 +; CHECK-8M-NEXT: bic r4, r4, #4026531840 +; CHECK-8M-NEXT: vmsr fpscr, r4 +; CHECK-8M-NEXT: mov r4, r12 +; CHECK-8M-NEXT: msr apsr_nzcvqg, r12 +; CHECK-8M-NEXT: blxns r12 +; CHECK-8M-NEXT: vlldm sp +; CHECK-8M-NEXT: add sp, #136 +; CHECK-8M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-NEXT: pop {r7, pc} +; +; CHECK-81M-LABEL: fidififid: +; CHECK-81M: @ %bb.0: @ %entry +; CHECK-81M-NEXT: push {r7, lr} +; CHECK-81M-NEXT: mov lr, r3 +; CHECK-81M-NEXT: mov r12, r0 +; CHECK-81M-NEXT: mov r0, r1 +; CHECK-81M-NEXT: mov r1, r2 +; CHECK-81M-NEXT: ldr r3, [sp, #8] +; CHECK-81M-NEXT: mov r2, lr +; CHECK-81M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-NEXT: bic r12, r12, #1 +; CHECK-81M-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} +; CHECK-81M-NEXT: vscclrm {s5, vpr} +; CHECK-81M-NEXT: vscclrm {s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr} +; CHECK-81M-NEXT: vstr fpcxts, [sp, #-8]! 
+; CHECK-81M-NEXT: clrm {r4, r5, r6, r7, r8, r9, r10, r11, apsr} +; CHECK-81M-NEXT: blxns r12 +; CHECK-81M-NEXT: vldr fpcxts, [sp], #8 +; CHECK-81M-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} +; CHECK-81M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-NEXT: pop {r7, pc} +entry: + call void %fu(float %a, i32 %b, double %c, i32 %d, float %e, i32 %f, float %g, i32 %h, double %i) #9 + ret void +} +
diff --git a/llvm/test/CodeGen/ARM/cmse-clear-float-hard2.ll b/llvm/test/CodeGen/ARM/cmse-clear-float-hard2.ll
new file mode 100644
index 0000000..715ef0b
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/cmse-clear-float-hard2.ll
@@ -0,0 +1,144 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc %s -o - -mtriple=thumbv8m.main -mattr=+fp-armv8d16sp,+dsp -float-abi=hard | \ +; RUN: FileCheck %s --check-prefix=CHECK-V8-LE +; RUN: llc %s -o - -mtriple=thumbebv8m.main -mattr=+fp-armv8d16sp,+dsp -float-abi=hard | \ +; RUN: FileCheck %s --check-prefix=CHECK-V8-BE + +; RUN: llc %s -o - -mtriple=thumbv8.1m.main -mattr=+fp-armv8d16sp,+dsp -float-abi=hard | \ +; RUN: FileCheck %s --check-prefix=CHECK-V81-LE +; RUN: llc %s -o - -mtriple=thumbebv8.1m.main -mattr=+fp-armv8d16sp,+dsp -float-abi=hard | \ +; RUN: FileCheck %s --check-prefix=CHECK-V81-BE + +attributes #0 = { nounwind } +attributes #1 = { "cmse_nonsecure_call" nounwind } + +define void @fidififiddddff(void (float, i32, double, i32, float, i32, float, i32, double, double, double, double, float, float)* %fu, float %a, i32 %b, double %c, i32 %d, float %e, i32 %f, float %g, i32 %h, double %i, double %j, double %k, double %l, float %m, float %n) #0 { +; CHECK-V8-LE-LABEL: fidififiddddff: +; CHECK-V8-LE: @ %bb.0: @ %entry +; CHECK-V8-LE-NEXT: push {r7, lr} +; CHECK-V8-LE-NEXT: mov lr, r3 +; CHECK-V8-LE-NEXT: mov r12, r0 +; CHECK-V8-LE-NEXT: mov r0, r1 +; CHECK-V8-LE-NEXT: mov r1, r2 +; CHECK-V8-LE-NEXT: ldr r3, [sp, #8] +; CHECK-V8-LE-NEXT: mov r2, lr +; CHECK-V8-LE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-V8-LE-NEXT: bic r12, r12, #1 +; CHECK-V8-LE-NEXT: sub sp, #136 +; CHECK-V8-LE-NEXT: vmov r4, s5 +; CHECK-V8-LE-NEXT: vmov r11, s0 +; CHECK-V8-LE-NEXT: vmov r9, r10, d1 +; CHECK-V8-LE-NEXT: vmov r8, s1 +; CHECK-V8-LE-NEXT: vmov r7, s4 +; CHECK-V8-LE-NEXT: vmov r5, r6, d3 +; CHECK-V8-LE-NEXT: vlstm sp +; CHECK-V8-LE-NEXT: vmov s0, r11 +; CHECK-V8-LE-NEXT: vmov d1, r9, r10 +; CHECK-V8-LE-NEXT: vmov s1, r8 +; CHECK-V8-LE-NEXT: vmov s4, r7 +; CHECK-V8-LE-NEXT: vmov d3, r5, r6 +; CHECK-V8-LE-NEXT: vmov s5, r4 +; CHECK-V8-LE-NEXT: vldr d4, [sp, #32] +; CHECK-V8-LE-NEXT: vldr d5, [sp, #40] +; CHECK-V8-LE-NEXT: vldr d6, [sp, #48] +; CHECK-V8-LE-NEXT: vldr s14, [sp, #56] +; CHECK-V8-LE-NEXT: ldr r4, [sp, #64] +; CHECK-V8-LE-NEXT: bic r4, r4, #159 +; CHECK-V8-LE-NEXT: bic r4, r4, #4026531840 +; CHECK-V8-LE-NEXT: vmsr fpscr, r4 +; CHECK-V8-LE-NEXT: msr apsr_nzcvqg, r12 +; CHECK-V8-LE-NEXT: blxns r12 +; CHECK-V8-LE-NEXT: vlldm sp +; CHECK-V8-LE-NEXT: add sp, #136 +; CHECK-V8-LE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-V8-LE-NEXT: pop {r7, pc} +; +; CHECK-V8-BE-LABEL: fidififiddddff: +; CHECK-V8-BE: @ %bb.0: @ %entry +; CHECK-V8-BE-NEXT: push {r7, lr} +; CHECK-V8-BE-NEXT: mov lr, r3 +; CHECK-V8-BE-NEXT: mov r12, r0 +; CHECK-V8-BE-NEXT: mov r0, r1 +; CHECK-V8-BE-NEXT: mov r1, r2 +; CHECK-V8-BE-NEXT: ldr r3, [sp, #8] +; CHECK-V8-BE-NEXT: mov r2, lr +; CHECK-V8-BE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-V8-BE-NEXT: bic r12, r12,
#1 +; CHECK-V8-BE-NEXT: sub sp, #136 +; CHECK-V8-BE-NEXT: vmov r4, s5 +; CHECK-V8-BE-NEXT: vmov r11, s0 +; CHECK-V8-BE-NEXT: vmov r9, r10, d1 +; CHECK-V8-BE-NEXT: vmov r8, s1 +; CHECK-V8-BE-NEXT: vmov r7, s4 +; CHECK-V8-BE-NEXT: vmov r5, r6, d3 +; CHECK-V8-BE-NEXT: vlstm sp +; CHECK-V8-BE-NEXT: vmov s0, r11 +; CHECK-V8-BE-NEXT: vmov d1, r9, r10 +; CHECK-V8-BE-NEXT: vmov s1, r8 +; CHECK-V8-BE-NEXT: vmov s4, r7 +; CHECK-V8-BE-NEXT: vmov d3, r5, r6 +; CHECK-V8-BE-NEXT: vmov s5, r4 +; CHECK-V8-BE-NEXT: vldr s8, [sp, #32] +; CHECK-V8-BE-NEXT: vldr s9, [sp, #36] +; CHECK-V8-BE-NEXT: vldr s10, [sp, #40] +; CHECK-V8-BE-NEXT: vldr s11, [sp, #44] +; CHECK-V8-BE-NEXT: vldr s12, [sp, #48] +; CHECK-V8-BE-NEXT: vldr s13, [sp, #52] +; CHECK-V8-BE-NEXT: vldr s14, [sp, #56] +; CHECK-V8-BE-NEXT: ldr r4, [sp, #64] +; CHECK-V8-BE-NEXT: bic r4, r4, #159 +; CHECK-V8-BE-NEXT: bic r4, r4, #4026531840 +; CHECK-V8-BE-NEXT: vmsr fpscr, r4 +; CHECK-V8-BE-NEXT: msr apsr_nzcvqg, r12 +; CHECK-V8-BE-NEXT: blxns r12 +; CHECK-V8-BE-NEXT: vlldm sp +; CHECK-V8-BE-NEXT: add sp, #136 +; CHECK-V8-BE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-V8-BE-NEXT: pop {r7, pc} +; +; CHECK-V81-LE-LABEL: fidififiddddff: +; CHECK-V81-LE: @ %bb.0: @ %entry +; CHECK-V81-LE-NEXT: push {r7, lr} +; CHECK-V81-LE-NEXT: mov lr, r3 +; CHECK-V81-LE-NEXT: mov r12, r0 +; CHECK-V81-LE-NEXT: mov r0, r1 +; CHECK-V81-LE-NEXT: mov r1, r2 +; CHECK-V81-LE-NEXT: ldr r3, [sp, #8] +; CHECK-V81-LE-NEXT: mov r2, lr +; CHECK-V81-LE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-V81-LE-NEXT: bic r12, r12, #1 +; CHECK-V81-LE-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} +; CHECK-V81-LE-NEXT: vscclrm {s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr} +; CHECK-V81-LE-NEXT: vstr fpcxts, [sp, #-8]! +; CHECK-V81-LE-NEXT: clrm {r4, r5, r6, r7, r8, r9, r10, r11, apsr} +; CHECK-V81-LE-NEXT: blxns r12 +; CHECK-V81-LE-NEXT: vldr fpcxts, [sp], #8 +; CHECK-V81-LE-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} +; CHECK-V81-LE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-V81-LE-NEXT: pop {r7, pc} +; +; CHECK-V81-BE-LABEL: fidififiddddff: +; CHECK-V81-BE: @ %bb.0: @ %entry +; CHECK-V81-BE-NEXT: push {r7, lr} +; CHECK-V81-BE-NEXT: mov lr, r3 +; CHECK-V81-BE-NEXT: mov r12, r0 +; CHECK-V81-BE-NEXT: mov r0, r1 +; CHECK-V81-BE-NEXT: mov r1, r2 +; CHECK-V81-BE-NEXT: ldr r3, [sp, #8] +; CHECK-V81-BE-NEXT: mov r2, lr +; CHECK-V81-BE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-V81-BE-NEXT: bic r12, r12, #1 +; CHECK-V81-BE-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} +; CHECK-V81-BE-NEXT: vscclrm {s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr} +; CHECK-V81-BE-NEXT: vstr fpcxts, [sp, #-8]! 
+; CHECK-V81-BE-NEXT: clrm {r4, r5, r6, r7, r8, r9, r10, r11, apsr} +; CHECK-V81-BE-NEXT: blxns r12 +; CHECK-V81-BE-NEXT: vldr fpcxts, [sp], #8 +; CHECK-V81-BE-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} +; CHECK-V81-BE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-V81-BE-NEXT: pop {r7, pc} +entry: + call void %fu(float %a, i32 %b, double %c, i32 %d, float %e, i32 %f, float %g, i32 %h, double %i, double %j, double %k, double %l, float %m, float %n) #1 + ret void +} +
diff --git a/llvm/test/CodeGen/ARM/cmse-clear-float-mve.ll b/llvm/test/CodeGen/ARM/cmse-clear-float-mve.ll
new file mode 100644
index 0000000..0da8080
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/cmse-clear-float-mve.ll
@@ -0,0 +1,172 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-eabi -mattr=+8msecext,+mve.fp %s -o - | FileCheck %s --check-prefix=CHECK-SOFTFP +; RUN: llc -mtriple=thumbebv8.1m.main-eabi -mattr=+8msecext,+mve.fp %s -o - | FileCheck %s --check-prefix=CHECK-SOFTFP +; RUN: llc -mtriple=thumbv8.1m.main-eabi -mattr=+8msecext,+mve.fp --float-abi=hard %s -o - | FileCheck %s --check-prefix=CHECK-HARD +; RUN: llc -mtriple=thumbebv8.1m.main-eabi -mattr=+8msecext,+mve.fp --float-abi=hard %s -o - | FileCheck %s --check-prefix=CHECK-HARD + +declare <8 x i16> @g0(...) #0 +declare <4 x float> @g1(...) #0 + +;; +;; Test clearing before return to nonsecure state +;; + +define <8 x i16> @f0() #1 { +; CHECK-SOFTFP-LABEL: f0: +; CHECK-SOFTFP: @ %bb.0: @ %entry +; CHECK-SOFTFP-NEXT: vstr fpcxtns, [sp, #-4]! +; CHECK-SOFTFP-NEXT: .save {r7, lr} +; CHECK-SOFTFP-NEXT: push {r7, lr} +; CHECK-SOFTFP-NEXT: .pad #4 +; CHECK-SOFTFP-NEXT: sub sp, #4 +; CHECK-SOFTFP-NEXT: bl g0 +; CHECK-SOFTFP-NEXT: add sp, #4 +; CHECK-SOFTFP-NEXT: pop.w {r7, lr} +; CHECK-SOFTFP-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} +; CHECK-SOFTFP-NEXT: vldr fpcxtns, [sp], #4 +; CHECK-SOFTFP-NEXT: clrm {r12, apsr} +; CHECK-SOFTFP-NEXT: bxns lr +; +; CHECK-HARD-LABEL: f0: +; CHECK-HARD: @ %bb.0: @ %entry +; CHECK-HARD-NEXT: vstr fpcxtns, [sp, #-4]! +; CHECK-HARD-NEXT: .save {r7, lr} +; CHECK-HARD-NEXT: push {r7, lr} +; CHECK-HARD-NEXT: .pad #4 +; CHECK-HARD-NEXT: sub sp, #4 +; CHECK-HARD-NEXT: bl g0 +; CHECK-HARD-NEXT: add sp, #4 +; CHECK-HARD-NEXT: pop.w {r7, lr} +; CHECK-HARD-NEXT: vscclrm {s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} +; CHECK-HARD-NEXT: vldr fpcxtns, [sp], #4 +; CHECK-HARD-NEXT: clrm {r0, r1, r2, r3, r12, apsr} +; CHECK-HARD-NEXT: bxns lr +entry: + %call = call <8 x i16> bitcast (<8 x i16> (...)* @g0 to <8 x i16> ()*)() #0 + ret <8 x i16> %call +} + +define <4 x float> @f1() #1 { +; CHECK-SOFTFP-LABEL: f1: +; CHECK-SOFTFP: @ %bb.0: @ %entry +; CHECK-SOFTFP-NEXT: vstr fpcxtns, [sp, #-4]! +; CHECK-SOFTFP-NEXT: .save {r7, lr} +; CHECK-SOFTFP-NEXT: push {r7, lr} +; CHECK-SOFTFP-NEXT: .pad #4 +; CHECK-SOFTFP-NEXT: sub sp, #4 +; CHECK-SOFTFP-NEXT: bl g1 +; CHECK-SOFTFP-NEXT: add sp, #4 +; CHECK-SOFTFP-NEXT: pop.w {r7, lr} +; CHECK-SOFTFP-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} +; CHECK-SOFTFP-NEXT: vldr fpcxtns, [sp], #4 +; CHECK-SOFTFP-NEXT: clrm {r12, apsr} +; CHECK-SOFTFP-NEXT: bxns lr +; +; CHECK-HARD-LABEL: f1: +; CHECK-HARD: @ %bb.0: @ %entry +; CHECK-HARD-NEXT: vstr fpcxtns, [sp, #-4]!
+; CHECK-HARD-NEXT: .save {r7, lr} +; CHECK-HARD-NEXT: push {r7, lr} +; CHECK-HARD-NEXT: .pad #4 +; CHECK-HARD-NEXT: sub sp, #4 +; CHECK-HARD-NEXT: bl g1 +; CHECK-HARD-NEXT: add sp, #4 +; CHECK-HARD-NEXT: pop.w {r7, lr} +; CHECK-HARD-NEXT: vscclrm {s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} +; CHECK-HARD-NEXT: vldr fpcxtns, [sp], #4 +; CHECK-HARD-NEXT: clrm {r0, r1, r2, r3, r12, apsr} +; CHECK-HARD-NEXT: bxns lr +entry: + %call = call nnan ninf nsz <4 x float> bitcast (<4 x float> (...)* @g1 to <4 x float> ()*)() #0 + ret <4 x float> %call +} + +;; +;; Test clearing around nonsecure calls +;; + +define void @f2(void (<8 x i16>)* nocapture %cb) #0 { +; CHECK-SOFTFP-LABEL: f2: +; CHECK-SOFTFP: @ %bb.0: @ %entry +; CHECK-SOFTFP-NEXT: .save {r4, lr} +; CHECK-SOFTFP-NEXT: push {r4, lr} +; CHECK-SOFTFP-NEXT: mov r4, r0 +; CHECK-SOFTFP-NEXT: bl g0 +; CHECK-SOFTFP-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-SOFTFP-NEXT: bic r4, r4, #1 +; CHECK-SOFTFP-NEXT: sub sp, #136 +; CHECK-SOFTFP-NEXT: vlstm sp +; CHECK-SOFTFP-NEXT: clrm {r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; CHECK-SOFTFP-NEXT: blxns r4 +; CHECK-SOFTFP-NEXT: vlldm sp +; CHECK-SOFTFP-NEXT: add sp, #136 +; CHECK-SOFTFP-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-SOFTFP-NEXT: pop {r4, pc} +; +; CHECK-HARD-LABEL: f2: +; CHECK-HARD: @ %bb.0: @ %entry +; CHECK-HARD-NEXT: .save {r4, lr} +; CHECK-HARD-NEXT: push {r4, lr} +; CHECK-HARD-NEXT: mov r4, r0 +; CHECK-HARD-NEXT: bl g0 +; CHECK-HARD-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-HARD-NEXT: bic r4, r4, #1 +; CHECK-HARD-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} +; CHECK-HARD-NEXT: vscclrm {s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr} +; CHECK-HARD-NEXT: vstr fpcxts, [sp, #-8]! 
+; CHECK-HARD-NEXT: clrm {r0, r1, r2, r3, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; CHECK-HARD-NEXT: blxns r4 +; CHECK-HARD-NEXT: vldr fpcxts, [sp], #8 +; CHECK-HARD-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} +; CHECK-HARD-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-HARD-NEXT: pop {r4, pc} +entry: + %call = tail call <8 x i16> bitcast (<8 x i16> (...)* @g0 to <8 x i16> ()*)() #0 + tail call void %cb(<8 x i16> %call) #2 + ret void +} + +define void @f3(void (<4 x float>)* nocapture %cb) #0 { +; CHECK-SOFTFP-LABEL: f3: +; CHECK-SOFTFP: @ %bb.0: @ %entry +; CHECK-SOFTFP-NEXT: .save {r4, lr} +; CHECK-SOFTFP-NEXT: push {r4, lr} +; CHECK-SOFTFP-NEXT: mov r4, r0 +; CHECK-SOFTFP-NEXT: bl g1 +; CHECK-SOFTFP-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-SOFTFP-NEXT: bic r4, r4, #1 +; CHECK-SOFTFP-NEXT: sub sp, #136 +; CHECK-SOFTFP-NEXT: vlstm sp +; CHECK-SOFTFP-NEXT: clrm {r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; CHECK-SOFTFP-NEXT: blxns r4 +; CHECK-SOFTFP-NEXT: vlldm sp +; CHECK-SOFTFP-NEXT: add sp, #136 +; CHECK-SOFTFP-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-SOFTFP-NEXT: pop {r4, pc} +; +; CHECK-HARD-LABEL: f3: +; CHECK-HARD: @ %bb.0: @ %entry +; CHECK-HARD-NEXT: .save {r4, lr} +; CHECK-HARD-NEXT: push {r4, lr} +; CHECK-HARD-NEXT: mov r4, r0 +; CHECK-HARD-NEXT: bl g1 +; CHECK-HARD-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-HARD-NEXT: bic r4, r4, #1 +; CHECK-HARD-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} +; CHECK-HARD-NEXT: vscclrm {s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr} +; CHECK-HARD-NEXT: vstr fpcxts, [sp, #-8]! 
+; CHECK-HARD-NEXT: clrm {r0, r1, r2, r3, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; CHECK-HARD-NEXT: blxns r4 +; CHECK-HARD-NEXT: vldr fpcxts, [sp], #8 +; CHECK-HARD-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31} +; CHECK-HARD-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-HARD-NEXT: pop {r4, pc} +entry: + %call = tail call nnan ninf nsz <4 x float> bitcast (<4 x float> (...)* @g1 to <4 x float> ()*)() #0 + tail call void %cb(<4 x float> %call) #2 + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind "cmse_nonsecure_entry" } +attributes #2 = { nounwind "cmse_nonsecure_call" }
diff --git a/llvm/test/CodeGen/ARM/cmse-clear-float.ll b/llvm/test/CodeGen/ARM/cmse-clear-float.ll
new file mode 100644
index 0000000..356d13c
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/cmse-clear-float.ll
@@ -0,0 +1,718 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc %s -o - -mtriple=thumbv8m.main -mattr=+fp-armv8d16sp,+dsp | \ +; RUN: FileCheck %s --check-prefix=CHECK-8M --check-prefix=CHECK-8M-LE +; RUN: llc %s -o - -mtriple=thumbebv8m.main -mattr=+fp-armv8d16sp,+dsp | \ +; RUN: FileCheck %s --check-prefix=CHECK-8M --check-prefix=CHECK-8M-BE + +; RUN: llc %s -o - -mtriple=thumbv8.1m.main -mattr=+fp-armv8d16sp,+dsp | \ +; RUN: FileCheck %s --check-prefix=CHECK-81M --check-prefix=CHECK-81M-LE +; RUN: llc %s -o - -mtriple=thumbebv8.1m.main -mattr=+fp-armv8d16sp,+dsp | \ +; RUN: FileCheck %s --check-prefix=CHECK-81M --check-prefix=CHECK-81M-BE +; RUN: llc %s -o - -mtriple=thumbv8.1m.main -mattr=+mve | \ +; RUN: FileCheck %s --check-prefix=CHECK-81M --check-prefix=CHECK-81M-LE +; RUN: llc %s -o - -mtriple=thumbebv8.1m.main -mattr=+mve | \ +; RUN: FileCheck %s --check-prefix=CHECK-81M --check-prefix=CHECK-81M-BE + +define float @f1(float (float)* nocapture %fptr) #0 { +; CHECK-8M-LABEL: f1: +; CHECK-8M: @ %bb.0: @ %entry +; CHECK-8M-NEXT: push {r7, lr} +; CHECK-8M-NEXT: mov r1, r0 +; CHECK-8M-NEXT: movs r0, #0 +; CHECK-8M-NEXT: movt r0, #16672 +; CHECK-8M-NEXT: blx r1 +; CHECK-8M-NEXT: pop.w {r7, lr} +; CHECK-8M-NEXT: mrs r12, control +; CHECK-8M-NEXT: tst.w r12, #8 +; CHECK-8M-NEXT: beq .LBB0_2 +; CHECK-8M-NEXT: @ %bb.1: @ %entry +; CHECK-8M-NEXT: vmrs r12, fpscr +; CHECK-8M-NEXT: vmov d0, lr, lr +; CHECK-8M-NEXT: vmov d1, lr, lr +; CHECK-8M-NEXT: vmov d2, lr, lr +; CHECK-8M-NEXT: vmov d3, lr, lr +; CHECK-8M-NEXT: vmov d4, lr, lr +; CHECK-8M-NEXT: vmov d5, lr, lr +; CHECK-8M-NEXT: vmov d6, lr, lr +; CHECK-8M-NEXT: vmov d7, lr, lr +; CHECK-8M-NEXT: bic r12, r12, #159 +; CHECK-8M-NEXT: bic r12, r12, #4026531840 +; CHECK-8M-NEXT: vmsr fpscr, r12 +; CHECK-8M-NEXT: .LBB0_2: @ %entry +; CHECK-8M-NEXT: mov r1, lr +; CHECK-8M-NEXT: mov r2, lr +; CHECK-8M-NEXT: mov r3, lr +; CHECK-8M-NEXT: mov r12, lr +; CHECK-8M-NEXT: msr apsr_nzcvqg, lr +; CHECK-8M-NEXT: bxns lr +; +; CHECK-81M-LABEL: f1: +; CHECK-81M: @ %bb.0: @ %entry +; CHECK-81M-NEXT: vstr fpcxtns, [sp, #-4]!
+; CHECK-81M-NEXT: push {r7, lr} +; CHECK-81M-NEXT: sub sp, #4 +; CHECK-81M-NEXT: mov r1, r0 +; CHECK-81M-NEXT: movs r0, #0 +; CHECK-81M-NEXT: movt r0, #16672 +; CHECK-81M-NEXT: blx r1 +; CHECK-81M-NEXT: add sp, #4 +; CHECK-81M-NEXT: pop.w {r7, lr} +; CHECK-81M-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} +; CHECK-81M-NEXT: vldr fpcxtns, [sp], #4 +; CHECK-81M-NEXT: clrm {r1, r2, r3, r12, apsr} +; CHECK-81M-NEXT: bxns lr +entry: + %call = call float %fptr(float 10.0) #1 + ret float %call +} + +attributes #0 = { "cmse_nonsecure_entry" nounwind } +attributes #1 = { nounwind } + +define double @d1(double (double)* nocapture %fptr) #0 { +; CHECK-8M-LE-LABEL: d1: +; CHECK-8M-LE: @ %bb.0: @ %entry +; CHECK-8M-LE-NEXT: push {r7, lr} +; CHECK-8M-LE-NEXT: vldr d0, .LCPI1_0 +; CHECK-8M-LE-NEXT: mov r2, r0 +; CHECK-8M-LE-NEXT: vmov r0, r1, d0 +; CHECK-8M-LE-NEXT: blx r2 +; CHECK-8M-LE-NEXT: pop.w {r7, lr} +; CHECK-8M-LE-NEXT: mrs r12, control +; CHECK-8M-LE-NEXT: tst.w r12, #8 +; CHECK-8M-LE-NEXT: beq .LBB1_2 +; CHECK-8M-LE-NEXT: @ %bb.1: @ %entry +; CHECK-8M-LE-NEXT: vmrs r12, fpscr +; CHECK-8M-LE-NEXT: vmov d0, lr, lr +; CHECK-8M-LE-NEXT: vmov d1, lr, lr +; CHECK-8M-LE-NEXT: vmov d2, lr, lr +; CHECK-8M-LE-NEXT: vmov d3, lr, lr +; CHECK-8M-LE-NEXT: vmov d4, lr, lr +; CHECK-8M-LE-NEXT: vmov d5, lr, lr +; CHECK-8M-LE-NEXT: vmov d6, lr, lr +; CHECK-8M-LE-NEXT: vmov d7, lr, lr +; CHECK-8M-LE-NEXT: bic r12, r12, #159 +; CHECK-8M-LE-NEXT: bic r12, r12, #4026531840 +; CHECK-8M-LE-NEXT: vmsr fpscr, r12 +; CHECK-8M-LE-NEXT: .LBB1_2: @ %entry +; CHECK-8M-LE-NEXT: mov r2, lr +; CHECK-8M-LE-NEXT: mov r3, lr +; CHECK-8M-LE-NEXT: mov r12, lr +; CHECK-8M-LE-NEXT: msr apsr_nzcvqg, lr +; CHECK-8M-LE-NEXT: bxns lr +; CHECK-8M-LE-NEXT: .p2align 3 +; CHECK-8M-LE-NEXT: @ %bb.3: +; CHECK-8M-LE-NEXT: .LCPI1_0: +; CHECK-8M-LE-NEXT: .long 0 @ double 10 +; CHECK-8M-LE-NEXT: .long 1076101120 +; +; CHECK-8M-BE-LABEL: d1: +; CHECK-8M-BE: @ %bb.0: @ %entry +; CHECK-8M-BE-NEXT: push {r7, lr} +; CHECK-8M-BE-NEXT: vldr d0, .LCPI1_0 +; CHECK-8M-BE-NEXT: mov r2, r0 +; CHECK-8M-BE-NEXT: vmov r1, r0, d0 +; CHECK-8M-BE-NEXT: blx r2 +; CHECK-8M-BE-NEXT: pop.w {r7, lr} +; CHECK-8M-BE-NEXT: mrs r12, control +; CHECK-8M-BE-NEXT: tst.w r12, #8 +; CHECK-8M-BE-NEXT: beq .LBB1_2 +; CHECK-8M-BE-NEXT: @ %bb.1: @ %entry +; CHECK-8M-BE-NEXT: vmrs r12, fpscr +; CHECK-8M-BE-NEXT: vmov d0, lr, lr +; CHECK-8M-BE-NEXT: vmov d1, lr, lr +; CHECK-8M-BE-NEXT: vmov d2, lr, lr +; CHECK-8M-BE-NEXT: vmov d3, lr, lr +; CHECK-8M-BE-NEXT: vmov d4, lr, lr +; CHECK-8M-BE-NEXT: vmov d5, lr, lr +; CHECK-8M-BE-NEXT: vmov d6, lr, lr +; CHECK-8M-BE-NEXT: vmov d7, lr, lr +; CHECK-8M-BE-NEXT: bic r12, r12, #159 +; CHECK-8M-BE-NEXT: bic r12, r12, #4026531840 +; CHECK-8M-BE-NEXT: vmsr fpscr, r12 +; CHECK-8M-BE-NEXT: .LBB1_2: @ %entry +; CHECK-8M-BE-NEXT: mov r2, lr +; CHECK-8M-BE-NEXT: mov r3, lr +; CHECK-8M-BE-NEXT: mov r12, lr +; CHECK-8M-BE-NEXT: msr apsr_nzcvqg, lr +; CHECK-8M-BE-NEXT: bxns lr +; CHECK-8M-BE-NEXT: .p2align 3 +; CHECK-8M-BE-NEXT: @ %bb.3: +; CHECK-8M-BE-NEXT: .LCPI1_0: +; CHECK-8M-BE-NEXT: .long 1076101120 @ double 10 +; CHECK-8M-BE-NEXT: .long 0 +; +; CHECK-81M-LE-LABEL: d1: +; CHECK-81M-LE: @ %bb.0: @ %entry +; CHECK-81M-LE-NEXT: vstr fpcxtns, [sp, #-4]! 
+; CHECK-81M-LE-NEXT: push {r7, lr} +; CHECK-81M-LE-NEXT: sub sp, #4 +; CHECK-81M-LE-NEXT: vldr d0, .LCPI1_0 +; CHECK-81M-LE-NEXT: mov r2, r0 +; CHECK-81M-LE-NEXT: vmov r0, r1, d0 +; CHECK-81M-LE-NEXT: blx r2 +; CHECK-81M-LE-NEXT: add sp, #4 +; CHECK-81M-LE-NEXT: pop.w {r7, lr} +; CHECK-81M-LE-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} +; CHECK-81M-LE-NEXT: vldr fpcxtns, [sp], #4 +; CHECK-81M-LE-NEXT: clrm {r2, r3, r12, apsr} +; CHECK-81M-LE-NEXT: bxns lr +; CHECK-81M-LE-NEXT: .p2align 3 +; CHECK-81M-LE-NEXT: @ %bb.1: +; CHECK-81M-LE-NEXT: .LCPI1_0: +; CHECK-81M-LE-NEXT: .long 0 @ double 10 +; CHECK-81M-LE-NEXT: .long 1076101120 +; +; CHECK-81M-BE-LABEL: d1: +; CHECK-81M-BE: @ %bb.0: @ %entry +; CHECK-81M-BE-NEXT: vstr fpcxtns, [sp, #-4]! +; CHECK-81M-BE-NEXT: push {r7, lr} +; CHECK-81M-BE-NEXT: sub sp, #4 +; CHECK-81M-BE-NEXT: vldr d0, .LCPI1_0 +; CHECK-81M-BE-NEXT: mov r2, r0 +; CHECK-81M-BE-NEXT: vmov r1, r0, d0 +; CHECK-81M-BE-NEXT: blx r2 +; CHECK-81M-BE-NEXT: add sp, #4 +; CHECK-81M-BE-NEXT: pop.w {r7, lr} +; CHECK-81M-BE-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} +; CHECK-81M-BE-NEXT: vldr fpcxtns, [sp], #4 +; CHECK-81M-BE-NEXT: clrm {r2, r3, r12, apsr} +; CHECK-81M-BE-NEXT: bxns lr +; CHECK-81M-BE-NEXT: .p2align 3 +; CHECK-81M-BE-NEXT: @ %bb.1: +; CHECK-81M-BE-NEXT: .LCPI1_0: +; CHECK-81M-BE-NEXT: .long 1076101120 @ double 10 +; CHECK-81M-BE-NEXT: .long 0 +entry: + %call = call double %fptr(double 10.0) #1 + ret double %call +} + +define float @f2(float (float)* nocapture %fptr) #2 { +; CHECK-8M-LABEL: f2: +; CHECK-8M: @ %bb.0: @ %entry +; CHECK-8M-NEXT: push {r7, lr} +; CHECK-8M-NEXT: mov r1, r0 +; CHECK-8M-NEXT: movs r0, #0 +; CHECK-8M-NEXT: movt r0, #16672 +; CHECK-8M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-NEXT: bic r1, r1, #1 +; CHECK-8M-NEXT: sub sp, #136 +; CHECK-8M-NEXT: vlstm sp +; CHECK-8M-NEXT: mov r2, r1 +; CHECK-8M-NEXT: mov r3, r1 +; CHECK-8M-NEXT: mov r4, r1 +; CHECK-8M-NEXT: mov r5, r1 +; CHECK-8M-NEXT: mov r6, r1 +; CHECK-8M-NEXT: mov r7, r1 +; CHECK-8M-NEXT: mov r8, r1 +; CHECK-8M-NEXT: mov r9, r1 +; CHECK-8M-NEXT: mov r10, r1 +; CHECK-8M-NEXT: mov r11, r1 +; CHECK-8M-NEXT: mov r12, r1 +; CHECK-8M-NEXT: msr apsr_nzcvqg, r1 +; CHECK-8M-NEXT: blxns r1 +; CHECK-8M-NEXT: vlldm sp +; CHECK-8M-NEXT: add sp, #136 +; CHECK-8M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-NEXT: pop {r7, pc} +; +; CHECK-81M-LABEL: f2: +; CHECK-81M: @ %bb.0: @ %entry +; CHECK-81M-NEXT: push {r7, lr} +; CHECK-81M-NEXT: mov r1, r0 +; CHECK-81M-NEXT: movs r0, #0 +; CHECK-81M-NEXT: movt r0, #16672 +; CHECK-81M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-NEXT: bic r1, r1, #1 +; CHECK-81M-NEXT: sub sp, #136 +; CHECK-81M-NEXT: vlstm sp +; CHECK-81M-NEXT: clrm {r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; CHECK-81M-NEXT: blxns r1 +; CHECK-81M-NEXT: vlldm sp +; CHECK-81M-NEXT: add sp, #136 +; CHECK-81M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-NEXT: pop {r7, pc} +entry: + %call = call float %fptr(float 10.0) #3 + ret float %call +} + +attributes #2 = { nounwind } +attributes #3 = { "cmse_nonsecure_call" nounwind } + +define double @d2(double (double)* nocapture %fptr) #2 { +; CHECK-8M-LE-LABEL: d2: +; CHECK-8M-LE: @ %bb.0: @ %entry +; CHECK-8M-LE-NEXT: push {r7, lr} +; CHECK-8M-LE-NEXT: vldr d0, .LCPI3_0 +; CHECK-8M-LE-NEXT: mov r2, r0 +; CHECK-8M-LE-NEXT: vmov r0, r1, d0 +; CHECK-8M-LE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, 
r11} +; CHECK-8M-LE-NEXT: bic r2, r2, #1 +; CHECK-8M-LE-NEXT: sub sp, #136 +; CHECK-8M-LE-NEXT: vlstm sp +; CHECK-8M-LE-NEXT: mov r3, r2 +; CHECK-8M-LE-NEXT: mov r4, r2 +; CHECK-8M-LE-NEXT: mov r5, r2 +; CHECK-8M-LE-NEXT: mov r6, r2 +; CHECK-8M-LE-NEXT: mov r7, r2 +; CHECK-8M-LE-NEXT: mov r8, r2 +; CHECK-8M-LE-NEXT: mov r9, r2 +; CHECK-8M-LE-NEXT: mov r10, r2 +; CHECK-8M-LE-NEXT: mov r11, r2 +; CHECK-8M-LE-NEXT: mov r12, r2 +; CHECK-8M-LE-NEXT: msr apsr_nzcvqg, r2 +; CHECK-8M-LE-NEXT: blxns r2 +; CHECK-8M-LE-NEXT: vlldm sp +; CHECK-8M-LE-NEXT: add sp, #136 +; CHECK-8M-LE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-LE-NEXT: pop {r7, pc} +; CHECK-8M-LE-NEXT: .p2align 3 +; CHECK-8M-LE-NEXT: @ %bb.1: +; CHECK-8M-LE-NEXT: .LCPI3_0: +; CHECK-8M-LE-NEXT: .long 0 @ double 10 +; CHECK-8M-LE-NEXT: .long 1076101120 +; +; CHECK-8M-BE-LABEL: d2: +; CHECK-8M-BE: @ %bb.0: @ %entry +; CHECK-8M-BE-NEXT: push {r7, lr} +; CHECK-8M-BE-NEXT: vldr d0, .LCPI3_0 +; CHECK-8M-BE-NEXT: mov r2, r0 +; CHECK-8M-BE-NEXT: vmov r1, r0, d0 +; CHECK-8M-BE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-BE-NEXT: bic r2, r2, #1 +; CHECK-8M-BE-NEXT: sub sp, #136 +; CHECK-8M-BE-NEXT: vlstm sp +; CHECK-8M-BE-NEXT: mov r3, r2 +; CHECK-8M-BE-NEXT: mov r4, r2 +; CHECK-8M-BE-NEXT: mov r5, r2 +; CHECK-8M-BE-NEXT: mov r6, r2 +; CHECK-8M-BE-NEXT: mov r7, r2 +; CHECK-8M-BE-NEXT: mov r8, r2 +; CHECK-8M-BE-NEXT: mov r9, r2 +; CHECK-8M-BE-NEXT: mov r10, r2 +; CHECK-8M-BE-NEXT: mov r11, r2 +; CHECK-8M-BE-NEXT: mov r12, r2 +; CHECK-8M-BE-NEXT: msr apsr_nzcvqg, r2 +; CHECK-8M-BE-NEXT: blxns r2 +; CHECK-8M-BE-NEXT: vlldm sp +; CHECK-8M-BE-NEXT: add sp, #136 +; CHECK-8M-BE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-BE-NEXT: pop {r7, pc} +; CHECK-8M-BE-NEXT: .p2align 3 +; CHECK-8M-BE-NEXT: @ %bb.1: +; CHECK-8M-BE-NEXT: .LCPI3_0: +; CHECK-8M-BE-NEXT: .long 1076101120 @ double 10 +; CHECK-8M-BE-NEXT: .long 0 +; +; CHECK-81M-LE-LABEL: d2: +; CHECK-81M-LE: @ %bb.0: @ %entry +; CHECK-81M-LE-NEXT: push {r7, lr} +; CHECK-81M-LE-NEXT: vldr d0, .LCPI3_0 +; CHECK-81M-LE-NEXT: mov r2, r0 +; CHECK-81M-LE-NEXT: vmov r0, r1, d0 +; CHECK-81M-LE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-LE-NEXT: bic r2, r2, #1 +; CHECK-81M-LE-NEXT: sub sp, #136 +; CHECK-81M-LE-NEXT: vlstm sp +; CHECK-81M-LE-NEXT: clrm {r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; CHECK-81M-LE-NEXT: blxns r2 +; CHECK-81M-LE-NEXT: vlldm sp +; CHECK-81M-LE-NEXT: add sp, #136 +; CHECK-81M-LE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-LE-NEXT: pop {r7, pc} +; CHECK-81M-LE-NEXT: .p2align 3 +; CHECK-81M-LE-NEXT: @ %bb.1: +; CHECK-81M-LE-NEXT: .LCPI3_0: +; CHECK-81M-LE-NEXT: .long 0 @ double 10 +; CHECK-81M-LE-NEXT: .long 1076101120 +; +; CHECK-81M-BE-LABEL: d2: +; CHECK-81M-BE: @ %bb.0: @ %entry +; CHECK-81M-BE-NEXT: push {r7, lr} +; CHECK-81M-BE-NEXT: vldr d0, .LCPI3_0 +; CHECK-81M-BE-NEXT: mov r2, r0 +; CHECK-81M-BE-NEXT: vmov r1, r0, d0 +; CHECK-81M-BE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-BE-NEXT: bic r2, r2, #1 +; CHECK-81M-BE-NEXT: sub sp, #136 +; CHECK-81M-BE-NEXT: vlstm sp +; CHECK-81M-BE-NEXT: clrm {r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; CHECK-81M-BE-NEXT: blxns r2 +; CHECK-81M-BE-NEXT: vlldm sp +; CHECK-81M-BE-NEXT: add sp, #136 +; CHECK-81M-BE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-BE-NEXT: pop {r7, pc} +; CHECK-81M-BE-NEXT: .p2align 3 +; CHECK-81M-BE-NEXT: @ %bb.1: +; CHECK-81M-BE-NEXT: .LCPI3_0: +; CHECK-81M-BE-NEXT: .long 1076101120 @ double 10 +; 
CHECK-81M-BE-NEXT: .long 0 +entry: + %call = call double %fptr(double 10.0) #3 + ret double %call +} + +define float @f3(float (float)* nocapture %fptr) #4 { +; CHECK-8M-LABEL: f3: +; CHECK-8M: @ %bb.0: @ %entry +; CHECK-8M-NEXT: push {r7, lr} +; CHECK-8M-NEXT: mov r1, r0 +; CHECK-8M-NEXT: movs r0, #0 +; CHECK-8M-NEXT: movt r0, #16672 +; CHECK-8M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-NEXT: bic r1, r1, #1 +; CHECK-8M-NEXT: sub sp, #136 +; CHECK-8M-NEXT: vlstm sp +; CHECK-8M-NEXT: mov r2, r1 +; CHECK-8M-NEXT: mov r3, r1 +; CHECK-8M-NEXT: mov r4, r1 +; CHECK-8M-NEXT: mov r5, r1 +; CHECK-8M-NEXT: mov r6, r1 +; CHECK-8M-NEXT: mov r7, r1 +; CHECK-8M-NEXT: mov r8, r1 +; CHECK-8M-NEXT: mov r9, r1 +; CHECK-8M-NEXT: mov r10, r1 +; CHECK-8M-NEXT: mov r11, r1 +; CHECK-8M-NEXT: mov r12, r1 +; CHECK-8M-NEXT: msr apsr_nzcvqg, r1 +; CHECK-8M-NEXT: blxns r1 +; CHECK-8M-NEXT: vlldm sp +; CHECK-8M-NEXT: add sp, #136 +; CHECK-8M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-NEXT: pop {r7, pc} +; +; CHECK-81M-LABEL: f3: +; CHECK-81M: @ %bb.0: @ %entry +; CHECK-81M-NEXT: push {r7, lr} +; CHECK-81M-NEXT: mov r1, r0 +; CHECK-81M-NEXT: movs r0, #0 +; CHECK-81M-NEXT: movt r0, #16672 +; CHECK-81M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-NEXT: bic r1, r1, #1 +; CHECK-81M-NEXT: sub sp, #136 +; CHECK-81M-NEXT: vlstm sp +; CHECK-81M-NEXT: clrm {r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; CHECK-81M-NEXT: blxns r1 +; CHECK-81M-NEXT: vlldm sp +; CHECK-81M-NEXT: add sp, #136 +; CHECK-81M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-NEXT: pop {r7, pc} +entry: + %call = tail call float %fptr(float 10.0) #5 + ret float %call +} + +attributes #4 = { nounwind } +attributes #5 = { "cmse_nonsecure_call" nounwind } + +define double @d3(double (double)* nocapture %fptr) #4 { +; CHECK-8M-LE-LABEL: d3: +; CHECK-8M-LE: @ %bb.0: @ %entry +; CHECK-8M-LE-NEXT: push {r7, lr} +; CHECK-8M-LE-NEXT: vldr d0, .LCPI5_0 +; CHECK-8M-LE-NEXT: mov r2, r0 +; CHECK-8M-LE-NEXT: vmov r0, r1, d0 +; CHECK-8M-LE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-LE-NEXT: bic r2, r2, #1 +; CHECK-8M-LE-NEXT: sub sp, #136 +; CHECK-8M-LE-NEXT: vlstm sp +; CHECK-8M-LE-NEXT: mov r3, r2 +; CHECK-8M-LE-NEXT: mov r4, r2 +; CHECK-8M-LE-NEXT: mov r5, r2 +; CHECK-8M-LE-NEXT: mov r6, r2 +; CHECK-8M-LE-NEXT: mov r7, r2 +; CHECK-8M-LE-NEXT: mov r8, r2 +; CHECK-8M-LE-NEXT: mov r9, r2 +; CHECK-8M-LE-NEXT: mov r10, r2 +; CHECK-8M-LE-NEXT: mov r11, r2 +; CHECK-8M-LE-NEXT: mov r12, r2 +; CHECK-8M-LE-NEXT: msr apsr_nzcvqg, r2 +; CHECK-8M-LE-NEXT: blxns r2 +; CHECK-8M-LE-NEXT: vlldm sp +; CHECK-8M-LE-NEXT: add sp, #136 +; CHECK-8M-LE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-LE-NEXT: pop {r7, pc} +; CHECK-8M-LE-NEXT: .p2align 3 +; CHECK-8M-LE-NEXT: @ %bb.1: +; CHECK-8M-LE-NEXT: .LCPI5_0: +; CHECK-8M-LE-NEXT: .long 0 @ double 10 +; CHECK-8M-LE-NEXT: .long 1076101120 +; +; CHECK-8M-BE-LABEL: d3: +; CHECK-8M-BE: @ %bb.0: @ %entry +; CHECK-8M-BE-NEXT: push {r7, lr} +; CHECK-8M-BE-NEXT: vldr d0, .LCPI5_0 +; CHECK-8M-BE-NEXT: mov r2, r0 +; CHECK-8M-BE-NEXT: vmov r1, r0, d0 +; CHECK-8M-BE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-BE-NEXT: bic r2, r2, #1 +; CHECK-8M-BE-NEXT: sub sp, #136 +; CHECK-8M-BE-NEXT: vlstm sp +; CHECK-8M-BE-NEXT: mov r3, r2 +; CHECK-8M-BE-NEXT: mov r4, r2 +; CHECK-8M-BE-NEXT: mov r5, r2 +; CHECK-8M-BE-NEXT: mov r6, r2 +; CHECK-8M-BE-NEXT: mov r7, r2 +; CHECK-8M-BE-NEXT: mov r8, r2 +; CHECK-8M-BE-NEXT: mov r9, r2 +; CHECK-8M-BE-NEXT: mov 
r10, r2 +; CHECK-8M-BE-NEXT: mov r11, r2 +; CHECK-8M-BE-NEXT: mov r12, r2 +; CHECK-8M-BE-NEXT: msr apsr_nzcvqg, r2 +; CHECK-8M-BE-NEXT: blxns r2 +; CHECK-8M-BE-NEXT: vlldm sp +; CHECK-8M-BE-NEXT: add sp, #136 +; CHECK-8M-BE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-BE-NEXT: pop {r7, pc} +; CHECK-8M-BE-NEXT: .p2align 3 +; CHECK-8M-BE-NEXT: @ %bb.1: +; CHECK-8M-BE-NEXT: .LCPI5_0: +; CHECK-8M-BE-NEXT: .long 1076101120 @ double 10 +; CHECK-8M-BE-NEXT: .long 0 +; +; CHECK-81M-LE-LABEL: d3: +; CHECK-81M-LE: @ %bb.0: @ %entry +; CHECK-81M-LE-NEXT: push {r7, lr} +; CHECK-81M-LE-NEXT: vldr d0, .LCPI5_0 +; CHECK-81M-LE-NEXT: mov r2, r0 +; CHECK-81M-LE-NEXT: vmov r0, r1, d0 +; CHECK-81M-LE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-LE-NEXT: bic r2, r2, #1 +; CHECK-81M-LE-NEXT: sub sp, #136 +; CHECK-81M-LE-NEXT: vlstm sp +; CHECK-81M-LE-NEXT: clrm {r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; CHECK-81M-LE-NEXT: blxns r2 +; CHECK-81M-LE-NEXT: vlldm sp +; CHECK-81M-LE-NEXT: add sp, #136 +; CHECK-81M-LE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-LE-NEXT: pop {r7, pc} +; CHECK-81M-LE-NEXT: .p2align 3 +; CHECK-81M-LE-NEXT: @ %bb.1: +; CHECK-81M-LE-NEXT: .LCPI5_0: +; CHECK-81M-LE-NEXT: .long 0 @ double 10 +; CHECK-81M-LE-NEXT: .long 1076101120 +; +; CHECK-81M-BE-LABEL: d3: +; CHECK-81M-BE: @ %bb.0: @ %entry +; CHECK-81M-BE-NEXT: push {r7, lr} +; CHECK-81M-BE-NEXT: vldr d0, .LCPI5_0 +; CHECK-81M-BE-NEXT: mov r2, r0 +; CHECK-81M-BE-NEXT: vmov r1, r0, d0 +; CHECK-81M-BE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-BE-NEXT: bic r2, r2, #1 +; CHECK-81M-BE-NEXT: sub sp, #136 +; CHECK-81M-BE-NEXT: vlstm sp +; CHECK-81M-BE-NEXT: clrm {r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; CHECK-81M-BE-NEXT: blxns r2 +; CHECK-81M-BE-NEXT: vlldm sp +; CHECK-81M-BE-NEXT: add sp, #136 +; CHECK-81M-BE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-BE-NEXT: pop {r7, pc} +; CHECK-81M-BE-NEXT: .p2align 3 +; CHECK-81M-BE-NEXT: @ %bb.1: +; CHECK-81M-BE-NEXT: .LCPI5_0: +; CHECK-81M-BE-NEXT: .long 1076101120 @ double 10 +; CHECK-81M-BE-NEXT: .long 0 +entry: + %call = tail call double %fptr(double 10.0) #5 + ret double %call +} + +define float @f4(float ()* nocapture %fptr) #6 { +; CHECK-8M-LABEL: f4: +; CHECK-8M: @ %bb.0: @ %entry +; CHECK-8M-NEXT: push {r7, lr} +; CHECK-8M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-NEXT: bic r0, r0, #1 +; CHECK-8M-NEXT: sub sp, #136 +; CHECK-8M-NEXT: vlstm sp +; CHECK-8M-NEXT: mov r1, r0 +; CHECK-8M-NEXT: mov r2, r0 +; CHECK-8M-NEXT: mov r3, r0 +; CHECK-8M-NEXT: mov r4, r0 +; CHECK-8M-NEXT: mov r5, r0 +; CHECK-8M-NEXT: mov r6, r0 +; CHECK-8M-NEXT: mov r7, r0 +; CHECK-8M-NEXT: mov r8, r0 +; CHECK-8M-NEXT: mov r9, r0 +; CHECK-8M-NEXT: mov r10, r0 +; CHECK-8M-NEXT: mov r11, r0 +; CHECK-8M-NEXT: mov r12, r0 +; CHECK-8M-NEXT: msr apsr_nzcvqg, r0 +; CHECK-8M-NEXT: blxns r0 +; CHECK-8M-NEXT: vlldm sp +; CHECK-8M-NEXT: add sp, #136 +; CHECK-8M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-NEXT: pop {r7, pc} +; +; CHECK-81M-LABEL: f4: +; CHECK-81M: @ %bb.0: @ %entry +; CHECK-81M-NEXT: push {r7, lr} +; CHECK-81M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-NEXT: bic r0, r0, #1 +; CHECK-81M-NEXT: sub sp, #136 +; CHECK-81M-NEXT: vlstm sp +; CHECK-81M-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; CHECK-81M-NEXT: blxns r0 +; CHECK-81M-NEXT: vlldm sp +; CHECK-81M-NEXT: add sp, #136 +; CHECK-81M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; 
CHECK-81M-NEXT: pop {r7, pc} +entry: + %call = call float %fptr() #7 + ret float %call +} + +attributes #6 = { nounwind } +attributes #7 = { "cmse_nonsecure_call" nounwind } + +define double @d4(double ()* nocapture %fptr) #6 { +; CHECK-8M-LABEL: d4: +; CHECK-8M: @ %bb.0: @ %entry +; CHECK-8M-NEXT: push {r7, lr} +; CHECK-8M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-NEXT: bic r0, r0, #1 +; CHECK-8M-NEXT: sub sp, #136 +; CHECK-8M-NEXT: vlstm sp +; CHECK-8M-NEXT: mov r1, r0 +; CHECK-8M-NEXT: mov r2, r0 +; CHECK-8M-NEXT: mov r3, r0 +; CHECK-8M-NEXT: mov r4, r0 +; CHECK-8M-NEXT: mov r5, r0 +; CHECK-8M-NEXT: mov r6, r0 +; CHECK-8M-NEXT: mov r7, r0 +; CHECK-8M-NEXT: mov r8, r0 +; CHECK-8M-NEXT: mov r9, r0 +; CHECK-8M-NEXT: mov r10, r0 +; CHECK-8M-NEXT: mov r11, r0 +; CHECK-8M-NEXT: mov r12, r0 +; CHECK-8M-NEXT: msr apsr_nzcvqg, r0 +; CHECK-8M-NEXT: blxns r0 +; CHECK-8M-NEXT: vlldm sp +; CHECK-8M-NEXT: add sp, #136 +; CHECK-8M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-NEXT: pop {r7, pc} +; +; CHECK-81M-LABEL: d4: +; CHECK-81M: @ %bb.0: @ %entry +; CHECK-81M-NEXT: push {r7, lr} +; CHECK-81M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-NEXT: bic r0, r0, #1 +; CHECK-81M-NEXT: sub sp, #136 +; CHECK-81M-NEXT: vlstm sp +; CHECK-81M-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; CHECK-81M-NEXT: blxns r0 +; CHECK-81M-NEXT: vlldm sp +; CHECK-81M-NEXT: add sp, #136 +; CHECK-81M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-NEXT: pop {r7, pc} +entry: + %call = call double %fptr() #7 + ret double %call +} + +define void @fd(void (float, double)* %f, float %a, double %b) #8 { +; CHECK-8M-LABEL: fd: +; CHECK-8M: @ %bb.0: @ %entry +; CHECK-8M-NEXT: push {r7, lr} +; CHECK-8M-NEXT: mov r12, r0 +; CHECK-8M-NEXT: mov r0, r1 +; CHECK-8M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-NEXT: bic r12, r12, #1 +; CHECK-8M-NEXT: sub sp, #136 +; CHECK-8M-NEXT: vlstm sp +; CHECK-8M-NEXT: mov r1, r12 +; CHECK-8M-NEXT: mov r4, r12 +; CHECK-8M-NEXT: mov r5, r12 +; CHECK-8M-NEXT: mov r6, r12 +; CHECK-8M-NEXT: mov r7, r12 +; CHECK-8M-NEXT: mov r8, r12 +; CHECK-8M-NEXT: mov r9, r12 +; CHECK-8M-NEXT: mov r10, r12 +; CHECK-8M-NEXT: mov r11, r12 +; CHECK-8M-NEXT: msr apsr_nzcvqg, r12 +; CHECK-8M-NEXT: blxns r12 +; CHECK-8M-NEXT: vlldm sp +; CHECK-8M-NEXT: add sp, #136 +; CHECK-8M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-NEXT: pop {r7, pc} +; +; CHECK-81M-LABEL: fd: +; CHECK-81M: @ %bb.0: @ %entry +; CHECK-81M-NEXT: push {r7, lr} +; CHECK-81M-NEXT: mov r12, r0 +; CHECK-81M-NEXT: mov r0, r1 +; CHECK-81M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-NEXT: bic r12, r12, #1 +; CHECK-81M-NEXT: sub sp, #136 +; CHECK-81M-NEXT: vlstm sp +; CHECK-81M-NEXT: clrm {r1, r4, r5, r6, r7, r8, r9, r10, r11, apsr} +; CHECK-81M-NEXT: blxns r12 +; CHECK-81M-NEXT: vlldm sp +; CHECK-81M-NEXT: add sp, #136 +; CHECK-81M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-NEXT: pop {r7, pc} +entry: + call void %f(float %a, double %b) #9 + ret void +} + +attributes #8 = { nounwind } +attributes #9 = { "cmse_nonsecure_call" nounwind } + +define float @f1_minsize(float (float)* nocapture %fptr) #10 { +; CHECK-8M-LABEL: f1_minsize: +; CHECK-8M: @ %bb.0: @ %entry +; CHECK-8M-NEXT: push {r7, lr} +; CHECK-8M-NEXT: mov r1, r0 +; CHECK-8M-NEXT: ldr r0, .LCPI9_0 +; CHECK-8M-NEXT: blx r1 +; CHECK-8M-NEXT: pop.w {r7, lr} +; CHECK-8M-NEXT: vmrs r12, fpscr +; CHECK-8M-NEXT: vmov d0, lr, lr +; CHECK-8M-NEXT: vmov d1, lr, lr +; 
CHECK-8M-NEXT: mov r1, lr +; CHECK-8M-NEXT: vmov d2, lr, lr +; CHECK-8M-NEXT: mov r2, lr +; CHECK-8M-NEXT: vmov d3, lr, lr +; CHECK-8M-NEXT: mov r3, lr +; CHECK-8M-NEXT: vmov d4, lr, lr +; CHECK-8M-NEXT: vmov d5, lr, lr +; CHECK-8M-NEXT: vmov d6, lr, lr +; CHECK-8M-NEXT: vmov d7, lr, lr +; CHECK-8M-NEXT: bic r12, r12, #159 +; CHECK-8M-NEXT: bic r12, r12, #4026531840 +; CHECK-8M-NEXT: vmsr fpscr, r12 +; CHECK-8M-NEXT: mov r12, lr +; CHECK-8M-NEXT: msr apsr_nzcvqg, lr +; CHECK-8M-NEXT: bxns lr +; CHECK-8M-NEXT: .p2align 2 +; CHECK-8M-NEXT: @ %bb.1: +; CHECK-8M-NEXT: .LCPI9_0: +; CHECK-8M-NEXT: .long 1092616192 @ 0x41200000 +; +; CHECK-81M-LABEL: f1_minsize: +; CHECK-81M: @ %bb.0: @ %entry +; CHECK-81M-NEXT: vstr fpcxtns, [sp, #-4]! +; CHECK-81M-NEXT: push {r6, r7, lr} +; CHECK-81M-NEXT: mov r1, r0 +; CHECK-81M-NEXT: ldr r0, .LCPI9_0 +; CHECK-81M-NEXT: blx r1 +; CHECK-81M-NEXT: pop.w {r3, r7, lr} +; CHECK-81M-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} +; CHECK-81M-NEXT: vldr fpcxtns, [sp], #4 +; CHECK-81M-NEXT: clrm {r1, r2, r3, r12, apsr} +; CHECK-81M-NEXT: bxns lr +; CHECK-81M-NEXT: .p2align 2 +; CHECK-81M-NEXT: @ %bb.1: +; CHECK-81M-NEXT: .LCPI9_0: +; CHECK-81M-NEXT: .long 1092616192 @ 0x41200000 +entry: + %call = call float %fptr(float 10.0) #11 + ret float %call +} + +attributes #10 = { "cmse_nonsecure_entry" minsize nounwind } +attributes #11 = { nounwind } diff --git a/llvm/test/CodeGen/ARM/cmse-clear.ll b/llvm/test/CodeGen/ARM/cmse-clear.ll new file mode 100644 index 0000000..5c5a867 --- /dev/null +++ b/llvm/test/CodeGen/ARM/cmse-clear.ll @@ -0,0 +1,634 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc %s -o - -mtriple=thumbv8m.base | \ +; RUN: FileCheck %s --check-prefix=CHECK-8B +; RUN: llc %s -o - -mtriple=thumbebv8m.base | \ +; RUN: FileCheck %s --check-prefix=CHECK-8B +; RUN: llc %s -o - -mtriple=thumbv8m.main -mattr=-fpregs,+dsp | \ +; RUN: FileCheck %s --check-prefix=CHECK-8M-SOFT +; RUN: llc %s -o - -mtriple=thumbebv8m.main -mattr=-fpregs,+dsp | \ +; RUN: FileCheck %s --check-prefix=CHECK-8M-SOFT +; RUN: llc %s -o - -mtriple=thumbv8m.main -mattr=+fp-armv8d16sp,+dsp | \ +; RUN: FileCheck %s --check-prefix=CHECK-8M-SOFTFP +; RUN: llc %s -o - -mtriple=thumbebv8m.main -mattr=+fp-armv8d16sp,+dsp | \ +; RUN: FileCheck %s --check-prefix=CHECK-8M-SOFTFP +; RUN: llc %s -o - -mtriple=thumbv8.1m.main -mattr=-fpregs,+dsp | \ +; RUN: FileCheck %s --check-prefix=CHECK-81M-SOFT +; RUN: llc %s -o - -mtriple=thumbebv8.1m.main -mattr=-fpregs,+dsp | \ +; RUN: FileCheck %s --check-prefix=CHECK-81M-SOFT +; RUN: llc %s -o - -mtriple=thumbv8.1m.main -mattr=+fp-armv8d16sp,+dsp | \ +; RUN: FileCheck %s --check-prefix=CHECK-81M-SOFTFP +; RUN: llc %s -o - -mtriple=thumbebv8.1m.main -mattr=+fp-armv8d16sp,+dsp | \ +; RUN: FileCheck %s --check-prefix=CHECK-81M-SOFTFP +; RUN: llc %s -o - -mtriple=thumbv8.1m.main -mattr=+mve | \ +; RUN: FileCheck %s --check-prefix=CHECK-81M-SOFTFP +; RUN: llc %s -o - -mtriple=thumbebv8.1m.main -mattr=+mve | \ +; RUN: FileCheck %s --check-prefix=CHECK-81M-SOFTFP + +define i32 @ns_entry(i32 (i32)* nocapture %fptr) #0 { +; CHECK-8B-LABEL: ns_entry: +; CHECK-8B: @ %bb.0: @ %entry +; CHECK-8B-NEXT: push {r7, lr} +; CHECK-8B-NEXT: mov r1, r0 +; CHECK-8B-NEXT: movs r0, #10 +; CHECK-8B-NEXT: blx r1 +; CHECK-8B-NEXT: pop {r7} +; CHECK-8B-NEXT: pop {r1} +; CHECK-8B-NEXT: mov lr, r1 +; CHECK-8B-NEXT: mov r1, lr +; CHECK-8B-NEXT: mov r2, lr +; CHECK-8B-NEXT: mov r3, lr +; CHECK-8B-NEXT: mov r12, lr
+; CHECK-8B-NEXT: msr apsr, lr +; CHECK-8B-NEXT: bxns lr +; +; CHECK-8M-SOFT-LABEL: ns_entry: +; CHECK-8M-SOFT: @ %bb.0: @ %entry +; CHECK-8M-SOFT-NEXT: push {r7, lr} +; CHECK-8M-SOFT-NEXT: mov r1, r0 +; CHECK-8M-SOFT-NEXT: movs r0, #10 +; CHECK-8M-SOFT-NEXT: blx r1 +; CHECK-8M-SOFT-NEXT: pop.w {r7, lr} +; CHECK-8M-SOFT-NEXT: mov r1, lr +; CHECK-8M-SOFT-NEXT: mov r2, lr +; CHECK-8M-SOFT-NEXT: mov r3, lr +; CHECK-8M-SOFT-NEXT: mov r12, lr +; CHECK-8M-SOFT-NEXT: msr apsr_nzcvqg, lr +; CHECK-8M-SOFT-NEXT: bxns lr +; +; CHECK-8M-SOFTFP-LABEL: ns_entry: +; CHECK-8M-SOFTFP: @ %bb.0: @ %entry +; CHECK-8M-SOFTFP-NEXT: push {r7, lr} +; CHECK-8M-SOFTFP-NEXT: mov r1, r0 +; CHECK-8M-SOFTFP-NEXT: movs r0, #10 +; CHECK-8M-SOFTFP-NEXT: blx r1 +; CHECK-8M-SOFTFP-NEXT: pop.w {r7, lr} +; CHECK-8M-SOFTFP-NEXT: mrs r12, control +; CHECK-8M-SOFTFP-NEXT: tst.w r12, #8 +; CHECK-8M-SOFTFP-NEXT: beq .LBB0_2 +; CHECK-8M-SOFTFP-NEXT: @ %bb.1: @ %entry +; CHECK-8M-SOFTFP-NEXT: vmrs r12, fpscr +; CHECK-8M-SOFTFP-NEXT: vmov d0, lr, lr +; CHECK-8M-SOFTFP-NEXT: vmov d1, lr, lr +; CHECK-8M-SOFTFP-NEXT: vmov d2, lr, lr +; CHECK-8M-SOFTFP-NEXT: vmov d3, lr, lr +; CHECK-8M-SOFTFP-NEXT: vmov d4, lr, lr +; CHECK-8M-SOFTFP-NEXT: vmov d5, lr, lr +; CHECK-8M-SOFTFP-NEXT: vmov d6, lr, lr +; CHECK-8M-SOFTFP-NEXT: vmov d7, lr, lr +; CHECK-8M-SOFTFP-NEXT: bic r12, r12, #159 +; CHECK-8M-SOFTFP-NEXT: bic r12, r12, #4026531840 +; CHECK-8M-SOFTFP-NEXT: vmsr fpscr, r12 +; CHECK-8M-SOFTFP-NEXT: .LBB0_2: @ %entry +; CHECK-8M-SOFTFP-NEXT: mov r1, lr +; CHECK-8M-SOFTFP-NEXT: mov r2, lr +; CHECK-8M-SOFTFP-NEXT: mov r3, lr +; CHECK-8M-SOFTFP-NEXT: mov r12, lr +; CHECK-8M-SOFTFP-NEXT: msr apsr_nzcvqg, lr +; CHECK-8M-SOFTFP-NEXT: bxns lr +; +; CHECK-81M-SOFT-LABEL: ns_entry: +; CHECK-81M-SOFT: @ %bb.0: @ %entry +; CHECK-81M-SOFT-NEXT: vstr fpcxtns, [sp, #-4]! +; CHECK-81M-SOFT-NEXT: push {r7, lr} +; CHECK-81M-SOFT-NEXT: sub sp, #4 +; CHECK-81M-SOFT-NEXT: mov r1, r0 +; CHECK-81M-SOFT-NEXT: movs r0, #10 +; CHECK-81M-SOFT-NEXT: blx r1 +; CHECK-81M-SOFT-NEXT: add sp, #4 +; CHECK-81M-SOFT-NEXT: pop.w {r7, lr} +; CHECK-81M-SOFT-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} +; CHECK-81M-SOFT-NEXT: vldr fpcxtns, [sp], #4 +; CHECK-81M-SOFT-NEXT: clrm {r1, r2, r3, r12, apsr} +; CHECK-81M-SOFT-NEXT: bxns lr +; +; CHECK-81M-SOFTFP-LABEL: ns_entry: +; CHECK-81M-SOFTFP: @ %bb.0: @ %entry +; CHECK-81M-SOFTFP-NEXT: vstr fpcxtns, [sp, #-4]! 
+; CHECK-81M-SOFTFP-NEXT: push {r7, lr} +; CHECK-81M-SOFTFP-NEXT: sub sp, #4 +; CHECK-81M-SOFTFP-NEXT: mov r1, r0 +; CHECK-81M-SOFTFP-NEXT: movs r0, #10 +; CHECK-81M-SOFTFP-NEXT: blx r1 +; CHECK-81M-SOFTFP-NEXT: add sp, #4 +; CHECK-81M-SOFTFP-NEXT: pop.w {r7, lr} +; CHECK-81M-SOFTFP-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} +; CHECK-81M-SOFTFP-NEXT: vldr fpcxtns, [sp], #4 +; CHECK-81M-SOFTFP-NEXT: clrm {r1, r2, r3, r12, apsr} +; CHECK-81M-SOFTFP-NEXT: bxns lr +entry: + %call = call i32 %fptr(i32 10) #1 + ret i32 %call +} + +attributes #0 = { "cmse_nonsecure_entry" nounwind } +attributes #1 = { nounwind } + + +define i32 @ns_call(i32 (i32)* nocapture %fptr) #2 { +; CHECK-8B-LABEL: ns_call: +; CHECK-8B: @ %bb.0: @ %entry +; CHECK-8B-NEXT: push {r7, lr} +; CHECK-8B-NEXT: mov r1, r0 +; CHECK-8B-NEXT: movs r0, #10 +; CHECK-8B-NEXT: push {r4, r5, r6, r7} +; CHECK-8B-NEXT: mov r7, r11 +; CHECK-8B-NEXT: mov r6, r10 +; CHECK-8B-NEXT: mov r5, r9 +; CHECK-8B-NEXT: mov r4, r8 +; CHECK-8B-NEXT: push {r4, r5, r6, r7} +; CHECK-8B-NEXT: mov r2, #1 +; CHECK-8B-NEXT: bics r1, r2 +; CHECK-8B-NEXT: mov r2, r1 +; CHECK-8B-NEXT: mov r3, r1 +; CHECK-8B-NEXT: mov r4, r1 +; CHECK-8B-NEXT: mov r5, r1 +; CHECK-8B-NEXT: mov r6, r1 +; CHECK-8B-NEXT: mov r7, r1 +; CHECK-8B-NEXT: mov r8, r1 +; CHECK-8B-NEXT: mov r9, r1 +; CHECK-8B-NEXT: mov r10, r1 +; CHECK-8B-NEXT: mov r11, r1 +; CHECK-8B-NEXT: mov r12, r1 +; CHECK-8B-NEXT: msr apsr, r1 +; CHECK-8B-NEXT: blxns r1 +; CHECK-8B-NEXT: pop {r4, r5, r6, r7} +; CHECK-8B-NEXT: mov r8, r4 +; CHECK-8B-NEXT: mov r9, r5 +; CHECK-8B-NEXT: mov r10, r6 +; CHECK-8B-NEXT: mov r11, r7 +; CHECK-8B-NEXT: pop {r4, r5, r6, r7} +; CHECK-8B-NEXT: pop {r7, pc} +; +; CHECK-8M-SOFT-LABEL: ns_call: +; CHECK-8M-SOFT: @ %bb.0: @ %entry +; CHECK-8M-SOFT-NEXT: push {r7, lr} +; CHECK-8M-SOFT-NEXT: mov r1, r0 +; CHECK-8M-SOFT-NEXT: movs r0, #10 +; CHECK-8M-SOFT-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-SOFT-NEXT: bic r1, r1, #1 +; CHECK-8M-SOFT-NEXT: msr apsr_nzcvqg, r1 +; CHECK-8M-SOFT-NEXT: mov r2, r1 +; CHECK-8M-SOFT-NEXT: mov r3, r1 +; CHECK-8M-SOFT-NEXT: mov r4, r1 +; CHECK-8M-SOFT-NEXT: mov r5, r1 +; CHECK-8M-SOFT-NEXT: mov r6, r1 +; CHECK-8M-SOFT-NEXT: mov r7, r1 +; CHECK-8M-SOFT-NEXT: mov r8, r1 +; CHECK-8M-SOFT-NEXT: mov r9, r1 +; CHECK-8M-SOFT-NEXT: mov r10, r1 +; CHECK-8M-SOFT-NEXT: mov r11, r1 +; CHECK-8M-SOFT-NEXT: mov r12, r1 +; CHECK-8M-SOFT-NEXT: blxns r1 +; CHECK-8M-SOFT-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-SOFT-NEXT: pop {r7, pc} +; +; CHECK-8M-SOFTFP-LABEL: ns_call: +; CHECK-8M-SOFTFP: @ %bb.0: @ %entry +; CHECK-8M-SOFTFP-NEXT: push {r7, lr} +; CHECK-8M-SOFTFP-NEXT: mov r1, r0 +; CHECK-8M-SOFTFP-NEXT: movs r0, #10 +; CHECK-8M-SOFTFP-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-SOFTFP-NEXT: bic r1, r1, #1 +; CHECK-8M-SOFTFP-NEXT: sub sp, #136 +; CHECK-8M-SOFTFP-NEXT: vlstm sp +; CHECK-8M-SOFTFP-NEXT: mov r2, r1 +; CHECK-8M-SOFTFP-NEXT: mov r3, r1 +; CHECK-8M-SOFTFP-NEXT: mov r4, r1 +; CHECK-8M-SOFTFP-NEXT: mov r5, r1 +; CHECK-8M-SOFTFP-NEXT: mov r6, r1 +; CHECK-8M-SOFTFP-NEXT: mov r7, r1 +; CHECK-8M-SOFTFP-NEXT: mov r8, r1 +; CHECK-8M-SOFTFP-NEXT: mov r9, r1 +; CHECK-8M-SOFTFP-NEXT: mov r10, r1 +; CHECK-8M-SOFTFP-NEXT: mov r11, r1 +; CHECK-8M-SOFTFP-NEXT: mov r12, r1 +; CHECK-8M-SOFTFP-NEXT: msr apsr_nzcvqg, r1 +; CHECK-8M-SOFTFP-NEXT: blxns r1 +; CHECK-8M-SOFTFP-NEXT: vlldm sp +; CHECK-8M-SOFTFP-NEXT: add sp, #136 +; CHECK-8M-SOFTFP-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; 
CHECK-8M-SOFTFP-NEXT: pop {r7, pc} +; +; CHECK-81M-SOFT-LABEL: ns_call: +; CHECK-81M-SOFT: @ %bb.0: @ %entry +; CHECK-81M-SOFT-NEXT: push {r7, lr} +; CHECK-81M-SOFT-NEXT: mov r1, r0 +; CHECK-81M-SOFT-NEXT: movs r0, #10 +; CHECK-81M-SOFT-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-SOFT-NEXT: bic r1, r1, #1 +; CHECK-81M-SOFT-NEXT: sub sp, #136 +; CHECK-81M-SOFT-NEXT: vlstm sp +; CHECK-81M-SOFT-NEXT: clrm {r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; CHECK-81M-SOFT-NEXT: blxns r1 +; CHECK-81M-SOFT-NEXT: vlldm sp +; CHECK-81M-SOFT-NEXT: add sp, #136 +; CHECK-81M-SOFT-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-SOFT-NEXT: pop {r7, pc} +; +; CHECK-81M-SOFTFP-LABEL: ns_call: +; CHECK-81M-SOFTFP: @ %bb.0: @ %entry +; CHECK-81M-SOFTFP-NEXT: push {r7, lr} +; CHECK-81M-SOFTFP-NEXT: mov r1, r0 +; CHECK-81M-SOFTFP-NEXT: movs r0, #10 +; CHECK-81M-SOFTFP-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-SOFTFP-NEXT: bic r1, r1, #1 +; CHECK-81M-SOFTFP-NEXT: sub sp, #136 +; CHECK-81M-SOFTFP-NEXT: vlstm sp +; CHECK-81M-SOFTFP-NEXT: clrm {r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; CHECK-81M-SOFTFP-NEXT: blxns r1 +; CHECK-81M-SOFTFP-NEXT: vlldm sp +; CHECK-81M-SOFTFP-NEXT: add sp, #136 +; CHECK-81M-SOFTFP-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-SOFTFP-NEXT: pop {r7, pc} +entry: + %call = call i32 %fptr(i32 10) #3 + ret i32 %call +} + +attributes #2 = { nounwind } +attributes #3 = { "cmse_nonsecure_call" nounwind } + + +define i32 @ns_tail_call(i32 (i32)* nocapture %fptr) #4 { +; CHECK-8B-LABEL: ns_tail_call: +; CHECK-8B: @ %bb.0: @ %entry +; CHECK-8B-NEXT: push {r7, lr} +; CHECK-8B-NEXT: mov r1, r0 +; CHECK-8B-NEXT: movs r0, #10 +; CHECK-8B-NEXT: push {r4, r5, r6, r7} +; CHECK-8B-NEXT: mov r7, r11 +; CHECK-8B-NEXT: mov r6, r10 +; CHECK-8B-NEXT: mov r5, r9 +; CHECK-8B-NEXT: mov r4, r8 +; CHECK-8B-NEXT: push {r4, r5, r6, r7} +; CHECK-8B-NEXT: mov r2, #1 +; CHECK-8B-NEXT: bics r1, r2 +; CHECK-8B-NEXT: mov r2, r1 +; CHECK-8B-NEXT: mov r3, r1 +; CHECK-8B-NEXT: mov r4, r1 +; CHECK-8B-NEXT: mov r5, r1 +; CHECK-8B-NEXT: mov r6, r1 +; CHECK-8B-NEXT: mov r7, r1 +; CHECK-8B-NEXT: mov r8, r1 +; CHECK-8B-NEXT: mov r9, r1 +; CHECK-8B-NEXT: mov r10, r1 +; CHECK-8B-NEXT: mov r11, r1 +; CHECK-8B-NEXT: mov r12, r1 +; CHECK-8B-NEXT: msr apsr, r1 +; CHECK-8B-NEXT: blxns r1 +; CHECK-8B-NEXT: pop {r4, r5, r6, r7} +; CHECK-8B-NEXT: mov r8, r4 +; CHECK-8B-NEXT: mov r9, r5 +; CHECK-8B-NEXT: mov r10, r6 +; CHECK-8B-NEXT: mov r11, r7 +; CHECK-8B-NEXT: pop {r4, r5, r6, r7} +; CHECK-8B-NEXT: pop {r7, pc} +; +; CHECK-8M-SOFT-LABEL: ns_tail_call: +; CHECK-8M-SOFT: @ %bb.0: @ %entry +; CHECK-8M-SOFT-NEXT: push {r7, lr} +; CHECK-8M-SOFT-NEXT: mov r1, r0 +; CHECK-8M-SOFT-NEXT: movs r0, #10 +; CHECK-8M-SOFT-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-SOFT-NEXT: bic r1, r1, #1 +; CHECK-8M-SOFT-NEXT: msr apsr_nzcvqg, r1 +; CHECK-8M-SOFT-NEXT: mov r2, r1 +; CHECK-8M-SOFT-NEXT: mov r3, r1 +; CHECK-8M-SOFT-NEXT: mov r4, r1 +; CHECK-8M-SOFT-NEXT: mov r5, r1 +; CHECK-8M-SOFT-NEXT: mov r6, r1 +; CHECK-8M-SOFT-NEXT: mov r7, r1 +; CHECK-8M-SOFT-NEXT: mov r8, r1 +; CHECK-8M-SOFT-NEXT: mov r9, r1 +; CHECK-8M-SOFT-NEXT: mov r10, r1 +; CHECK-8M-SOFT-NEXT: mov r11, r1 +; CHECK-8M-SOFT-NEXT: mov r12, r1 +; CHECK-8M-SOFT-NEXT: blxns r1 +; CHECK-8M-SOFT-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-SOFT-NEXT: pop {r7, pc} +; +; CHECK-8M-SOFTFP-LABEL: ns_tail_call: +; CHECK-8M-SOFTFP: @ %bb.0: @ %entry +; CHECK-8M-SOFTFP-NEXT: push {r7, lr} +; 
CHECK-8M-SOFTFP-NEXT: mov r1, r0 +; CHECK-8M-SOFTFP-NEXT: movs r0, #10 +; CHECK-8M-SOFTFP-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-SOFTFP-NEXT: bic r1, r1, #1 +; CHECK-8M-SOFTFP-NEXT: sub sp, #136 +; CHECK-8M-SOFTFP-NEXT: vlstm sp +; CHECK-8M-SOFTFP-NEXT: mov r2, r1 +; CHECK-8M-SOFTFP-NEXT: mov r3, r1 +; CHECK-8M-SOFTFP-NEXT: mov r4, r1 +; CHECK-8M-SOFTFP-NEXT: mov r5, r1 +; CHECK-8M-SOFTFP-NEXT: mov r6, r1 +; CHECK-8M-SOFTFP-NEXT: mov r7, r1 +; CHECK-8M-SOFTFP-NEXT: mov r8, r1 +; CHECK-8M-SOFTFP-NEXT: mov r9, r1 +; CHECK-8M-SOFTFP-NEXT: mov r10, r1 +; CHECK-8M-SOFTFP-NEXT: mov r11, r1 +; CHECK-8M-SOFTFP-NEXT: mov r12, r1 +; CHECK-8M-SOFTFP-NEXT: msr apsr_nzcvqg, r1 +; CHECK-8M-SOFTFP-NEXT: blxns r1 +; CHECK-8M-SOFTFP-NEXT: vlldm sp +; CHECK-8M-SOFTFP-NEXT: add sp, #136 +; CHECK-8M-SOFTFP-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-SOFTFP-NEXT: pop {r7, pc} +; +; CHECK-81M-SOFT-LABEL: ns_tail_call: +; CHECK-81M-SOFT: @ %bb.0: @ %entry +; CHECK-81M-SOFT-NEXT: push {r7, lr} +; CHECK-81M-SOFT-NEXT: mov r1, r0 +; CHECK-81M-SOFT-NEXT: movs r0, #10 +; CHECK-81M-SOFT-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-SOFT-NEXT: bic r1, r1, #1 +; CHECK-81M-SOFT-NEXT: sub sp, #136 +; CHECK-81M-SOFT-NEXT: vlstm sp +; CHECK-81M-SOFT-NEXT: clrm {r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; CHECK-81M-SOFT-NEXT: blxns r1 +; CHECK-81M-SOFT-NEXT: vlldm sp +; CHECK-81M-SOFT-NEXT: add sp, #136 +; CHECK-81M-SOFT-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-SOFT-NEXT: pop {r7, pc} +; +; CHECK-81M-SOFTFP-LABEL: ns_tail_call: +; CHECK-81M-SOFTFP: @ %bb.0: @ %entry +; CHECK-81M-SOFTFP-NEXT: push {r7, lr} +; CHECK-81M-SOFTFP-NEXT: mov r1, r0 +; CHECK-81M-SOFTFP-NEXT: movs r0, #10 +; CHECK-81M-SOFTFP-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-SOFTFP-NEXT: bic r1, r1, #1 +; CHECK-81M-SOFTFP-NEXT: sub sp, #136 +; CHECK-81M-SOFTFP-NEXT: vlstm sp +; CHECK-81M-SOFTFP-NEXT: clrm {r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; CHECK-81M-SOFTFP-NEXT: blxns r1 +; CHECK-81M-SOFTFP-NEXT: vlldm sp +; CHECK-81M-SOFTFP-NEXT: add sp, #136 +; CHECK-81M-SOFTFP-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-SOFTFP-NEXT: pop {r7, pc} +entry: + %call = tail call i32 %fptr(i32 10) #5 + ret i32 %call +} + +attributes #4 = { nounwind } +attributes #5 = { "cmse_nonsecure_call" nounwind } + + +define void (i32, i32, i32, i32)* @ns_tail_call_many_args(void (i32, i32, i32, i32)* %f, i32 %a, i32 %b, i32 %c, i32 %d) #6 { +; CHECK-8B-LABEL: ns_tail_call_many_args: +; CHECK-8B: @ %bb.0: +; CHECK-8B-NEXT: push {r4, r5, r7, lr} +; CHECK-8B-NEXT: mov r5, r3 +; CHECK-8B-NEXT: mov r4, r0 +; CHECK-8B-NEXT: ldr r3, [sp, #16] +; CHECK-8B-NEXT: mov r0, r1 +; CHECK-8B-NEXT: mov r1, r2 +; CHECK-8B-NEXT: mov r2, r5 +; CHECK-8B-NEXT: push {r4, r5, r6, r7} +; CHECK-8B-NEXT: mov r7, r11 +; CHECK-8B-NEXT: mov r6, r10 +; CHECK-8B-NEXT: mov r5, r9 +; CHECK-8B-NEXT: push {r5, r6, r7} +; CHECK-8B-NEXT: mov r5, r8 +; CHECK-8B-NEXT: push {r5} +; CHECK-8B-NEXT: mov r5, #1 +; CHECK-8B-NEXT: bics r4, r5 +; CHECK-8B-NEXT: mov r5, r4 +; CHECK-8B-NEXT: mov r6, r4 +; CHECK-8B-NEXT: mov r7, r4 +; CHECK-8B-NEXT: mov r8, r4 +; CHECK-8B-NEXT: mov r9, r4 +; CHECK-8B-NEXT: mov r10, r4 +; CHECK-8B-NEXT: mov r11, r4 +; CHECK-8B-NEXT: mov r12, r4 +; CHECK-8B-NEXT: msr apsr, r4 +; CHECK-8B-NEXT: blxns r4 +; CHECK-8B-NEXT: pop {r4, r5, r6, r7} +; CHECK-8B-NEXT: mov r8, r4 +; CHECK-8B-NEXT: mov r9, r5 +; CHECK-8B-NEXT: mov r10, r6 +; CHECK-8B-NEXT: mov r11, r7 +; 
CHECK-8B-NEXT: pop {r4, r5, r6, r7} +; CHECK-8B-NEXT: mov r0, r4 +; CHECK-8B-NEXT: pop {r4, r5, r7, pc} +; +; CHECK-8M-SOFT-LABEL: ns_tail_call_many_args: +; CHECK-8M-SOFT: @ %bb.0: +; CHECK-8M-SOFT-NEXT: push {r4, lr} +; CHECK-8M-SOFT-NEXT: mov r4, r0 +; CHECK-8M-SOFT-NEXT: mov r12, r3 +; CHECK-8M-SOFT-NEXT: mov r0, r1 +; CHECK-8M-SOFT-NEXT: mov r1, r2 +; CHECK-8M-SOFT-NEXT: ldr r3, [sp, #8] +; CHECK-8M-SOFT-NEXT: mov r2, r12 +; CHECK-8M-SOFT-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-SOFT-NEXT: bic r4, r4, #1 +; CHECK-8M-SOFT-NEXT: msr apsr_nzcvqg, r4 +; CHECK-8M-SOFT-NEXT: mov r5, r4 +; CHECK-8M-SOFT-NEXT: mov r6, r4 +; CHECK-8M-SOFT-NEXT: mov r7, r4 +; CHECK-8M-SOFT-NEXT: mov r8, r4 +; CHECK-8M-SOFT-NEXT: mov r9, r4 +; CHECK-8M-SOFT-NEXT: mov r10, r4 +; CHECK-8M-SOFT-NEXT: mov r11, r4 +; CHECK-8M-SOFT-NEXT: mov r12, r4 +; CHECK-8M-SOFT-NEXT: blxns r4 +; CHECK-8M-SOFT-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-SOFT-NEXT: mov r0, r4 +; CHECK-8M-SOFT-NEXT: pop {r4, pc} +; +; CHECK-8M-SOFTFP-LABEL: ns_tail_call_many_args: +; CHECK-8M-SOFTFP: @ %bb.0: +; CHECK-8M-SOFTFP-NEXT: push {r4, lr} +; CHECK-8M-SOFTFP-NEXT: mov r4, r0 +; CHECK-8M-SOFTFP-NEXT: mov r12, r3 +; CHECK-8M-SOFTFP-NEXT: mov r0, r1 +; CHECK-8M-SOFTFP-NEXT: mov r1, r2 +; CHECK-8M-SOFTFP-NEXT: ldr r3, [sp, #8] +; CHECK-8M-SOFTFP-NEXT: mov r2, r12 +; CHECK-8M-SOFTFP-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-SOFTFP-NEXT: bic r4, r4, #1 +; CHECK-8M-SOFTFP-NEXT: sub sp, #136 +; CHECK-8M-SOFTFP-NEXT: vlstm sp +; CHECK-8M-SOFTFP-NEXT: mov r5, r4 +; CHECK-8M-SOFTFP-NEXT: mov r6, r4 +; CHECK-8M-SOFTFP-NEXT: mov r7, r4 +; CHECK-8M-SOFTFP-NEXT: mov r8, r4 +; CHECK-8M-SOFTFP-NEXT: mov r9, r4 +; CHECK-8M-SOFTFP-NEXT: mov r10, r4 +; CHECK-8M-SOFTFP-NEXT: mov r11, r4 +; CHECK-8M-SOFTFP-NEXT: mov r12, r4 +; CHECK-8M-SOFTFP-NEXT: msr apsr_nzcvqg, r4 +; CHECK-8M-SOFTFP-NEXT: blxns r4 +; CHECK-8M-SOFTFP-NEXT: vlldm sp +; CHECK-8M-SOFTFP-NEXT: add sp, #136 +; CHECK-8M-SOFTFP-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-SOFTFP-NEXT: mov r0, r4 +; CHECK-8M-SOFTFP-NEXT: pop {r4, pc} +; +; CHECK-81M-SOFT-LABEL: ns_tail_call_many_args: +; CHECK-81M-SOFT: @ %bb.0: +; CHECK-81M-SOFT-NEXT: push {r4, lr} +; CHECK-81M-SOFT-NEXT: mov r4, r0 +; CHECK-81M-SOFT-NEXT: mov r12, r3 +; CHECK-81M-SOFT-NEXT: mov r0, r1 +; CHECK-81M-SOFT-NEXT: mov r1, r2 +; CHECK-81M-SOFT-NEXT: ldr r3, [sp, #8] +; CHECK-81M-SOFT-NEXT: mov r2, r12 +; CHECK-81M-SOFT-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-SOFT-NEXT: bic r4, r4, #1 +; CHECK-81M-SOFT-NEXT: sub sp, #136 +; CHECK-81M-SOFT-NEXT: vlstm sp +; CHECK-81M-SOFT-NEXT: clrm {r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; CHECK-81M-SOFT-NEXT: blxns r4 +; CHECK-81M-SOFT-NEXT: vlldm sp +; CHECK-81M-SOFT-NEXT: add sp, #136 +; CHECK-81M-SOFT-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-SOFT-NEXT: mov r0, r4 +; CHECK-81M-SOFT-NEXT: pop {r4, pc} +; +; CHECK-81M-SOFTFP-LABEL: ns_tail_call_many_args: +; CHECK-81M-SOFTFP: @ %bb.0: +; CHECK-81M-SOFTFP-NEXT: push {r4, lr} +; CHECK-81M-SOFTFP-NEXT: mov r4, r0 +; CHECK-81M-SOFTFP-NEXT: mov r12, r3 +; CHECK-81M-SOFTFP-NEXT: mov r0, r1 +; CHECK-81M-SOFTFP-NEXT: mov r1, r2 +; CHECK-81M-SOFTFP-NEXT: ldr r3, [sp, #8] +; CHECK-81M-SOFTFP-NEXT: mov r2, r12 +; CHECK-81M-SOFTFP-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-SOFTFP-NEXT: bic r4, r4, #1 +; CHECK-81M-SOFTFP-NEXT: sub sp, #136 +; CHECK-81M-SOFTFP-NEXT: vlstm sp +; CHECK-81M-SOFTFP-NEXT: clrm {r5, r6, r7, r8, r9, r10, r11, r12, 
apsr} +; CHECK-81M-SOFTFP-NEXT: blxns r4 +; CHECK-81M-SOFTFP-NEXT: vlldm sp +; CHECK-81M-SOFTFP-NEXT: add sp, #136 +; CHECK-81M-SOFTFP-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-SOFTFP-NEXT: mov r0, r4 +; CHECK-81M-SOFTFP-NEXT: pop {r4, pc} + tail call void %f(i32 %a, i32 %b, i32 %c, i32 %d) #7 + ret void (i32, i32, i32, i32)* %f +} + +attributes #6 = { nounwind } +attributes #7 = { "cmse_nonsecure_call" nounwind } + + +define i32 @ns_call_void(i32 %reg0, i32 ()* nocapture %fptr) #8 { +; CHECK-8B-LABEL: ns_call_void: +; CHECK-8B: @ %bb.0: @ %entry +; CHECK-8B-NEXT: push {r7, lr} +; CHECK-8B-NEXT: push {r4, r5, r6, r7} +; CHECK-8B-NEXT: mov r7, r11 +; CHECK-8B-NEXT: mov r6, r10 +; CHECK-8B-NEXT: mov r5, r9 +; CHECK-8B-NEXT: mov r4, r8 +; CHECK-8B-NEXT: push {r4, r5, r6, r7} +; CHECK-8B-NEXT: mov r0, #1 +; CHECK-8B-NEXT: bics r1, r0 +; CHECK-8B-NEXT: mov r0, r1 +; CHECK-8B-NEXT: mov r2, r1 +; CHECK-8B-NEXT: mov r3, r1 +; CHECK-8B-NEXT: mov r4, r1 +; CHECK-8B-NEXT: mov r5, r1 +; CHECK-8B-NEXT: mov r6, r1 +; CHECK-8B-NEXT: mov r7, r1 +; CHECK-8B-NEXT: mov r8, r1 +; CHECK-8B-NEXT: mov r9, r1 +; CHECK-8B-NEXT: mov r10, r1 +; CHECK-8B-NEXT: mov r11, r1 +; CHECK-8B-NEXT: mov r12, r1 +; CHECK-8B-NEXT: msr apsr, r1 +; CHECK-8B-NEXT: blxns r1 +; CHECK-8B-NEXT: pop {r4, r5, r6, r7} +; CHECK-8B-NEXT: mov r8, r4 +; CHECK-8B-NEXT: mov r9, r5 +; CHECK-8B-NEXT: mov r10, r6 +; CHECK-8B-NEXT: mov r11, r7 +; CHECK-8B-NEXT: pop {r4, r5, r6, r7} +; CHECK-8B-NEXT: pop {r7, pc} +; +; CHECK-8M-SOFT-LABEL: ns_call_void: +; CHECK-8M-SOFT: @ %bb.0: @ %entry +; CHECK-8M-SOFT-NEXT: push {r7, lr} +; CHECK-8M-SOFT-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-SOFT-NEXT: bic r1, r1, #1 +; CHECK-8M-SOFT-NEXT: msr apsr_nzcvqg, r1 +; CHECK-8M-SOFT-NEXT: mov r0, r1 +; CHECK-8M-SOFT-NEXT: mov r2, r1 +; CHECK-8M-SOFT-NEXT: mov r3, r1 +; CHECK-8M-SOFT-NEXT: mov r4, r1 +; CHECK-8M-SOFT-NEXT: mov r5, r1 +; CHECK-8M-SOFT-NEXT: mov r6, r1 +; CHECK-8M-SOFT-NEXT: mov r7, r1 +; CHECK-8M-SOFT-NEXT: mov r8, r1 +; CHECK-8M-SOFT-NEXT: mov r9, r1 +; CHECK-8M-SOFT-NEXT: mov r10, r1 +; CHECK-8M-SOFT-NEXT: mov r11, r1 +; CHECK-8M-SOFT-NEXT: mov r12, r1 +; CHECK-8M-SOFT-NEXT: blxns r1 +; CHECK-8M-SOFT-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-SOFT-NEXT: pop {r7, pc} +; +; CHECK-8M-SOFTFP-LABEL: ns_call_void: +; CHECK-8M-SOFTFP: @ %bb.0: @ %entry +; CHECK-8M-SOFTFP-NEXT: push {r7, lr} +; CHECK-8M-SOFTFP-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-SOFTFP-NEXT: bic r1, r1, #1 +; CHECK-8M-SOFTFP-NEXT: sub sp, #136 +; CHECK-8M-SOFTFP-NEXT: vlstm sp +; CHECK-8M-SOFTFP-NEXT: mov r0, r1 +; CHECK-8M-SOFTFP-NEXT: mov r2, r1 +; CHECK-8M-SOFTFP-NEXT: mov r3, r1 +; CHECK-8M-SOFTFP-NEXT: mov r4, r1 +; CHECK-8M-SOFTFP-NEXT: mov r5, r1 +; CHECK-8M-SOFTFP-NEXT: mov r6, r1 +; CHECK-8M-SOFTFP-NEXT: mov r7, r1 +; CHECK-8M-SOFTFP-NEXT: mov r8, r1 +; CHECK-8M-SOFTFP-NEXT: mov r9, r1 +; CHECK-8M-SOFTFP-NEXT: mov r10, r1 +; CHECK-8M-SOFTFP-NEXT: mov r11, r1 +; CHECK-8M-SOFTFP-NEXT: mov r12, r1 +; CHECK-8M-SOFTFP-NEXT: msr apsr_nzcvqg, r1 +; CHECK-8M-SOFTFP-NEXT: blxns r1 +; CHECK-8M-SOFTFP-NEXT: vlldm sp +; CHECK-8M-SOFTFP-NEXT: add sp, #136 +; CHECK-8M-SOFTFP-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-SOFTFP-NEXT: pop {r7, pc} +; +; CHECK-81M-SOFT-LABEL: ns_call_void: +; CHECK-81M-SOFT: @ %bb.0: @ %entry +; CHECK-81M-SOFT-NEXT: push {r7, lr} +; CHECK-81M-SOFT-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-SOFT-NEXT: bic r1, r1, #1 +; CHECK-81M-SOFT-NEXT: sub sp, #136 +; 
CHECK-81M-SOFT-NEXT: vlstm sp +; CHECK-81M-SOFT-NEXT: clrm {r0, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; CHECK-81M-SOFT-NEXT: blxns r1 +; CHECK-81M-SOFT-NEXT: vlldm sp +; CHECK-81M-SOFT-NEXT: add sp, #136 +; CHECK-81M-SOFT-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-SOFT-NEXT: pop {r7, pc} +; +; CHECK-81M-SOFTFP-LABEL: ns_call_void: +; CHECK-81M-SOFTFP: @ %bb.0: @ %entry +; CHECK-81M-SOFTFP-NEXT: push {r7, lr} +; CHECK-81M-SOFTFP-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-SOFTFP-NEXT: bic r1, r1, #1 +; CHECK-81M-SOFTFP-NEXT: sub sp, #136 +; CHECK-81M-SOFTFP-NEXT: vlstm sp +; CHECK-81M-SOFTFP-NEXT: clrm {r0, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; CHECK-81M-SOFTFP-NEXT: blxns r1 +; CHECK-81M-SOFTFP-NEXT: vlldm sp +; CHECK-81M-SOFTFP-NEXT: add sp, #136 +; CHECK-81M-SOFTFP-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-SOFTFP-NEXT: pop {r7, pc} +entry: + %call = call i32 %fptr() #9 + ret i32 %call +} + +attributes #8 = { nounwind } +attributes #9 = { "cmse_nonsecure_call" nounwind } + diff --git a/llvm/test/CodeGen/ARM/cmse-clrm-it-block.ll b/llvm/test/CodeGen/ARM/cmse-clrm-it-block.ll new file mode 100644 index 0000000..377e560 --- /dev/null +++ b/llvm/test/CodeGen/ARM/cmse-clrm-it-block.ll @@ -0,0 +1,24 @@ +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+8msecext %s -o - | FileCheck %s + +define hidden i32 @f(i32 %0, i32 (i32)* nocapture %1) local_unnamed_addr #0 { + %3 = call i32 %1(i32 %0) #2 + %4 = icmp eq i32 %3, 1 + br i1 %4, label %6, label %5 + +5: ; preds = %2 + call void bitcast (void (...)* @g to void ()*)() #3 + unreachable + +6: ; preds = %2 + ret i32 1 +} +; CHECK-NOT: clrm eq +; CHECK: clrm {r1, r2, r3, r12, apsr} +; CHECK: bl g + +declare dso_local void @g(...) local_unnamed_addr #1 + +attributes #0 = { nounwind "cmse_nonsecure_entry" } +attributes #1 = { noreturn } +attributes #2 = { nounwind "cmse_nonsecure_call" } +attributes #3 = { noreturn nounwind } diff --git a/llvm/test/CodeGen/ARM/cmse-expand-bxns-ret.mir b/llvm/test/CodeGen/ARM/cmse-expand-bxns-ret.mir new file mode 100644 index 0000000..af74536 --- /dev/null +++ b/llvm/test/CodeGen/ARM/cmse-expand-bxns-ret.mir @@ -0,0 +1,26 @@ +# RUN: llc -mcpu=cortex-m33 -run-pass=arm-pseudo %s -o - | FileCheck %s +--- | + target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8m.main-arm-none-eabi" + + define hidden void @foo() local_unnamed_addr #0 { + entry: + ret void + } +... +--- +name: foo +alignment: 2 +tracksRegLiveness: true +frameInfo: + maxCallFrameSize: 0 +body: | + bb.0.entry: + tBXNS_RET +... 
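+# The arm-pseudo pass is expected to rewrite the bare tBXNS_RET above into a small CFG that conditionally clears the secure floating-point state before returning with BXNS; the checks below pin down only the resulting block structure.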
+ +# CHECK-LABEL: name: foo +# CHECK: bb.0.entry: +# CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) +# CHECK: bb.1.entry: +# CHECK-NEXT: successors: %bb.2(0x80000000) diff --git a/llvm/test/CodeGen/ARM/cmse.ll b/llvm/test/CodeGen/ARM/cmse.ll new file mode 100644 index 0000000..2b54099 --- /dev/null +++ b/llvm/test/CodeGen/ARM/cmse.ll @@ -0,0 +1,346 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc %s -o - -mtriple=thumbv8m.base | \ +; RUN: FileCheck %s --check-prefix=CHECK-8B --check-prefix=CHECK-8B-LE +; RUN: llc %s -o - -mtriple=thumbebv8m.base | \ +; RUN: FileCheck %s --check-prefix=CHECK-8B --check-prefix=CHECK-8B-BE +; RUN: llc %s -o - -mtriple=thumbv8m.main | \ +; RUN: FileCheck %s --check-prefix=CHECK-8M --check-prefix=CHECK-8M-LE +; RUN: llc %s -o - -mtriple=thumbebv8m.main | \ +; RUN: FileCheck %s --check-prefix=CHECK-8M --check-prefix=CHECK-8M-BE + +; RUN: llc %s -o - -mtriple=thumbv8.1m.main | \ +; RUN: FileCheck %s --check-prefix=CHECK-81M --check-prefix=CHECK-81M-LE +; RUN: llc %s -o - -mtriple=thumbebv8.1m.main | \ +; RUN: FileCheck %s --check-prefix=CHECK-81M --check-prefix=CHECK-81M-BE + +define void @func1(void ()* nocapture %fptr) #0 { +; CHECK-8B-LABEL: func1: +; CHECK-8B: @ %bb.0: @ %entry +; CHECK-8B-NEXT: push {r7, lr} +; CHECK-8B-NEXT: push {r4, r5, r6, r7} +; CHECK-8B-NEXT: mov r7, r11 +; CHECK-8B-NEXT: mov r6, r10 +; CHECK-8B-NEXT: mov r5, r9 +; CHECK-8B-NEXT: mov r4, r8 +; CHECK-8B-NEXT: push {r4, r5, r6, r7} +; CHECK-8B-NEXT: mov r1, #1 +; CHECK-8B-NEXT: bics r0, r1 +; CHECK-8B-NEXT: mov r1, r0 +; CHECK-8B-NEXT: mov r2, r0 +; CHECK-8B-NEXT: mov r3, r0 +; CHECK-8B-NEXT: mov r4, r0 +; CHECK-8B-NEXT: mov r5, r0 +; CHECK-8B-NEXT: mov r6, r0 +; CHECK-8B-NEXT: mov r7, r0 +; CHECK-8B-NEXT: mov r8, r0 +; CHECK-8B-NEXT: mov r9, r0 +; CHECK-8B-NEXT: mov r10, r0 +; CHECK-8B-NEXT: mov r11, r0 +; CHECK-8B-NEXT: mov r12, r0 +; CHECK-8B-NEXT: msr apsr, r0 +; CHECK-8B-NEXT: blxns r0 +; CHECK-8B-NEXT: pop {r4, r5, r6, r7} +; CHECK-8B-NEXT: mov r8, r4 +; CHECK-8B-NEXT: mov r9, r5 +; CHECK-8B-NEXT: mov r10, r6 +; CHECK-8B-NEXT: mov r11, r7 +; CHECK-8B-NEXT: pop {r4, r5, r6, r7} +; CHECK-8B-NEXT: pop {r7} +; CHECK-8B-NEXT: pop {r0} +; CHECK-8B-NEXT: mov lr, r0 +; CHECK-8B-NEXT: mov r0, lr +; CHECK-8B-NEXT: mov r1, lr +; CHECK-8B-NEXT: mov r2, lr +; CHECK-8B-NEXT: mov r3, lr +; CHECK-8B-NEXT: mov r12, lr +; CHECK-8B-NEXT: msr apsr, lr +; CHECK-8B-NEXT: bxns lr +; +; CHECK-8M-LABEL: func1: +; CHECK-8M: @ %bb.0: @ %entry +; CHECK-8M-NEXT: push {r7, lr} +; CHECK-8M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-NEXT: bic r0, r0, #1 +; CHECK-8M-NEXT: msr apsr_nzcvq, r0 +; CHECK-8M-NEXT: mov r1, r0 +; CHECK-8M-NEXT: mov r2, r0 +; CHECK-8M-NEXT: mov r3, r0 +; CHECK-8M-NEXT: mov r4, r0 +; CHECK-8M-NEXT: mov r5, r0 +; CHECK-8M-NEXT: mov r6, r0 +; CHECK-8M-NEXT: mov r7, r0 +; CHECK-8M-NEXT: mov r8, r0 +; CHECK-8M-NEXT: mov r9, r0 +; CHECK-8M-NEXT: mov r10, r0 +; CHECK-8M-NEXT: mov r11, r0 +; CHECK-8M-NEXT: mov r12, r0 +; CHECK-8M-NEXT: blxns r0 +; CHECK-8M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-NEXT: pop.w {r7, lr} +; CHECK-8M-NEXT: mov r0, lr +; CHECK-8M-NEXT: mov r1, lr +; CHECK-8M-NEXT: mov r2, lr +; CHECK-8M-NEXT: mov r3, lr +; CHECK-8M-NEXT: mov r12, lr +; CHECK-8M-NEXT: msr apsr_nzcvq, lr +; CHECK-8M-NEXT: bxns lr +; +; CHECK-81M-LABEL: func1: +; CHECK-81M: @ %bb.0: @ %entry +; CHECK-81M-NEXT: vstr fpcxtns, [sp, #-4]! 
+; CHECK-81M-NEXT: push {r7, lr} +; CHECK-81M-NEXT: sub sp, #4 +; CHECK-81M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-NEXT: bic r0, r0, #1 +; CHECK-81M-NEXT: sub sp, #136 +; CHECK-81M-NEXT: vlstm sp +; CHECK-81M-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; CHECK-81M-NEXT: blxns r0 +; CHECK-81M-NEXT: vlldm sp +; CHECK-81M-NEXT: add sp, #136 +; CHECK-81M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-NEXT: add sp, #4 +; CHECK-81M-NEXT: pop.w {r7, lr} +; CHECK-81M-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} +; CHECK-81M-NEXT: vldr fpcxtns, [sp], #4 +; CHECK-81M-NEXT: clrm {r0, r1, r2, r3, r12, apsr} +; CHECK-81M-NEXT: bxns lr +entry: + call void %fptr() #1 + ret void +} + +attributes #0 = { "cmse_nonsecure_entry" nounwind } +attributes #1 = { "cmse_nonsecure_call" nounwind } + +define void @func2(void ()* nocapture %fptr) #2 { +; CHECK-8B-LABEL: func2: +; CHECK-8B: @ %bb.0: @ %entry +; CHECK-8B-NEXT: push {r7, lr} +; CHECK-8B-NEXT: push {r4, r5, r6, r7} +; CHECK-8B-NEXT: mov r7, r11 +; CHECK-8B-NEXT: mov r6, r10 +; CHECK-8B-NEXT: mov r5, r9 +; CHECK-8B-NEXT: mov r4, r8 +; CHECK-8B-NEXT: push {r4, r5, r6, r7} +; CHECK-8B-NEXT: mov r1, #1 +; CHECK-8B-NEXT: bics r0, r1 +; CHECK-8B-NEXT: mov r1, r0 +; CHECK-8B-NEXT: mov r2, r0 +; CHECK-8B-NEXT: mov r3, r0 +; CHECK-8B-NEXT: mov r4, r0 +; CHECK-8B-NEXT: mov r5, r0 +; CHECK-8B-NEXT: mov r6, r0 +; CHECK-8B-NEXT: mov r7, r0 +; CHECK-8B-NEXT: mov r8, r0 +; CHECK-8B-NEXT: mov r9, r0 +; CHECK-8B-NEXT: mov r10, r0 +; CHECK-8B-NEXT: mov r11, r0 +; CHECK-8B-NEXT: mov r12, r0 +; CHECK-8B-NEXT: msr apsr, r0 +; CHECK-8B-NEXT: blxns r0 +; CHECK-8B-NEXT: pop {r4, r5, r6, r7} +; CHECK-8B-NEXT: mov r8, r4 +; CHECK-8B-NEXT: mov r9, r5 +; CHECK-8B-NEXT: mov r10, r6 +; CHECK-8B-NEXT: mov r11, r7 +; CHECK-8B-NEXT: pop {r4, r5, r6, r7} +; CHECK-8B-NEXT: pop {r7, pc} +; +; CHECK-8M-LABEL: func2: +; CHECK-8M: @ %bb.0: @ %entry +; CHECK-8M-NEXT: push {r7, lr} +; CHECK-8M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-NEXT: bic r0, r0, #1 +; CHECK-8M-NEXT: msr apsr_nzcvq, r0 +; CHECK-8M-NEXT: mov r1, r0 +; CHECK-8M-NEXT: mov r2, r0 +; CHECK-8M-NEXT: mov r3, r0 +; CHECK-8M-NEXT: mov r4, r0 +; CHECK-8M-NEXT: mov r5, r0 +; CHECK-8M-NEXT: mov r6, r0 +; CHECK-8M-NEXT: mov r7, r0 +; CHECK-8M-NEXT: mov r8, r0 +; CHECK-8M-NEXT: mov r9, r0 +; CHECK-8M-NEXT: mov r10, r0 +; CHECK-8M-NEXT: mov r11, r0 +; CHECK-8M-NEXT: mov r12, r0 +; CHECK-8M-NEXT: blxns r0 +; CHECK-8M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-8M-NEXT: pop {r7, pc} +; +; CHECK-81M-LABEL: func2: +; CHECK-81M: @ %bb.0: @ %entry +; CHECK-81M-NEXT: push {r7, lr} +; CHECK-81M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-NEXT: bic r0, r0, #1 +; CHECK-81M-NEXT: sub sp, #136 +; CHECK-81M-NEXT: vlstm sp +; CHECK-81M-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr} +; CHECK-81M-NEXT: blxns r0 +; CHECK-81M-NEXT: vlldm sp +; CHECK-81M-NEXT: add sp, #136 +; CHECK-81M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11} +; CHECK-81M-NEXT: pop {r7, pc} +entry: + tail call void %fptr() #3 + ret void +} + +attributes #2 = { nounwind } +attributes #3 = { "cmse_nonsecure_call" nounwind } + +define void @func3() #4 { +; CHECK-8B-LABEL: func3: +; CHECK-8B: @ %bb.0: @ %entry +; CHECK-8B-NEXT: mov r0, lr +; CHECK-8B-NEXT: mov r1, lr +; CHECK-8B-NEXT: mov r2, lr +; CHECK-8B-NEXT: mov r3, lr +; CHECK-8B-NEXT: mov r12, lr +; CHECK-8B-NEXT: msr apsr, lr +; CHECK-8B-NEXT: bxns lr +; +; 
CHECK-8M-LABEL: func3: +; CHECK-8M: @ %bb.0: @ %entry +; CHECK-8M-NEXT: mov r0, lr +; CHECK-8M-NEXT: mov r1, lr +; CHECK-8M-NEXT: mov r2, lr +; CHECK-8M-NEXT: mov r3, lr +; CHECK-8M-NEXT: mov r12, lr +; CHECK-8M-NEXT: msr apsr_nzcvq, lr +; CHECK-8M-NEXT: bxns lr +; +; CHECK-81M-LABEL: func3: +; CHECK-81M: @ %bb.0: @ %entry +; CHECK-81M-NEXT: vstr fpcxtns, [sp, #-4]! +; CHECK-81M-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} +; CHECK-81M-NEXT: vldr fpcxtns, [sp], #4 +; CHECK-81M-NEXT: clrm {r0, r1, r2, r3, r12, apsr} +; CHECK-81M-NEXT: bxns lr +entry: + ret void +} + +define void @func4() #4 { +; CHECK-8B-LABEL: func4: +; CHECK-8B: @ %bb.0: @ %entry +; CHECK-8B-NEXT: push {r7, lr} +; CHECK-8B-NEXT: bl func3 +; CHECK-8B-NEXT: pop {r7} +; CHECK-8B-NEXT: pop {r0} +; CHECK-8B-NEXT: mov lr, r0 +; CHECK-8B-NEXT: mov r0, lr +; CHECK-8B-NEXT: mov r1, lr +; CHECK-8B-NEXT: mov r2, lr +; CHECK-8B-NEXT: mov r3, lr +; CHECK-8B-NEXT: mov r12, lr +; CHECK-8B-NEXT: msr apsr, lr +; CHECK-8B-NEXT: bxns lr +; +; CHECK-8M-LABEL: func4: +; CHECK-8M: @ %bb.0: @ %entry +; CHECK-8M-NEXT: push {r7, lr} +; CHECK-8M-NEXT: bl func3 +; CHECK-8M-NEXT: pop.w {r7, lr} +; CHECK-8M-NEXT: mov r0, lr +; CHECK-8M-NEXT: mov r1, lr +; CHECK-8M-NEXT: mov r2, lr +; CHECK-8M-NEXT: mov r3, lr +; CHECK-8M-NEXT: mov r12, lr +; CHECK-8M-NEXT: msr apsr_nzcvq, lr +; CHECK-8M-NEXT: bxns lr +; +; CHECK-81M-LABEL: func4: +; CHECK-81M: @ %bb.0: @ %entry +; CHECK-81M-NEXT: vstr fpcxtns, [sp, #-4]! +; CHECK-81M-NEXT: push {r7, lr} +; CHECK-81M-NEXT: sub sp, #4 +; CHECK-81M-NEXT: bl func3 +; CHECK-81M-NEXT: add sp, #4 +; CHECK-81M-NEXT: pop.w {r7, lr} +; CHECK-81M-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} +; CHECK-81M-NEXT: vldr fpcxtns, [sp], #4 +; CHECK-81M-NEXT: clrm {r0, r1, r2, r3, r12, apsr} +; CHECK-81M-NEXT: bxns lr +entry: + tail call void @func3() #5 + ret void +} + +declare void @func51(i8 *); + +define void @func5() #4 { +; CHECK-8B-LABEL: func5: +; CHECK-8B: @ %bb.0: +; CHECK-8B-NEXT: push {r4, r6, r7, lr} +; CHECK-8B-NEXT: add r7, sp, #8 +; CHECK-8B-NEXT: sub sp, #16 +; CHECK-8B-NEXT: mov r4, sp +; CHECK-8B-NEXT: lsrs r4, r4, #4 +; CHECK-8B-NEXT: lsls r4, r4, #4 +; CHECK-8B-NEXT: mov sp, r4 +; CHECK-8B-NEXT: mov r0, sp +; CHECK-8B-NEXT: bl func51 +; CHECK-8B-NEXT: subs r4, r7, #7 +; CHECK-8B-NEXT: subs r4, #1 +; CHECK-8B-NEXT: mov sp, r4 +; CHECK-8B-NEXT: pop {r4, r6, r7} +; CHECK-8B-NEXT: pop {r0} +; CHECK-8B-NEXT: mov lr, r0 +; CHECK-8B-NEXT: mov r0, lr +; CHECK-8B-NEXT: mov r1, lr +; CHECK-8B-NEXT: mov r2, lr +; CHECK-8B-NEXT: mov r3, lr +; CHECK-8B-NEXT: mov r12, lr +; CHECK-8B-NEXT: msr apsr, lr +; CHECK-8B-NEXT: bxns lr +; +; CHECK-8M-LABEL: func5: +; CHECK-8M: @ %bb.0: +; CHECK-8M-NEXT: push {r4, r6, r7, lr} +; CHECK-8M-NEXT: add r7, sp, #8 +; CHECK-8M-NEXT: sub sp, #16 +; CHECK-8M-NEXT: mov r4, sp +; CHECK-8M-NEXT: bfc r4, #0, #4 +; CHECK-8M-NEXT: mov sp, r4 +; CHECK-8M-NEXT: mov r0, sp +; CHECK-8M-NEXT: bl func51 +; CHECK-8M-NEXT: sub.w r4, r7, #8 +; CHECK-8M-NEXT: mov sp, r4 +; CHECK-8M-NEXT: pop.w {r4, r6, r7, lr} +; CHECK-8M-NEXT: mov r0, lr +; CHECK-8M-NEXT: mov r1, lr +; CHECK-8M-NEXT: mov r2, lr +; CHECK-8M-NEXT: mov r3, lr +; CHECK-8M-NEXT: mov r12, lr +; CHECK-8M-NEXT: msr apsr_nzcvq, lr +; CHECK-8M-NEXT: bxns lr +; +; CHECK-81M-LABEL: func5: +; CHECK-81M: @ %bb.0: +; CHECK-81M-NEXT: vstr fpcxtns, [sp, #-4]! 
+; CHECK-81M-NEXT: push {r4, r6, r7, lr} +; CHECK-81M-NEXT: add r7, sp, #8 +; CHECK-81M-NEXT: sub sp, #12 +; CHECK-81M-NEXT: mov r4, sp +; CHECK-81M-NEXT: bfc r4, #0, #4 +; CHECK-81M-NEXT: mov sp, r4 +; CHECK-81M-NEXT: mov r0, sp +; CHECK-81M-NEXT: bl func51 +; CHECK-81M-NEXT: sub.w r4, r7, #8 +; CHECK-81M-NEXT: mov sp, r4 +; CHECK-81M-NEXT: pop.w {r4, r6, r7, lr} +; CHECK-81M-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} +; CHECK-81M-NEXT: vldr fpcxtns, [sp], #4 +; CHECK-81M-NEXT: clrm {r0, r1, r2, r3, r12, apsr} +; CHECK-81M-NEXT: bxns lr + %1 = alloca i8, align 16 + call void @func51(i8* nonnull %1) #5 + ret void +} + + +attributes #4 = { "cmse_nonsecure_entry" nounwind } +attributes #5 = { nounwind } +
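The "cmse_nonsecure_entry" and "cmse_nonsecure_call" attributes exercised throughout these tests correspond to the ACLE CMSE constructs at the C source level. As a minimal sketch (illustrative only, not part of the patch: the type and function names are invented, and the exact driver invocation may vary), code along these lines, built with something like clang --target=thumbv8m.main-none-eabi -mcmse -O2, produces the patterns checked above:

#include <arm_cmse.h>

/* A pointer to non-secure code. Calls through it get the
 * "cmse_nonsecure_call" lowering seen in ns_call/func2: clear the LSB of
 * the target (bic), save and clear the FP state around the call
 * (vlstm/vlldm), clear the remaining GPRs (mov chains, or clrm on
 * v8.1-M), then transfer with blxns. */
typedef int (*ns_fptr_t)(int) __attribute__((cmse_nonsecure_call));

int call_nonsecure(ns_fptr_t fptr) {
  return fptr(10);
}

/* A secure function callable from non-secure state. It gets the
 * "cmse_nonsecure_entry" lowering seen in ns_entry/func3: a companion
 * __acle_se_ symbol, and an epilogue that clears every register not
 * carrying the return value (mov chains or clrm, plus vscclrm and an
 * fpcxtns save/restore on v8.1-M) before returning with bxns. */
int __attribute__((cmse_nonsecure_entry)) entry_add(int x) {
  return x + 1;
}

Note also, as the ns_tail_call tests above show, that a "cmse_nonsecure_call" marked as a tail call is still lowered as an ordinary call followed by a return, since the register clearing and the state save/restore have to run around the transfer.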