BothFlags<[NoXarchOption, CC1Option], " the AAPCS standard requirement stating that"
" volatile bit-field width is dictated by the field container type. (ARM only).">>,
Group<m_arm_Features_Group>;
-
+def mframe_chain : Joined<["-"], "mframe-chain=">,
+ Group<m_arm_Features_Group>, Values<"none,aapcs,aapcs+leaf">,
+ HelpText<"Select the frame chain model used to emit frame records (Arm only).">;
def mgeneral_regs_only : Flag<["-"], "mgeneral-regs-only">, Group<m_Group>,
HelpText<"Generate code which only uses the general purpose registers (AArch64/x86 only)">;
def mfix_cmse_cve_2021_35465 : Flag<["-"], "mfix-cmse-cve-2021-35465">,
}
}
+ // Propagate frame-chain model selection
+ if (Arg *A = Args.getLastArg(options::OPT_mframe_chain)) {
+ StringRef FrameChainOption = A->getValue();
+ if (FrameChainOption.startswith("aapcs"))
+ Features.push_back("+aapcs-frame-chain");
+ if (FrameChainOption == "aapcs+leaf")
+ Features.push_back("+aapcs-frame-chain-leaf");
+ }
+
// CMSE: Check for target 8M (for -mcmse to be applicable) is performed later.
if (Args.getLastArg(options::OPT_mcmse))
Features.push_back("+8msecext");
"FixCortexA57AES1742098", "true",
"Work around Cortex-A57 Erratum 1742098 / Cortex-A72 Erratum 1655431 (AES)">;
+def FeatureAAPCSFrameChain : SubtargetFeature<"aapcs-frame-chain",
+ "CreateAAPCSFrameChain", "true",
+ "Create an AAPCS compliant frame chain">;
+
+def FeatureAAPCSFrameChainLeaf : SubtargetFeature<"aapcs-frame-chain-leaf",
+ "CreateAAPCSFrameChainLeaf", "true",
+ "Create an AAPCS compliant frame chain "
+ "for leaf functions",
+ [FeatureAAPCSFrameChain]>;
+
//===----------------------------------------------------------------------===//
// ARM architecture class
//
ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
const ARMSubtarget &STI = MF->getSubtarget<ARMSubtarget>();
bool UseSplitPush = STI.splitFramePushPop(*MF);
- const MCPhysReg *RegList =
- STI.isTargetDarwin()
- ? CSR_iOS_SaveList
- : (UseSplitPush ? CSR_AAPCS_SplitPush_SaveList : CSR_AAPCS_SaveList);
-
const Function &F = MF->getFunction();
+
if (F.getCallingConv() == CallingConv::GHC) {
// GHC set of callee saved regs is empty as all those regs are
// used for passing STG regs around
} else if (F.getCallingConv() == CallingConv::SwiftTail) {
return STI.isTargetDarwin()
? CSR_iOS_SwiftTail_SaveList
- : (UseSplitPush ? CSR_AAPCS_SplitPush_SwiftTail_SaveList
+ : (UseSplitPush ? CSR_ATPCS_SplitPush_SwiftTail_SaveList
: CSR_AAPCS_SwiftTail_SaveList);
} else if (F.hasFnAttribute("interrupt")) {
if (STI.isMClass()) {
// M-class CPUs have hardware which saves the registers needed to allow a
// function conforming to the AAPCS to function as a handler.
- return UseSplitPush ? CSR_AAPCS_SplitPush_SaveList : CSR_AAPCS_SaveList;
+ return UseSplitPush ? CSR_ATPCS_SplitPush_SaveList : CSR_AAPCS_SaveList;
} else if (F.getFnAttribute("interrupt").getValueAsString() == "FIQ") {
// Fast interrupt mode gives the handler a private copy of R8-R14, so less
// need to be saved to restore user-mode state.
if (STI.isTargetDarwin())
return CSR_iOS_SwiftError_SaveList;
- return UseSplitPush ? CSR_AAPCS_SplitPush_SwiftError_SaveList :
+ return UseSplitPush ? CSR_ATPCS_SplitPush_SwiftError_SaveList :
CSR_AAPCS_SwiftError_SaveList;
}
return MF->getInfo<ARMFunctionInfo>()->isSplitCSR()
? CSR_iOS_CXX_TLS_PE_SaveList
: CSR_iOS_CXX_TLS_SaveList;
- return RegList;
+
+ if (STI.isTargetDarwin())
+ return CSR_iOS_SaveList;
+
+ if (UseSplitPush)
+ return STI.createAAPCSFrameChain() ? CSR_AAPCS_SplitPush_SaveList
+ : CSR_ATPCS_SplitPush_SaveList;
+
+ return CSR_AAPCS_SaveList;
}
const MCPhysReg *ARMBaseRegisterInfo::getCalleeSavedRegsViaCopy(
BitVector Reserved(getNumRegs());
markSuperRegs(Reserved, ARM::PC);
- if (TFI->hasFP(MF))
+ if (TFI->isFPReserved(MF))
markSuperRegs(Reserved, STI.getFramePointerReg());
if (hasBasePointer(MF))
markSuperRegs(Reserved, BasePtr);
// The order of callee-saved registers needs to match the order we actually push
// them in FrameLowering, because this order is what's used by
// PrologEpilogInserter to allocate frame index slots. So when R7 is the frame
-// pointer, we use this AAPCS alternative.
-def CSR_AAPCS_SplitPush : CalleeSavedRegs<(add LR, R7, R6, R5, R4,
+// pointer, we use this ATPCS alternative.
+def CSR_ATPCS_SplitPush : CalleeSavedRegs<(add LR, R7, R6, R5, R4,
R11, R10, R9, R8,
(sequence "D%u", 15, 8))>;
LR, R11)>;
// R8 is used to pass swifterror, remove it from CSR.
-def CSR_AAPCS_SplitPush_SwiftError : CalleeSavedRegs<(sub CSR_AAPCS_SplitPush,
+def CSR_ATPCS_SplitPush_SwiftError : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush,
R8)>;
// R10 is used to pass swifterror, remove it from CSR.
-def CSR_AAPCS_SplitPush_SwiftTail : CalleeSavedRegs<(sub CSR_AAPCS_SplitPush,
+def CSR_ATPCS_SplitPush_SwiftTail : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush,
R10)>;
+// When enforcing an AAPCS compliant frame chain, R11 is used as the frame
+// pointer even for Thumb targets, where split pushes are necessary.
+// This AAPCS alternative makes sure the frame index slots match the push
+// order in that case.
+def CSR_AAPCS_SplitPush : CalleeSavedRegs<(add LR, R11,
+ R7, R6, R5, R4,
+ R10, R9, R8,
+ (sequence "D%u", 15, 8))>;
+
// Constructors and destructors return 'this' in the ARM C++ ABI; since 'this'
// and the pointer return value are both passed in R0 in these cases, this can
// be partially modelled by treating R0 as a callee-saved register
// | |
// |-----------------------------------|
// | |
-// | prev_fp, prev_lr |
+// | prev_lr |
+// | prev_fp |
// | (a.k.a. "frame record") |
// | |
// |- - - - - - - - - - - - - - - - - -| <- fp (r7 or r11)
MFI.isFrameAddressTaken());
}
+/// isFPReserved - Return true if the frame pointer register is reserved in this
+/// function: either hasFP(MF) holds or createAAPCSFrameChain() is set.
+bool ARMFrameLowering::isFPReserved(const MachineFunction &MF) const {
+ return hasFP(MF) || MF.getSubtarget<ARMSubtarget>().createAAPCSFrameChain();
+}
+
/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
/// not required, we reserve argument space for call sites in the function
/// immediately on entry to the current function. This eliminates the need for
// into spill area 1, including the FP in R11. In either case, it
// is in area one and the adjustment needs to take place just after
// that push.
+ // FIXME: The above is not necessarily true when PACBTI is enabled.
+ // AAPCS requires use of R11, and PACBTI gets in the way of regular pushes,
+ // so FP ends up in area two.
MachineBasicBlock::iterator AfterPush;
if (HasFP) {
AfterPush = std::next(GPRCS1Push);
return true;
}
+static bool requiresAAPCSFrameRecord(const MachineFunction &MF) {
+ const auto &Subtarget = MF.getSubtarget<ARMSubtarget>();
+ return Subtarget.createAAPCSFrameChainLeaf() || // leaf option: always
+ (Subtarget.createAAPCSFrameChain() && MF.getFrameInfo().hasCalls());
+}
+
+// Thumb1 may need a spill when storing to a frame index through FP when FP
+// is a high register (R11). Returns true if a Thumb1-only function has a
+// tSTRspi/tSTRi whose frame index is addressed via a high register except SP.
+static bool canSpillOnFrameIndexAccess(const MachineFunction &MF,
+ const TargetFrameLowering &TFI) {
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ if (!AFI->isThumb1OnlyFunction())
+ return false;
+
+ for (const auto &MBB : MF)
+ for (const auto &MI : MBB)
+ if (MI.getOpcode() == ARM::tSTRspi || MI.getOpcode() == ARM::tSTRi)
+ for (const auto &Op : MI.operands())
+ if (Op.isFI()) {
+ Register Reg;
+ TFI.getFrameIndexReference(MF, Op.getIndex(), Reg);
+ if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::SP)
+ return true;
+ }
+ return false;
+}
+
void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
BitVector &SavedRegs,
RegScavenger *RS) const {
// to take advantage the eliminateFrameIndex machinery. This also ensures it
// is spilled in the order specified by getCalleeSavedRegs() to make it easier
// to combine multiple loads / stores.
- bool CanEliminateFrame = true;
+ bool CanEliminateFrame = !(requiresAAPCSFrameRecord(MF) && hasFP(MF));
bool CS1Spilled = false;
bool LRSpilled = false;
unsigned NumGPRSpills = 0;
// Functions with VLAs or extremely large call frames are rare, and
// if a function is allocating more than 1KB of stack, an extra 4-byte
// slot probably isn't relevant.
+ //
+ // A special case is the scenario where r11 is used as FP, where accesses
+ // to a frame index will require its value to be moved into a low reg.
+ // This is handled later on, once we are able to determine if we have any
+ // fp-relative accesses.
if (RegInfo->hasBasePointer(MF))
EstimatedRSStackSizeLimit = (1U << 5) * 4;
else
SavedRegs.set(FramePtr);
// If the frame pointer is required by the ABI, also spill LR so that we
// emit a complete frame record.
- if (MF.getTarget().Options.DisableFramePointerElim(MF) && !LRSpilled) {
+ if ((requiresAAPCSFrameRecord(MF) ||
+ MF.getTarget().Options.DisableFramePointerElim(MF)) &&
+ !LRSpilled) {
SavedRegs.set(ARM::LR);
LRSpilled = true;
NumGPRSpills++;
}
// r7 can be used if it is not being used as the frame pointer.
- if (!HasFP) {
+ if (!HasFP || FramePtr != ARM::R7) {
if (SavedRegs.test(ARM::R7)) {
--RegDeficit;
LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = "
// to materialize a stack offset. If so, either spill one additional
// callee-saved register or reserve a special spill slot to facilitate
// register scavenging. Thumb1 needs a spill slot for stack pointer
- // adjustments also, even when the frame itself is small.
- if (BigFrameOffsets && !ExtraCSSpill) {
+ // adjustments and for frame index accesses when FP is a high register,
+ // even when the frame itself is small.
+ if (!ExtraCSSpill &&
+ (BigFrameOffsets || canSpillOnFrameIndexAccess(MF, *this))) {
// If any non-reserved CS register isn't spilled, just spill one or two
// extra. That should take care of it!
unsigned NumExtras = TargetAlign.value() / 4;
bool enableCalleeSaveSkip(const MachineFunction &MF) const override;
bool hasFP(const MachineFunction &MF) const override;
+ bool isFPReserved(const MachineFunction &MF) const;
bool hasReservedCallFrame(const MachineFunction &MF) const override;
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override;
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
/// GPRCS1Size, GPRCS2Size, DPRCSSize - Sizes of callee saved register spills
/// areas.
unsigned FPCXTSaveSize = 0;
+ unsigned FRSaveSize = 0;
unsigned GPRCS1Size = 0;
unsigned GPRCS2Size = 0;
unsigned DPRCSAlignGapSize = 0;
void setDPRCalleeSavedAreaOffset(unsigned o) { DPRCSOffset = o; }
unsigned getFPCXTSaveAreaSize() const { return FPCXTSaveSize; }
+ unsigned getFrameRecordSavedAreaSize() const { return FRSaveSize; }
unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; }
unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; }
unsigned getDPRCalleeSavedGapSize() const { return DPRCSAlignGapSize; }
unsigned getDPRCalleeSavedAreaSize() const { return DPRCSSize; }
void setFPCXTSaveAreaSize(unsigned s) { FPCXTSaveSize = s; }
+ void setFrameRecordSavedAreaSize(unsigned s) { FRSaveSize = s; }
void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; }
void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; }
void setDPRCalleeSavedGapSize(unsigned s) { DPRCSAlignGapSize = s; }
}
MCPhysReg getFramePointerReg() const {
- if (isTargetDarwin() || (!isTargetWindows() && isThumb()))
+ if (isTargetDarwin() ||
+ (!isTargetWindows() && isThumb() && !createAAPCSFrameChain()))
return ARM::R7;
return ARM::R11;
}
// Determine the sizes of each callee-save spill areas and record which frame
// belongs to which callee-save spill areas.
- unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
+ unsigned FRSize = 0, GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
int FramePtrSpillFI = 0;
if (ArgRegsSaveSize) {
return;
}
+ bool HasFrameRecordArea = hasFP(MF) && ARM::hGPRRegClass.contains(FramePtr);
+
for (const CalleeSavedInfo &I : CSI) {
Register Reg = I.getReg();
int FI = I.getFrameIdx();
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
switch (Reg) {
+ case ARM::R11:
+ if (HasFrameRecordArea) {
+ FRSize += 4;
+ break;
+ }
+ LLVM_FALLTHROUGH;
case ARM::R8:
case ARM::R9:
case ARM::R10:
- case ARM::R11:
if (STI.splitFramePushPop(MF)) {
GPRCS2Size += 4;
break;
}
LLVM_FALLTHROUGH;
+ case ARM::LR:
+ if (HasFrameRecordArea) {
+ FRSize += 4;
+ break;
+ }
+ LLVM_FALLTHROUGH;
case ARM::R4:
case ARM::R5:
case ARM::R6:
case ARM::R7:
- case ARM::LR:
- if (Reg == FramePtr)
- FramePtrSpillFI = FI;
GPRCS1Size += 4;
break;
default:
}
}
+ MachineBasicBlock::iterator FRPush, GPRCS1Push, GPRCS2Push;
+ if (HasFrameRecordArea) {
+ // Skip Frame Record setup:
+ // push {lr}
+ // mov lr, r11
+ // push {lr}
+ std::advance(MBBI, 2);
+ FRPush = MBBI++;
+ }
+
if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
+ GPRCS1Push = MBBI;
++MBBI;
}
+ // Find last push instruction for GPRCS2 - spilling of high registers
+ // (r8-r11) could consist of multiple tPUSH and tMOVr instructions.
+ while (true) {
+ MachineBasicBlock::iterator OldMBBI = MBBI;
+ // Skip a run of tMOVr instructions
+ while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr &&
+ MBBI->getFlag(MachineInstr::FrameSetup))
+ MBBI++;
+ if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH &&
+ MBBI->getFlag(MachineInstr::FrameSetup)) {
+ GPRCS2Push = MBBI;
+ MBBI++;
+ } else {
+ // We have reached an instruction which is not a push, so the previous
+ // run of tMOVr instructions (which may have been empty) was not part of
+ // the prologue. Reset MBBI back to the last PUSH of the prologue.
+ MBBI = OldMBBI;
+ break;
+ }
+ }
+
// Determine starting offsets of spill areas.
- unsigned DPRCSOffset = NumBytes - ArgRegsSaveSize - (GPRCS1Size + GPRCS2Size + DPRCSSize);
+ unsigned DPRCSOffset = NumBytes - ArgRegsSaveSize -
+ (FRSize + GPRCS1Size + GPRCS2Size + DPRCSSize);
unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
bool HasFP = hasFP(MF);
if (HasFP)
AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
NumBytes);
+ if (HasFrameRecordArea)
+ AFI->setFrameRecordSavedAreaSize(FRSize);
AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
int FramePtrOffsetInBlock = 0;
unsigned adjustedGPRCS1Size = GPRCS1Size;
if (GPRCS1Size > 0 && GPRCS2Size == 0 &&
- tryFoldSPUpdateIntoPushPop(STI, MF, &*std::prev(MBBI), NumBytes)) {
+ tryFoldSPUpdateIntoPushPop(STI, MF, &*(GPRCS1Push), NumBytes)) {
FramePtrOffsetInBlock = NumBytes;
adjustedGPRCS1Size += NumBytes;
NumBytes = 0;
}
-
- if (adjustedGPRCS1Size) {
- CFAOffset += adjustedGPRCS1Size;
- unsigned CFIIndex =
- MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset));
- BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlags(MachineInstr::FrameSetup);
- }
- for (const CalleeSavedInfo &I : CSI) {
- Register Reg = I.getReg();
- int FI = I.getFrameIdx();
- switch (Reg) {
- case ARM::R8:
- case ARM::R9:
- case ARM::R10:
- case ARM::R11:
- case ARM::R12:
- if (STI.splitFramePushPop(MF))
- break;
- LLVM_FALLTHROUGH;
- case ARM::R0:
- case ARM::R1:
- case ARM::R2:
- case ARM::R3:
- case ARM::R4:
- case ARM::R5:
- case ARM::R6:
- case ARM::R7:
- case ARM::LR:
- unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
- nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
- BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlags(MachineInstr::FrameSetup);
- break;
- }
- }
+ CFAOffset += adjustedGPRCS1Size;
// Adjust FP so it point to the stack slot that contains the previous FP.
if (HasFP) {
- FramePtrOffsetInBlock +=
- MFI.getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize;
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
- .addReg(ARM::SP)
- .addImm(FramePtrOffsetInBlock / 4)
- .setMIFlags(MachineInstr::FrameSetup)
- .add(predOps(ARMCC::AL));
+ MachineBasicBlock::iterator AfterPush =
+ HasFrameRecordArea ? std::next(FRPush) : std::next(GPRCS1Push);
+ if (HasFrameRecordArea) {
+ // We have just finished pushing the previous FP onto the stack,
+ // so simply capture the SP value as the new Frame Pointer.
+ BuildMI(MBB, AfterPush, dl, TII.get(ARM::tMOVr), FramePtr)
+ .addReg(ARM::SP)
+ .setMIFlags(MachineInstr::FrameSetup)
+ .add(predOps(ARMCC::AL));
+ } else {
+ FramePtrOffsetInBlock +=
+ MFI.getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize;
+ BuildMI(MBB, AfterPush, dl, TII.get(ARM::tADDrSPi), FramePtr)
+ .addReg(ARM::SP)
+ .addImm(FramePtrOffsetInBlock / 4)
+ .setMIFlags(MachineInstr::FrameSetup)
+ .add(predOps(ARMCC::AL));
+ }
+
if(FramePtrOffsetInBlock) {
- CFAOffset -= FramePtrOffsetInBlock;
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
- nullptr, MRI->getDwarfRegNum(FramePtr, true), CFAOffset));
- BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ nullptr, MRI->getDwarfRegNum(FramePtr, true), (CFAOffset - FramePtrOffsetInBlock)));
+ BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
} else {
unsigned CFIIndex =
MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
nullptr, MRI->getDwarfRegNum(FramePtr, true)));
- BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
}
AFI->setShouldRestoreSPFromFP(true);
}
- // Skip past the spilling of r8-r11, which could consist of multiple tPUSH
- // and tMOVr instructions. We don't need to add any call frame information
- // in-between these instructions, because they do not modify the high
- // registers.
- while (true) {
- MachineBasicBlock::iterator OldMBBI = MBBI;
- // Skip a run of tMOVr instructions
- while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr)
- MBBI++;
- if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
- MBBI++;
- } else {
- // We have reached an instruction which is not a push, so the previous
- // run of tMOVr instructions (which may have been empty) was not part of
- // the prologue. Reset MBBI back to the last PUSH of the prologue.
- MBBI = OldMBBI;
- break;
+ // Emit call frame information for the callee-saved low registers.
+ if (GPRCS1Size > 0) {
+ MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
+ if (adjustedGPRCS1Size) {
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset));
+ BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+ for (const CalleeSavedInfo &I : CSI) {
+ Register Reg = I.getReg();
+ int FI = I.getFrameIdx();
+ switch (Reg) {
+ case ARM::R8:
+ case ARM::R9:
+ case ARM::R10:
+ case ARM::R11:
+ case ARM::R12:
+ if (STI.splitFramePushPop(MF))
+ break;
+ LLVM_FALLTHROUGH;
+ case ARM::R0:
+ case ARM::R1:
+ case ARM::R2:
+ case ARM::R3:
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ case ARM::LR:
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
+ BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ break;
+ }
}
}
// Emit call frame information for the callee-saved high registers.
- for (auto &I : CSI) {
- Register Reg = I.getReg();
- int FI = I.getFrameIdx();
- switch (Reg) {
- case ARM::R8:
- case ARM::R9:
- case ARM::R10:
- case ARM::R11:
- case ARM::R12: {
- unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
- nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
- BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlags(MachineInstr::FrameSetup);
- break;
- }
- default:
- break;
+ if (GPRCS2Size > 0) {
+ MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
+ for (auto &I : CSI) {
+ Register Reg = I.getReg();
+ int FI = I.getFrameIdx();
+ switch (Reg) {
+ case ARM::R8:
+ case ARM::R9:
+ case ARM::R10:
+ case ARM::R11:
+ case ARM::R12: {
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
+ BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ break;
+ }
+ default:
+ break;
+ }
}
}
}
// Move SP to start of FP callee save spill area.
- NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
+ NumBytes -= (AFI->getFrameRecordSavedAreaSize() +
+ AFI->getGPRCalleeSavedArea1Size() +
AFI->getGPRCalleeSavedArea2Size() +
AFI->getDPRCalleeSavedAreaSize() +
ArgRegsSaveSize);
return true;
}
-using ARMRegSet = std::bitset<ARM::NUM_TARGET_REGS>;
-
-// Return the first iteraror after CurrentReg which is present in EnabledRegs,
-// or OrderEnd if no further registers are in that set. This does not advance
-// the iterator fiorst, so returns CurrentReg if it is in EnabledRegs.
-static const unsigned *findNextOrderedReg(const unsigned *CurrentReg,
- const ARMRegSet &EnabledRegs,
- const unsigned *OrderEnd) {
- while (CurrentReg != OrderEnd && !EnabledRegs[*CurrentReg])
- ++CurrentReg;
- return CurrentReg;
-}
-
-bool Thumb1FrameLowering::spillCalleeSavedRegisters(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
- if (CSI.empty())
- return false;
-
- DebugLoc DL;
- const TargetInstrInfo &TII = *STI.getInstrInfo();
- MachineFunction &MF = *MBB.getParent();
- const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
- MF.getSubtarget().getRegisterInfo());
-
- ARMRegSet LoRegsToSave; // r0-r7, lr
- ARMRegSet HiRegsToSave; // r8-r11
- ARMRegSet CopyRegs; // Registers which can be used after pushing
- // LoRegs for saving HiRegs.
-
- for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
- Register Reg = I.getReg();
-
+static const SmallVector<Register> OrderedLowRegs = {ARM::R4, ARM::R5, ARM::R6,
+ ARM::R7, ARM::LR};
+static const SmallVector<Register> OrderedHighRegs = {ARM::R8, ARM::R9,
+ ARM::R10, ARM::R11};
+static const SmallVector<Register> OrderedCopyRegs = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4,
+ ARM::R5, ARM::R6, ARM::R7, ARM::LR};
+
+static void splitLowAndHighRegs(const std::set<Register> &Regs,
+ std::set<Register> &LowRegs,
+ std::set<Register> &HighRegs) {
+ for (Register Reg : Regs) {
if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) {
- LoRegsToSave[Reg] = true;
+ LowRegs.insert(Reg);
} else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) {
- HiRegsToSave[Reg] = true;
+ HighRegs.insert(Reg);
} else {
llvm_unreachable("callee-saved register of unexpected class");
}
-
- if ((ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) &&
- !MF.getRegInfo().isLiveIn(Reg) &&
- !(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF)))
- CopyRegs[Reg] = true;
}
+}
- // Unused argument registers can be used for the high register saving.
- for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3})
- if (!MF.getRegInfo().isLiveIn(ArgReg))
- CopyRegs[ArgReg] = true;
+template <typename It>
+It getNextOrderedReg(It OrderedStartIt, It OrderedEndIt,
+ const std::set<Register> &RegSet) {
+ return std::find_if(OrderedStartIt, OrderedEndIt, // OrderedEndIt if none
+ [&](Register Reg) { return RegSet.count(Reg); });
+}
- // Push the low registers and lr
+static void pushRegsToStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const TargetInstrInfo &TII,
+ const std::set<Register> &RegsToSave,
+ const std::set<Register> &CopyRegs) {
+ MachineFunction &MF = *MBB.getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
- if (!LoRegsToSave.none()) {
+ DebugLoc DL;
+
+ std::set<Register> LowRegs, HighRegs;
+ splitLowAndHighRegs(RegsToSave, LowRegs, HighRegs);
+
+ // Push low regs first
+ if (!LowRegs.empty()) {
MachineInstrBuilder MIB =
BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
- for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6, ARM::R7, ARM::LR}) {
- if (LoRegsToSave[Reg]) {
+ for (unsigned Reg : OrderedLowRegs) {
+ if (LowRegs.count(Reg)) {
bool isKill = !MRI.isLiveIn(Reg);
if (isKill && !MRI.isReserved(Reg))
MBB.addLiveIn(Reg);
MIB.setMIFlags(MachineInstr::FrameSetup);
}
- // Push the high registers. There are no store instructions that can access
- // these registers directly, so we have to move them to low registers, and
- // push them. This might take multiple pushes, as it is possible for there to
+ // Now push the high registers
+ // There are no store instructions that can access high registers directly,
+ // so we have to move them to low registers, and push them.
+ // This might take multiple pushes, as it is possible for there to
// be fewer low registers available than high registers which need saving.
- // These are in reverse order so that in the case where we need to use
+ // Find the first register to save.
+ // Registers must be processed in reverse order so that in case we need to use
// multiple PUSH instructions, the order of the registers on the stack still
// matches the unwind info. They need to be swicthed back to ascending order
// before adding to the PUSH instruction.
- static const unsigned AllCopyRegs[] = {ARM::LR, ARM::R7, ARM::R6,
- ARM::R5, ARM::R4, ARM::R3,
- ARM::R2, ARM::R1, ARM::R0};
- static const unsigned AllHighRegs[] = {ARM::R11, ARM::R10, ARM::R9, ARM::R8};
-
- const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs);
- const unsigned *AllHighRegsEnd = std::end(AllHighRegs);
-
- // Find the first register to save.
- const unsigned *HiRegToSave = findNextOrderedReg(
- std::begin(AllHighRegs), HiRegsToSave, AllHighRegsEnd);
+ auto HiRegToSave = getNextOrderedReg(OrderedHighRegs.rbegin(),
+ OrderedHighRegs.rend(),
+ HighRegs);
- while (HiRegToSave != AllHighRegsEnd) {
+ while (HiRegToSave != OrderedHighRegs.rend()) {
// Find the first low register to use.
- const unsigned *CopyReg =
- findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd);
+ auto CopyRegIt = getNextOrderedReg(OrderedCopyRegs.rbegin(),
+ OrderedCopyRegs.rend(),
+ CopyRegs);
// Create the PUSH, but don't insert it yet (the MOVs need to come first).
MachineInstrBuilder PushMIB = BuildMI(MF, DL, TII.get(ARM::tPUSH))
.setMIFlags(MachineInstr::FrameSetup);
SmallVector<unsigned, 4> RegsToPush;
- while (HiRegToSave != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) {
- if (HiRegsToSave[*HiRegToSave]) {
+ while (HiRegToSave != OrderedHighRegs.rend() &&
+ CopyRegIt != OrderedCopyRegs.rend()) {
+ if (HighRegs.count(*HiRegToSave)) {
bool isKill = !MRI.isLiveIn(*HiRegToSave);
if (isKill && !MRI.isReserved(*HiRegToSave))
MBB.addLiveIn(*HiRegToSave);
// Emit a MOV from the high reg to the low reg.
BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr))
- .addReg(*CopyReg, RegState::Define)
+ .addReg(*CopyRegIt, RegState::Define)
.addReg(*HiRegToSave, getKillRegState(isKill))
.add(predOps(ARMCC::AL))
.setMIFlags(MachineInstr::FrameSetup);
// Record the register that must be added to the PUSH.
- RegsToPush.push_back(*CopyReg);
-
- CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd);
- HiRegToSave =
- findNextOrderedReg(++HiRegToSave, HiRegsToSave, AllHighRegsEnd);
+ RegsToPush.push_back(*CopyRegIt);
+
+ CopyRegIt = getNextOrderedReg(std::next(CopyRegIt),
+ OrderedCopyRegs.rend(),
+ CopyRegs);
+ HiRegToSave = getNextOrderedReg(std::next(HiRegToSave),
+ OrderedHighRegs.rend(),
+ HighRegs);
}
}
// Insert the PUSH instruction after the MOVs.
MBB.insert(MI, PushMIB);
}
-
- return true;
}
-bool Thumb1FrameLowering::restoreCalleeSavedRegisters(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
- if (CSI.empty())
- return false;
+static void popRegsFromStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MI,
+ const TargetInstrInfo &TII,
+ const std::set<Register> &RegsToRestore,
+ const std::set<Register> &AvailableCopyRegs,
+ bool IsVarArg, bool HasV5Ops) {
+ if (RegsToRestore.empty())
+ return;
MachineFunction &MF = *MBB.getParent();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- const TargetInstrInfo &TII = *STI.getInstrInfo();
- const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
- MF.getSubtarget().getRegisterInfo());
-
- bool isVarArg = AFI->getArgRegsSaveSize() > 0;
DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
- ARMRegSet LoRegsToRestore;
- ARMRegSet HiRegsToRestore;
- // Low registers (r0-r7) which can be used to restore the high registers.
- ARMRegSet CopyRegs;
-
- for (CalleeSavedInfo I : CSI) {
- Register Reg = I.getReg();
-
- if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) {
- LoRegsToRestore[Reg] = true;
- } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) {
- HiRegsToRestore[Reg] = true;
- } else {
- llvm_unreachable("callee-saved register of unexpected class");
- }
-
- // If this is a low register not used as the frame pointer, we may want to
- // use it for restoring the high registers.
- if ((ARM::tGPRRegClass.contains(Reg)) &&
- !(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF)))
- CopyRegs[Reg] = true;
- }
-
- // If this is a return block, we may be able to use some unused return value
- // registers for restoring the high regs.
- auto Terminator = MBB.getFirstTerminator();
- if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET) {
- CopyRegs[ARM::R0] = true;
- CopyRegs[ARM::R1] = true;
- CopyRegs[ARM::R2] = true;
- CopyRegs[ARM::R3] = true;
- for (auto Op : Terminator->implicit_operands()) {
- if (Op.isReg())
- CopyRegs[Op.getReg()] = false;
- }
- }
-
- static const unsigned AllCopyRegs[] = {ARM::R0, ARM::R1, ARM::R2, ARM::R3,
- ARM::R4, ARM::R5, ARM::R6, ARM::R7};
- static const unsigned AllHighRegs[] = {ARM::R8, ARM::R9, ARM::R10, ARM::R11};
+ std::set<Register> LowRegs, HighRegs;
+ splitLowAndHighRegs(RegsToRestore, LowRegs, HighRegs);
- const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs);
- const unsigned *AllHighRegsEnd = std::end(AllHighRegs);
+ // Pop the high registers first
+ // There are no load instructions that can access high registers directly,
+ // so we have to pop into low registers and then move to the high registers.
+ // This might take multiple pops, as it is possible for there to
+ // be fewer low registers available than high registers which need restoring.
// Find the first register to restore.
- auto HiRegToRestore = findNextOrderedReg(std::begin(AllHighRegs),
- HiRegsToRestore, AllHighRegsEnd);
+ auto HiRegToRestore = getNextOrderedReg(OrderedHighRegs.begin(),
+ OrderedHighRegs.end(),
+ HighRegs);
+
+ std::set<Register> CopyRegs = AvailableCopyRegs;
+ Register LowScratchReg;
+ if (!HighRegs.empty() && CopyRegs.empty()) {
+ // No copy regs are available to pop high regs. Let's make use of a return
+ // register and the scratch register (IP/R12) to copy things around.
+ LowScratchReg = ARM::R0;
+ BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr))
+ .addReg(ARM::R12, RegState::Define)
+ .addReg(LowScratchReg, RegState::Kill)
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::FrameDestroy);
+ CopyRegs.insert(LowScratchReg);
+ }
- while (HiRegToRestore != AllHighRegsEnd) {
- assert(!CopyRegs.none());
+ while (HiRegToRestore != OrderedHighRegs.end()) {
+ assert(!CopyRegs.empty());
// Find the first low register to use.
- auto CopyReg =
- findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd);
+ auto CopyReg = getNextOrderedReg(OrderedCopyRegs.begin(),
+ OrderedCopyRegs.end(),
+ CopyRegs);
// Create the POP instruction.
MachineInstrBuilder PopMIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPOP))
.add(predOps(ARMCC::AL))
.setMIFlag(MachineInstr::FrameDestroy);
- while (HiRegToRestore != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) {
+ while (HiRegToRestore != OrderedHighRegs.end() &&
+ CopyReg != OrderedCopyRegs.end()) {
// Add the low register to the POP.
PopMIB.addReg(*CopyReg, RegState::Define);
.add(predOps(ARMCC::AL))
.setMIFlag(MachineInstr::FrameDestroy);
- CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd);
- HiRegToRestore =
- findNextOrderedReg(++HiRegToRestore, HiRegsToRestore, AllHighRegsEnd);
+ CopyReg = getNextOrderedReg(std::next(CopyReg),
+ OrderedCopyRegs.end(),
+ CopyRegs);
+ HiRegToRestore = getNextOrderedReg(std::next(HiRegToRestore),
+ OrderedHighRegs.end(),
+ HighRegs);
}
}
- MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP))
- .add(predOps(ARMCC::AL))
- .setMIFlag(MachineInstr::FrameDestroy);
-
- bool NeedsPop = false;
- for (CalleeSavedInfo &Info : llvm::reverse(CSI)) {
- Register Reg = Info.getReg();
-
- // High registers (excluding lr) have already been dealt with
- if (!(ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR))
- continue;
-
- if (Reg == ARM::LR) {
- Info.setRestored(false);
- if (!MBB.succ_empty() ||
- MI->getOpcode() == ARM::TCRETURNdi ||
- MI->getOpcode() == ARM::TCRETURNri)
- // LR may only be popped into PC, as part of return sequence.
- // If this isn't the return sequence, we'll need emitPopSpecialFixUp
- // to restore LR the hard way.
- // FIXME: if we don't pass any stack arguments it would be actually
- // advantageous *and* correct to do the conversion to an ordinary call
- // instruction here.
- continue;
- // Special epilogue for vararg functions. See emitEpilogue
- if (isVarArg)
- continue;
- // ARMv4T requires BX, see emitEpilogue
- if (!STI.hasV5TOps())
- continue;
+ // Restore low register used as scratch if necessary
+ if (LowScratchReg.isValid()) {
+ BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr))
+ .addReg(LowScratchReg, RegState::Define)
+ .addReg(ARM::R12, RegState::Kill)
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::FrameDestroy);
+ }
- // CMSE entry functions must return via BXNS, see emitEpilogue.
- if (AFI->isCmseNSEntryFunction())
+ // Now pop the low registers
+ if (!LowRegs.empty()) {
+ MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP))
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::FrameDestroy);
+
+ bool NeedsPop = false;
+ for (Register Reg : OrderedLowRegs) {
+ if (!LowRegs.count(Reg))
continue;
- // Pop LR into PC.
- Reg = ARM::PC;
- (*MIB).setDesc(TII.get(ARM::tPOP_RET));
- if (MI != MBB.end())
- MIB.copyImplicitOps(*MI);
- MI = MBB.erase(MI);
+ if (Reg == ARM::LR) {
+ if (!MBB.succ_empty() ||
+ MI->getOpcode() == ARM::TCRETURNdi ||
+ MI->getOpcode() == ARM::TCRETURNri)
+ // LR may only be popped into PC, as part of return sequence.
+ // If this isn't the return sequence, we'll need emitPopSpecialFixUp
+ // to restore LR the hard way.
+ // FIXME: if we don't pass any stack arguments it would be actually
+ // advantageous *and* correct to do the conversion to an ordinary call
+ // instruction here.
+ continue;
+ // Special epilogue for vararg functions. See emitEpilogue
+ if (IsVarArg)
+ continue;
+ // ARMv4T requires BX, see emitEpilogue
+ if (!HasV5Ops)
+ continue;
+
+ // CMSE entry functions must return via BXNS, see emitEpilogue.
+ if (AFI->isCmseNSEntryFunction())
+ continue;
+
+ // Pop LR into PC.
+ Reg = ARM::PC;
+ (*MIB).setDesc(TII.get(ARM::tPOP_RET));
+ if (MI != MBB.end())
+ MIB.copyImplicitOps(*MI);
+ MI = MBB.erase(MI);
+ }
+ MIB.addReg(Reg, getDefRegState(true));
+ NeedsPop = true;
}
- MIB.addReg(Reg, getDefRegState(true));
- NeedsPop = true;
+
+ // It's illegal to emit pop instruction without operands.
+ if (NeedsPop)
+ MBB.insert(MI, &*MIB);
+ else
+ MF.deleteMachineInstr(MIB);
}
+}
+
+bool Thumb1FrameLowering::spillCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+ MachineFunction &MF = *MBB.getParent();
+ const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
+ Register FPReg = RegInfo->getFrameRegister(MF);
+
+ // In case FP is a high reg, we need a separate push sequence to generate
+ // a correct Frame Record
+ bool NeedsFrameRecordPush = hasFP(MF) && ARM::hGPRRegClass.contains(FPReg);
+
+ std::set<Register> FrameRecord;
+ std::set<Register> SpilledGPRs;
+ for (const CalleeSavedInfo &I : CSI) {
+ Register Reg = I.getReg();
+ if (NeedsFrameRecordPush && (Reg == FPReg || Reg == ARM::LR))
+ FrameRecord.insert(Reg);
+ else
+ SpilledGPRs.insert(Reg);
+ }
+
+ pushRegsToStack(MBB, MI, TII, FrameRecord, {ARM::LR});
+
+ // Determine intermediate registers which can be used for pushing high regs:
+ // - Spilled low regs
+ // - Unused argument registers
+ std::set<Register> CopyRegs;
+ for (Register Reg : SpilledGPRs)
+ if ((ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) &&
+ !MF.getRegInfo().isLiveIn(Reg) && !(hasFP(MF) && Reg == FPReg))
+ CopyRegs.insert(Reg);
+ for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3})
+ if (!MF.getRegInfo().isLiveIn(ArgReg))
+ CopyRegs.insert(ArgReg);
+
+ pushRegsToStack(MBB, MI, TII, SpilledGPRs, CopyRegs);
+
+ return true;
+}
+
+bool Thumb1FrameLowering::restoreCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+ const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
+ bool IsVarArg = AFI->getArgRegsSaveSize() > 0;
+ Register FPReg = RegInfo->getFrameRegister(MF);
+
+ // In case FP is a high reg, we need a separate pop sequence to generate
+ // a correct Frame Record
+ bool NeedsFrameRecordPop = hasFP(MF) && ARM::hGPRRegClass.contains(FPReg);
+
+ std::set<Register> FrameRecord;
+ std::set<Register> SpilledGPRs;
+ for (CalleeSavedInfo &I : CSI) {
+ Register Reg = I.getReg();
+ if (NeedsFrameRecordPop && (Reg == FPReg || Reg == ARM::LR))
+ FrameRecord.insert(Reg);
+ else
+ SpilledGPRs.insert(Reg);
+
+ if (Reg == ARM::LR)
+ I.setRestored(false);
+ }
+
+ // Determine intermediate registers which can be used for popping high regs:
+ // - Spilled low regs
+ // - Unused return registers
+ std::set<Register> CopyRegs;
+ std::set<Register> UnusedReturnRegs;
+ for (Register Reg : SpilledGPRs)
+ if ((ARM::tGPRRegClass.contains(Reg)) && !(hasFP(MF) && Reg == FPReg))
+ CopyRegs.insert(Reg);
+ auto Terminator = MBB.getFirstTerminator();
+ if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET) {
+ UnusedReturnRegs.insert(ARM::R0);
+ UnusedReturnRegs.insert(ARM::R1);
+ UnusedReturnRegs.insert(ARM::R2);
+ UnusedReturnRegs.insert(ARM::R3);
+ for (auto Op : Terminator->implicit_operands()) {
+ if (Op.isReg())
+ UnusedReturnRegs.erase(Op.getReg());
+ }
+ }
+ CopyRegs.insert(UnusedReturnRegs.begin(), UnusedReturnRegs.end());
+
+ // First pop regular spilled regs.
+ popRegsFromStack(MBB, MI, TII, SpilledGPRs, CopyRegs, IsVarArg,
+ STI.hasV5TOps());
+
+ // LR may only be popped into pc, as part of a return sequence.
+ // Check that no other pop instructions are inserted after that.
+ assert((!SpilledGPRs.count(ARM::LR) || FrameRecord.empty()) &&
+ "Can't insert pop after return sequence");
- // It's illegal to emit pop instruction without operands.
- if (NeedsPop)
- MBB.insert(MI, &*MIB);
- else
- MF.deleteMachineInstr(MIB);
+ // Now pop Frame Record regs.
+ // Only unused return registers can be used as copy regs at this point.
+ popRegsFromStack(MBB, MI, TII, FrameRecord, UnusedReturnRegs, IsVarArg,
+ STI.hasV5TOps());
return true;
}
const ARMBaseInstrInfo &TII) const {
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
assert(MBB.getParent()->getSubtarget<ARMSubtarget>().isThumb1Only() &&
"This isn't needed for thumb2!");
DebugLoc dl = MI.getDebugLoc();
if ((unsigned)Offset <= Mask * Scale) {
// Replace the FrameIndex with the frame register (e.g., sp).
- MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ Register DestReg = FrameReg;
+
+ // In case FrameReg is a high register, move it to a low reg to ensure it
+ // can be used as an operand.
+ if (ARM::hGPRRegClass.contains(FrameReg) && FrameReg != ARM::SP) {
+ DestReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass);
+ BuildMI(MBB, II, dl, TII.get(ARM::tMOVr), DestReg)
+ .addReg(FrameReg)
+ .add(predOps(ARMCC::AL));
+ }
+
+ MI.getOperand(FrameRegIdx).ChangeToRegister(DestReg, false);
ImmOp.ChangeToImmediate(ImmedOffset);
// If we're using a register where sp was stored, convert the instruction
Offset, false, TII, *this);
else {
emitLoadConstPool(MBB, II, dl, TmpReg, 0, Offset);
- UseRR = true;
+ if (!ARM::hGPRRegClass.contains(FrameReg)) {
+ UseRR = true;
+ } else {
+ // If FrameReg is a high register, add the reg values in a separate
+ // instruction as the load won't be able to access it.
+ BuildMI(MBB, II, dl, TII.get(ARM::tADDhirr), TmpReg)
+ .addReg(TmpReg)
+ .addReg(FrameReg)
+ .add(predOps(ARMCC::AL));
+ }
}
} else {
emitThumbRegPlusImmediate(MBB, II, dl, TmpReg, FrameReg, Offset, TII,
MI.setDesc(TII.get(UseRR ? ARM::tLDRr : ARM::tLDRi));
MI.getOperand(FIOperandNum).ChangeToRegister(TmpReg, false, false, true);
- if (UseRR)
+ if (UseRR) {
+ assert(!ARM::hGPRRegClass.contains(FrameReg) &&
+ "Thumb1 loads can't use high register");
// Use [reg, reg] addrmode. Replace the immediate operand w/ the frame
// register. The offset is already handled in the vreg value.
MI.getOperand(FIOperandNum+1).ChangeToRegister(FrameReg, false, false,
false);
+ }
} else if (MI.mayStore()) {
VReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass);
bool UseRR = false;
Offset, false, TII, *this);
else {
emitLoadConstPool(MBB, II, dl, VReg, 0, Offset);
- UseRR = true;
+ if (!ARM::hGPRRegClass.contains(FrameReg)) {
+ UseRR = true;
+ } else {
+ // If FrameReg is a high register, add the reg values in a separate
+ // instruction as the store won't be able to access it.
+ BuildMI(MBB, II, dl, TII.get(ARM::tADDhirr), VReg)
+ .addReg(VReg)
+ .addReg(FrameReg)
+ .add(predOps(ARMCC::AL));
+ }
}
} else
emitThumbRegPlusImmediate(MBB, II, dl, VReg, FrameReg, Offset, TII,
*this);
MI.setDesc(TII.get(UseRR ? ARM::tSTRr : ARM::tSTRi));
MI.getOperand(FIOperandNum).ChangeToRegister(VReg, false, false, true);
- if (UseRR)
+ if (UseRR) {
+ assert(!ARM::hGPRRegClass.contains(FrameReg) &&
+ "Thumb1 stores can't use high register");
// Use [reg, reg] addrmode. Replace the immediate operand w/ the frame
// register. The offset is already handled in the vreg value.
MI.getOperand(FIOperandNum+1).ChangeToRegister(FrameReg, false, false,
false);
+ }
} else {
llvm_unreachable("Unexpected opcode!");
}
--- /dev/null
+; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=all 2>&1 | FileCheck %s --check-prefix=RESERVED-R11
+; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain 2>&1 | FileCheck %s --check-prefix=RESERVED-R11
+; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-R11
+; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-NONE
+; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain 2>&1 | FileCheck %s --check-prefix=RESERVED-R11
+; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-R11
+; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=none 2>&1 | FileCheck %s --check-prefix=RESERVED-NONE
+; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain 2>&1 | FileCheck %s --check-prefix=RESERVED-R11
+; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-R11
+
+declare void @leaf(i32 %input)
+
+define void @reserved_r7(i32 %input) {
+; RESERVED-NONE-NOT: error: write to reserved register 'R7'
+; RESERVED-R11-NOT: error: write to reserved register 'R7'
+ %1 = call i32 asm sideeffect "mov $0, $1", "={r7},r"(i32 %input)
+ ret void
+}
+
+define void @reserved_r11(i32 %input) {
+; RESERVED-NONE-NOT: error: write to reserved register 'R11'
+; RESERVED-R11: error: write to reserved register 'R11'
+ %1 = call i32 asm sideeffect "mov $0, $1", "={r11},r"(i32 %input)
+ ret void
+}
--- /dev/null
+; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=all | FileCheck %s --check-prefixes=FP,LEAF-FP
+; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-FP
+; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain-leaf | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-FP-AAPCS
+; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf | FileCheck %s --check-prefixes=FP,LEAF-NOFP
+; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-NOFP
+; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain-leaf | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-NOFP-AAPCS
+; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=none | FileCheck %s --check-prefixes=NOFP,LEAF-NOFP
+; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain | FileCheck %s --check-prefixes=NOFP-AAPCS,LEAF-NOFP
+; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain-leaf | FileCheck %s --check-prefixes=NOFP-AAPCS,LEAF-NOFP-AAPCS
+
+define dso_local noundef i32 @leaf(i32 noundef %0) {
+; LEAF-FP-LABEL: leaf:
+; LEAF-FP: @ %bb.0:
+; LEAF-FP-NEXT: .pad #4
+; LEAF-FP-NEXT: sub sp, sp, #4
+; LEAF-FP-NEXT: str r0, [sp]
+; LEAF-FP-NEXT: add r0, r0, #4
+; LEAF-FP-NEXT: add sp, sp, #4
+; LEAF-FP-NEXT: mov pc, lr
+;
+; LEAF-FP-AAPCS-LABEL: leaf:
+; LEAF-FP-AAPCS: @ %bb.0:
+; LEAF-FP-AAPCS-NEXT: .save {r11, lr}
+; LEAF-FP-AAPCS-NEXT: push {r11, lr}
+; LEAF-FP-AAPCS-NEXT: .setfp r11, sp
+; LEAF-FP-AAPCS-NEXT: mov r11, sp
+; LEAF-FP-AAPCS-NEXT: push {r0}
+; LEAF-FP-AAPCS-NEXT: add r0, r0, #4
+; LEAF-FP-AAPCS-NEXT: mov sp, r11
+; LEAF-FP-AAPCS-NEXT: pop {r11, lr}
+; LEAF-FP-AAPCS-NEXT: mov pc, lr
+;
+; LEAF-NOFP-LABEL: leaf:
+; LEAF-NOFP: @ %bb.0:
+; LEAF-NOFP-NEXT: .pad #4
+; LEAF-NOFP-NEXT: sub sp, sp, #4
+; LEAF-NOFP-NEXT: str r0, [sp]
+; LEAF-NOFP-NEXT: add r0, r0, #4
+; LEAF-NOFP-NEXT: add sp, sp, #4
+; LEAF-NOFP-NEXT: mov pc, lr
+;
+; LEAF-NOFP-AAPCS-LABEL: leaf:
+; LEAF-NOFP-AAPCS: @ %bb.0:
+; LEAF-NOFP-AAPCS-NEXT: .pad #4
+; LEAF-NOFP-AAPCS-NEXT: sub sp, sp, #4
+; LEAF-NOFP-AAPCS-NEXT: str r0, [sp]
+; LEAF-NOFP-AAPCS-NEXT: add r0, r0, #4
+; LEAF-NOFP-AAPCS-NEXT: add sp, sp, #4
+; LEAF-NOFP-AAPCS-NEXT: mov pc, lr
+ %2 = alloca i32, align 4
+ store i32 %0, i32* %2, align 4
+ %3 = load i32, i32* %2, align 4
+ %4 = add nsw i32 %3, 4
+ ret i32 %4
+}
+
+define dso_local noundef i32 @non_leaf(i32 noundef %0) {
+; FP-LABEL: non_leaf:
+; FP: @ %bb.0:
+; FP-NEXT: .save {r11, lr}
+; FP-NEXT: push {r11, lr}
+; FP-NEXT: .setfp r11, sp
+; FP-NEXT: mov r11, sp
+; FP-NEXT: .pad #8
+; FP-NEXT: sub sp, sp, #8
+; FP-NEXT: str r0, [sp, #4]
+; FP-NEXT: bl leaf
+; FP-NEXT: add r0, r0, #1
+; FP-NEXT: mov sp, r11
+; FP-NEXT: pop {r11, lr}
+; FP-NEXT: mov pc, lr
+;
+; FP-AAPCS-LABEL: non_leaf:
+; FP-AAPCS: @ %bb.0:
+; FP-AAPCS-NEXT: .save {r11, lr}
+; FP-AAPCS-NEXT: push {r11, lr}
+; FP-AAPCS-NEXT: .setfp r11, sp
+; FP-AAPCS-NEXT: mov r11, sp
+; FP-AAPCS-NEXT: .pad #8
+; FP-AAPCS-NEXT: sub sp, sp, #8
+; FP-AAPCS-NEXT: str r0, [sp, #4]
+; FP-AAPCS-NEXT: bl leaf
+; FP-AAPCS-NEXT: add r0, r0, #1
+; FP-AAPCS-NEXT: mov sp, r11
+; FP-AAPCS-NEXT: pop {r11, lr}
+; FP-AAPCS-NEXT: mov pc, lr
+;
+; NOFP-LABEL: non_leaf:
+; NOFP: @ %bb.0:
+; NOFP-NEXT: .save {r11, lr}
+; NOFP-NEXT: push {r11, lr}
+; NOFP-NEXT: .pad #8
+; NOFP-NEXT: sub sp, sp, #8
+; NOFP-NEXT: str r0, [sp, #4]
+; NOFP-NEXT: bl leaf
+; NOFP-NEXT: add r0, r0, #1
+; NOFP-NEXT: add sp, sp, #8
+; NOFP-NEXT: pop {r11, lr}
+; NOFP-NEXT: mov pc, lr
+;
+; NOFP-AAPCS-LABEL: non_leaf:
+; NOFP-AAPCS: @ %bb.0:
+; NOFP-AAPCS-NEXT: .save {r11, lr}
+; NOFP-AAPCS-NEXT: push {r11, lr}
+; NOFP-AAPCS-NEXT: .pad #8
+; NOFP-AAPCS-NEXT: sub sp, sp, #8
+; NOFP-AAPCS-NEXT: str r0, [sp, #4]
+; NOFP-AAPCS-NEXT: bl leaf
+; NOFP-AAPCS-NEXT: add r0, r0, #1
+; NOFP-AAPCS-NEXT: add sp, sp, #8
+; NOFP-AAPCS-NEXT: pop {r11, lr}
+; NOFP-AAPCS-NEXT: mov pc, lr
+ %2 = alloca i32, align 4
+ store i32 %0, i32* %2, align 4
+ %3 = load i32, i32* %2, align 4
+ %4 = call noundef i32 @leaf(i32 noundef %3)
+ %5 = add nsw i32 %4, 1
+ ret i32 %5
+}
+
+declare i8* @llvm.stacksave()
+define dso_local void @required_fp(i32 %0, i32 %1) {
+; LEAF-FP-LABEL: required_fp:
+; LEAF-FP: @ %bb.0:
+; LEAF-FP-NEXT: .save {r4, r5, r11, lr}
+; LEAF-FP-NEXT: push {r4, r5, r11, lr}
+; LEAF-FP-NEXT: .setfp r11, sp, #8
+; LEAF-FP-NEXT: add r11, sp, #8
+; LEAF-FP-NEXT: .pad #24
+; LEAF-FP-NEXT: sub sp, sp, #24
+; LEAF-FP-NEXT: str r1, [r11, #-16]
+; LEAF-FP-NEXT: mov r1, #7
+; LEAF-FP-NEXT: add r1, r1, r0, lsl #2
+; LEAF-FP-NEXT: str r0, [r11, #-12]
+; LEAF-FP-NEXT: bic r1, r1, #7
+; LEAF-FP-NEXT: str sp, [r11, #-24]
+; LEAF-FP-NEXT: sub sp, sp, r1
+; LEAF-FP-NEXT: mov r1, #0
+; LEAF-FP-NEXT: str r0, [r11, #-32]
+; LEAF-FP-NEXT: str r1, [r11, #-28]
+; LEAF-FP-NEXT: sub sp, r11, #8
+; LEAF-FP-NEXT: pop {r4, r5, r11, lr}
+; LEAF-FP-NEXT: mov pc, lr
+;
+; LEAF-FP-AAPCS-LABEL: required_fp:
+; LEAF-FP-AAPCS: @ %bb.0:
+; LEAF-FP-AAPCS-NEXT: .save {r4, r5, r11, lr}
+; LEAF-FP-AAPCS-NEXT: push {r4, r5, r11, lr}
+; LEAF-FP-AAPCS-NEXT: .setfp r11, sp, #8
+; LEAF-FP-AAPCS-NEXT: add r11, sp, #8
+; LEAF-FP-AAPCS-NEXT: .pad #24
+; LEAF-FP-AAPCS-NEXT: sub sp, sp, #24
+; LEAF-FP-AAPCS-NEXT: str r1, [r11, #-16]
+; LEAF-FP-AAPCS-NEXT: mov r1, #7
+; LEAF-FP-AAPCS-NEXT: add r1, r1, r0, lsl #2
+; LEAF-FP-AAPCS-NEXT: str r0, [r11, #-12]
+; LEAF-FP-AAPCS-NEXT: bic r1, r1, #7
+; LEAF-FP-AAPCS-NEXT: str sp, [r11, #-24]
+; LEAF-FP-AAPCS-NEXT: sub sp, sp, r1
+; LEAF-FP-AAPCS-NEXT: mov r1, #0
+; LEAF-FP-AAPCS-NEXT: str r0, [r11, #-32]
+; LEAF-FP-AAPCS-NEXT: str r1, [r11, #-28]
+; LEAF-FP-AAPCS-NEXT: sub sp, r11, #8
+; LEAF-FP-AAPCS-NEXT: pop {r4, r5, r11, lr}
+; LEAF-FP-AAPCS-NEXT: mov pc, lr
+;
+; LEAF-NOFP-LABEL: required_fp:
+; LEAF-NOFP: @ %bb.0:
+; LEAF-NOFP-NEXT: .save {r4, r5, r11}
+; LEAF-NOFP-NEXT: push {r4, r5, r11}
+; LEAF-NOFP-NEXT: .setfp r11, sp, #8
+; LEAF-NOFP-NEXT: add r11, sp, #8
+; LEAF-NOFP-NEXT: .pad #20
+; LEAF-NOFP-NEXT: sub sp, sp, #20
+; LEAF-NOFP-NEXT: str r1, [r11, #-16]
+; LEAF-NOFP-NEXT: mov r1, #7
+; LEAF-NOFP-NEXT: add r1, r1, r0, lsl #2
+; LEAF-NOFP-NEXT: str r0, [r11, #-12]
+; LEAF-NOFP-NEXT: bic r1, r1, #7
+; LEAF-NOFP-NEXT: str sp, [r11, #-20]
+; LEAF-NOFP-NEXT: sub sp, sp, r1
+; LEAF-NOFP-NEXT: mov r1, #0
+; LEAF-NOFP-NEXT: str r0, [r11, #-28]
+; LEAF-NOFP-NEXT: str r1, [r11, #-24]
+; LEAF-NOFP-NEXT: sub sp, r11, #8
+; LEAF-NOFP-NEXT: pop {r4, r5, r11}
+; LEAF-NOFP-NEXT: mov pc, lr
+;
+; LEAF-NOFP-AAPCS-LABEL: required_fp:
+; LEAF-NOFP-AAPCS: @ %bb.0:
+; LEAF-NOFP-AAPCS-NEXT: .save {r4, r5, r11, lr}
+; LEAF-NOFP-AAPCS-NEXT: push {r4, r5, r11, lr}
+; LEAF-NOFP-AAPCS-NEXT: .setfp r11, sp, #8
+; LEAF-NOFP-AAPCS-NEXT: add r11, sp, #8
+; LEAF-NOFP-AAPCS-NEXT: .pad #24
+; LEAF-NOFP-AAPCS-NEXT: sub sp, sp, #24
+; LEAF-NOFP-AAPCS-NEXT: str r1, [r11, #-16]
+; LEAF-NOFP-AAPCS-NEXT: mov r1, #7
+; LEAF-NOFP-AAPCS-NEXT: add r1, r1, r0, lsl #2
+; LEAF-NOFP-AAPCS-NEXT: str r0, [r11, #-12]
+; LEAF-NOFP-AAPCS-NEXT: bic r1, r1, #7
+; LEAF-NOFP-AAPCS-NEXT: str sp, [r11, #-24]
+; LEAF-NOFP-AAPCS-NEXT: sub sp, sp, r1
+; LEAF-NOFP-AAPCS-NEXT: mov r1, #0
+; LEAF-NOFP-AAPCS-NEXT: str r0, [r11, #-32]
+; LEAF-NOFP-AAPCS-NEXT: str r1, [r11, #-28]
+; LEAF-NOFP-AAPCS-NEXT: sub sp, r11, #8
+; LEAF-NOFP-AAPCS-NEXT: pop {r4, r5, r11, lr}
+; LEAF-NOFP-AAPCS-NEXT: mov pc, lr
+ %3 = alloca i32, align 4
+ %4 = alloca i32, align 4
+ %5 = alloca i8*, align 8
+ %6 = alloca i64, align 8
+ store i32 %0, i32* %3, align 4
+ store i32 %1, i32* %4, align 4
+ %7 = load i32, i32* %3, align 4
+ %8 = zext i32 %7 to i64
+ %9 = call i8* @llvm.stacksave()
+ store i8* %9, i8** %5, align 8
+ %10 = alloca i32, i64 %8, align 4
+ store i64 %8, i64* %6, align 8
+ ret void
+}
-; RUN: llc -mtriple=thumbv6m-eabi -frame-pointer=none %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv6m-eabi -frame-pointer=none %s -o - --verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP,CHECK-ATPCS
+; RUN: llc -mtriple=thumbv6m-eabi -frame-pointer=all %s -o - --verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-FP-ATPCS,CHECK-ATPCS
+; RUN: llc -mtriple=thumbv6m-eabi -frame-pointer=none -mattr=+aapcs-frame-chain-leaf %s -o - --verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP,CHECK-AAPCS
+; RUN: llc -mtriple=thumbv6m-eabi -frame-pointer=all -mattr=+aapcs-frame-chain-leaf %s -o - --verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-FP-AAPCS,CHECK-AAPCS
; struct S { int x[128]; } s;
; int f(int *, int, int, int, struct S);
@s = common dso_local global %struct.S zeroinitializer, align 4
declare void @llvm.va_start(i8*)
+declare dso_local i32 @i(i32) local_unnamed_addr
declare dso_local i32 @g(i32*, i32, i32, i32, i32, i32) local_unnamed_addr
declare dso_local i32 @f(i32*, i32, i32, i32, %struct.S* byval(%struct.S) align 4) local_unnamed_addr
declare dso_local i32 @h(i32*, i32*, i32*) local_unnamed_addr
; Test access to arguments, passed on stack (including varargs)
;
-; Usual case, access via SP
+; Usual case, access via SP if FP is not available
; int test_args_sp(int a, int b, int c, int d, int e) {
; int v[4];
; return g(v, a, b, c, d, e);
}
; CHECK-LABEL: test_args_sp
; Load `e`
-; CHECK: ldr r0, [sp, #32]
+; CHECK-NOFP: ldr r0, [sp, #32]
+; CHECK-FP-ATPCS: ldr r0, [r7, #8]
+; CHECK-FP-AAPCS: mov r0, r11
+; CHECK-FP-AAPCS: ldr r0, [r0, #8]
; CHECK-NEXT: str r3, [sp]
; Pass `e` on stack
; CHECK-NEXT: str r0, [sp, #4]
; Three incoming varargs in registers
; CHECK: sub sp, #12
; CHECK: sub sp, #28
-; Incoming arguments area is accessed via SP
-; CHECK: add r0, sp, #36
-; CHECK: stm r0!, {r1, r2, r3}
+; Incoming arguments area is accessed via SP if FP is not available
+; CHECK-NOFP: add r0, sp, #36
+; CHECK-NOFP: stm r0!, {r1, r2, r3}
+; CHECK-FP-ATPCS: mov r0, r7
+; CHECK-FP-ATPCS: adds r0, #8
+; CHECK-FP-ATPCS: stm r0!, {r1, r2, r3}
+; CHECK-FP-AAPCS: mov r0, r11
+; CHECK-FP-AAPCS: str r1, [r0, #8]
+; CHECK-FP-AAPCS: mov r0, r11
+; CHECK-FP-AAPCS: str r2, [r0, #12]
+; CHECK-FP-AAPCS: mov r0, r11
+; CHECK-FP-AAPCS: str r3, [r0, #16]
; Re-aligned stack, access via FP
; int test_args_realign(int a, int b, int c, int d, int e) {
}
; CHECK-LABEL: test_args_realign
; Setup frame pointer
-; CHECK: add r7, sp, #8
+; CHECK-ATPCS: add r7, sp, #8
+; CHECK-AAPCS: mov r11, sp
; Align stack
; CHECK: mov r4, sp
; CHECK-NEXT: lsrs r4, r4, #4
; CHECK-NEXT: lsls r4, r4, #4
; CHECK-NEXT: mov sp, r4
; Load `e` via FP
-; CHECK: ldr r0, [r7, #8]
+; CHECK-ATPCS: ldr r0, [r7, #8]
+; CHECK-AAPCS: mov r0, r11
+; CHECK-AAPCS: ldr r0, [r0, #8]
; CHECK-NEXT: str r3, [sp]
; Pass `e` as argument
; CHECK-NEXT: str r0, [sp, #4]
; Three incoming register varargs
; CHECK: sub sp, #12
; Setup frame pointer
-; CHECK: add r7, sp, #8
+; CHECK-ATPCS: add r7, sp, #8
+; CHECK-AAPCS: mov r11, sp
; Align stack
; CHECK: mov r4, sp
; CHECK-NEXT: lsrs r4, r4, #4
; CHECK-NEXT: lsls r4, r4, #4
; CHECK-NEXT: mov sp, r4
; Incoming register varargs stored via FP
-; CHECK: mov r0, r7
-; CHECK-NEXT: adds r0, #8
-; CHECK-NEXT: stm r0!, {r1, r2, r3}
+; CHECK-ATPCS: mov r0, r7
+; CHECK-ATPCS-NEXT: adds r0, #8
+; CHECK-ATPCS-NEXT: stm r0!, {r1, r2, r3}
+; CHECK-AAPCS: mov r0, r11
+; CHECK-AAPCS: str r1, [r0, #8]
+; CHECK-AAPCS: mov r0, r11
+; CHECK-AAPCS: str r2, [r0, #12]
+; CHECK-AAPCS: mov r0, r11
+; CHECK-AAPCS: str r3, [r0, #16]
; VLAs present, access via FP
; int test_args_vla(int a, int b, int c, int d, int e) {
; int v[a];
}
; CHECK-LABEL: test_args_vla
; Setup frame pointer
-; CHECK: add r7, sp, #12
+; CHECK-ATPCS: add r7, sp, #12
+; CHECK-AAPCS: mov r11, sp
; Allocate outgoing stack arguments space
-; CHECK: sub sp, #4
+; CHECK: sub sp, #8
; Load `e` via FP
-; CHECK: ldr r5, [r7, #8]
+; CHECK-ATPCS: ldr r5, [r7, #8]
+; CHECK-AAPCS: mov r5, r11
+; CHECK-AAPCS: ldr r5, [r5, #8]
; Pass `d` and `e` as arguments
; CHECK-NEXT: str r3, [sp]
; CHECK-NEXT: str r5, [sp, #4]
; Three incoming register varargs
; CHECK: sub sp, #12
; Setup frame pointer
-; CHECK: add r7, sp, #8
+; CHECK-ATPCS: add r7, sp, #8
+; CHECK-AAPCS: mov r11, sp
; Register varargs stored via FP
-; CHECK-DAG: str r3, [r7, #16]
-; CHECK-DAG: str r2, [r7, #12]
-; CHECK-DAG: str r1, [r7, #8]
+; CHECK-ATPCS-DAG: str r3, [r7, #16]
+; CHECK-ATPCS-DAG: str r2, [r7, #12]
+; CHECK-ATPCS-DAG: str r1, [r7, #8]
+; CHECK-AAPCS-DAG: mov r5, r11
+; CHECK-AAPCS-DAG: str r1, [r5, #8]
+; CHECK-AAPCS-DAG: mov r1, r11
+; CHECK-AAPCS-DAG: str r3, [r1, #16]
+; CHECK-AAPCS-DAG: mov r1, r11
+; CHECK-AAPCS-DAG: str r2, [r1, #12]
; Moving SP, access via SP
; int test_args_moving_sp(int a, int b, int c, int d, int e) {
ret i32 %add7
}
; CHECK-LABEL: test_args_moving_sp
-; 20 bytes callee-saved area
-; CHECK: push {r4, r5, r6, r7, lr}
-; 20 bytes locals
-; CHECK: sub sp, #20
+; 20 bytes callee-saved area without FP
+; CHECK-NOFP: push {r4, r5, r6, r7, lr}
+; 20 bytes callee-saved area for ATPCS
+; CHECK-FP-ATPCS: push {r4, r5, r6, r7, lr}
+; 24 bytes callee-saved area for AAPCS as codegen prefers an even number of GPRs spilled
+; CHECK-FP-AAPCS: push {lr}
+; CHECK-FP-AAPCS: mov lr, r11
+; CHECK-FP-AAPCS: push {lr}
+; CHECK-FP-AAPCS: push {r4, r5, r6, r7}
+; 20 bytes locals without FP
+; CHECK-NOFP: sub sp, #20
+; 28 bytes locals with FP for ATPCS
+; CHECK-FP-ATPCS: sub sp, #28
+; 24 bytes locals with FP for AAPCS
+; CHECK-FP-AAPCS: sub sp, #24
; Setup base pointer
; CHECK: mov r6, sp
; Allocate outgoing arguments space
; CHECK: sub sp, #508
; CHECK: sub sp, #4
-; Load `e` via BP, 40 = 20 + 20
-; CHECK: ldr r3, [r6, #40]
+; Load `e` via BP if FP is not present (40 = 20 + 20)
+; CHECK-NOFP: ldr r3, [r6, #40]
+; Load `e` via FP otherwise
+; CHECK-FP-ATPCS: ldr r3, [r7, #8]
+; CHECK-FP-AAPCS: mov r0, r11
+; CHECK-FP-AAPCS: ldr r3, [r0, #8]
; CHECK: bl f
; Stack restored before next call
; CHECK-NEXT: add sp, #508
; CHECK-LABEL: test_varargs_moving_sp
; Three incoming register varargs
; CHECK: sub sp, #12
-; 16 bytes callee-saves
-; CHECK: push {r4, r5, r6, lr}
-; 20 bytes locals
-; CHECK: sub sp, #20
-; Incoming varargs stored via BP, 36 = 20 + 16
-; CHECK: mov r0, r6
-; CHECK-NEXT: adds r0, #36
-; CHECK-NEXT: stm r0!, {r1, r2, r3}
+; 16 bytes callee-saves without FP
+; CHECK-NOFP: push {r4, r5, r6, lr}
+; 20 bytes callee-saves with FP for ATPCS, 24 bytes with FP for AAPCS
+; CHECK-FP-ATPCS: push {r4, r5, r6, r7, lr}
+; CHECK-FP-AAPCS: push {lr}
+; CHECK-FP-AAPCS: mov lr, r11
+; CHECK-FP-AAPCS: push {lr}
+; CHECK-FP-AAPCS: push {r4, r5, r6, r7}
+; Locals area
+; CHECK-NOFP: sub sp, #20
+; CHECK-FP-ATPCS: sub sp, #24
+; CHECK-FP-AAPCS: sub sp, #20
+; Incoming varargs stored via BP if FP is not present (36 = 20 + 16)
+; CHECK-NOFP: mov r0, r6
+; CHECK-NOFP-NEXT: adds r0, #36
+; CHECK-NOFP-NEXT: stm r0!, {r1, r2, r3}
+; Incoming varargs stored via FP otherwise
+; CHECK-FP-ATPCS: mov r0, r7
+; CHECK-FP-ATPCS-NEXT: adds r0, #8
+; CHECK-FP-ATPCS-NEXT: stm r0!, {r1, r2, r3}
+; CHECK-FP-AAPCS: mov r0, r11
+; CHECK-FP-AAPCS-NEXT: str r1, [r0, #8]
+; CHECK-FP-AAPCS-NEXT: mov r0, r11
+; CHECK-FP-AAPCS-NEXT: str r2, [r0, #12]
+; CHECK-FP-AAPCS-NEXT: mov r0, r11
+; CHECK-FP-AAPCS-NEXT: str r3, [r0, #16]
+
+; struct S { int x[128]; } s;
+; int test_args_large_offset(struct S a, int b) {
+; return i(b);
+; }
+define dso_local i32 @test_args_large_offset(%struct.S* byval(%struct.S) align 4 %0, i32 %1) local_unnamed_addr {
+ %3 = alloca i32, align 4
+ store i32 %1, i32* %3, align 4
+ %4 = load i32, i32* %3, align 4
+ %5 = call i32 @i(i32 %4)
+ ret i32 %5
+}
+; CHECK-LABEL: test_args_large_offset
+; Without FP: Access to large offset is made using SP
+; CHECK-NOFP: ldr r0, [sp, #520]
+; With FP: Access to large offset is made through a const pool using FP
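+; (The constant-pool label index depends on the function's position in the file.)
+; CHECK-FP-ATPCS: ldr r0, .LCPI{{[0-9]+}}_0
+; CHECK-FP-AAPCS: ldr r0, .LCPI{{[0-9]+}}_0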
+; CHECK-FP-ATPCS: ldr r0, [r0, r7]
+; CHECK-FP-AAPCS: add r0, r11
+; CHECK-FP-AAPCS: ldr r0, [r0]
+; CHECK: bl i
;
; Access to locals
}
; CHECK-LABEL: test_local_realign
; Setup frame pointer
-; CHECK: add r7, sp, #8
+; CHECK-ATPCS: add r7, sp, #8
+; CHECK-AAPCS: mov r11, sp
; Re-align stack
; CHECK: mov r4, sp
; CHECK-NEXT: lsrs r4, r4, #4
}
; CHECK-LABEL: test_local_vla
; Setup frame pointer
-; CHECK: add r7, sp, #12
+; CHECK-ATPCS: add r7, sp, #12
+; CHECK-AAPCS: mov r11, sp
+; Locals area
+; CHECK-ATPCS: sub sp, #12
+; CHECK-AAPCS: sub sp, #16
; Setup base pointer
; CHECK: mov r6, sp
-; CHECK: mov r5, r6
+; CHECK-ATPCS: mov r5, r6
+; CHECK-AAPCS: adds r5, r6, #4
; Arguments to `h` compute relative to BP
; CHECK: adds r0, r6, #7
-; CHECK-NEXT: adds r0, #1
-; CHECK-NEXT: adds r1, r6, #4
-; CHECK-NEXT: mov r2, r6
+; CHECK-ATPCS-NEXT: adds r0, #1
+; CHECK-ATPCS-NEXT: adds r1, r6, #4
+; CHECK-ATPCS-NEXT: mov r2, r6
+; CHECK-AAPCS-NEXT: adds r0, #5
+; CHECK-AAPCS-NEXT: adds r1, r6, #7
+; CHECK-AAPCS-NEXT: adds r1, #1
+; CHECK-AAPCS-NEXT: adds r2, r6, #4
; CHECK-NEXT: bl h
; Load `x`, `y`, `z` via BP (r5 should still have the value of r6 from the move
; above)
}
; CHECK-LABEL: test_local_moving_sp
; Locals area
-; CHECK: sub sp, #36
+; CHECK-NOFP: sub sp, #36
+; CHECK-FP-ATPCS: sub sp, #44
+; CHECK-FP-AAPCS: sub sp, #40
; Setup BP
; CHECK: mov r6, sp
; Outgoing arguments
; CHECK-NEXT: sub sp, #508
; CHECK-NEXT: sub sp, #8
; Argument addresses computed relative to BP
-; CHECK: adds r4, r6, #7
-; CHECK-NEXT: adds r4, #13
-; CHECK: adds r1, r6, #7
-; CHECK-NEXT: adds r1, #9
-; CHECK: adds r5, r6, #7
-; CHECK-NEXT: adds r5, #5
+; CHECK-NOFP: adds r4, r6, #7
+; CHECK-NOFP-NEXT: adds r4, #13
+; CHECK-NOFP: adds r1, r6, #7
+; CHECK-NOFP-NEXT: adds r1, #9
+; CHECK-NOFP: adds r5, r6, #7
+; CHECK-NOFP-NEXT: adds r5, #5
+; CHECK-FP-ATPCS: adds r0, r6, #7
+; CHECK-FP-ATPCS-NEXT: adds r0, #21
+; CHECK-FP-ATPCS: adds r1, r6, #7
+; CHECK-FP-ATPCS-NEXT: adds r1, #17
+; CHECK-FP-ATPCS: adds r5, r6, #7
+; CHECK-FP-ATPCS-NEXT: adds r5, #13
+; CHECK-FP-AAPCS: adds r4, r6, #7
+; CHECK-FP-AAPCS-NEXT: adds r4, #17
+; CHECK-FP-AAPCS: adds r1, r6, #7
+; CHECK-FP-AAPCS-NEXT: adds r1, #13
+; CHECK-FP-AAPCS: adds r5, r6, #7
+; CHECK-FP-AAPCS-NEXT: adds r5, #9
; CHECK: bl u
; Stack restored before next call
; CHECK: add sp, #508
--- /dev/null
+; Exercises the "write to reserved register" diagnostic on thumbv6m for inline
+; asm that writes r7 or r11, over every -frame-pointer mode combined with no
+; extra feature, +aapcs-frame-chain and +aapcs-frame-chain-leaf.
+;
+; Prefix selection made by the invocations below
+;   -frame-pointer=all, no feature              -> RESERVED-R7
+;   -frame-pointer=non-leaf/none, no feature    -> RESERVED-NONE
+;   any mode with either aapcs feature          -> RESERVED-R11
+;
+; Invocations wrapped in `not` are expected to exit with an error; stderr is
+; merged into stdout so the diagnostic text reaches FileCheck.
+; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=all 2>&1 | FileCheck %s --check-prefix=RESERVED-R7
+; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain 2>&1 | FileCheck %s --check-prefix=RESERVED-R11
+; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-R11
+; RUN: llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-NONE
+; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain 2>&1 | FileCheck %s --check-prefix=RESERVED-R11
+; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-R11
+; RUN: llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=none 2>&1 | FileCheck %s --check-prefix=RESERVED-NONE
+; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain 2>&1 | FileCheck %s --check-prefix=RESERVED-R11
+; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-R11
+
+; External declaration only. NOTE(review) neither function defined below calls
+; @leaf, so this looks unused in this test - confirm before removing.
+declare void @leaf(i32 %input)
+
+; Writes r7 through an inline-asm output constraint ("={r7}"). The reserved
+; register error for R7 is required only under the RESERVED-R7 prefix; the
+; RESERVED-NONE and RESERVED-R11 prefixes assert that it is absent.
+define void @reserved_r7(i32 %input) {
+; RESERVED-NONE-NOT: error: write to reserved register 'R7'
+; RESERVED-R7: error: write to reserved register 'R7'
+; RESERVED-R11-NOT: error: write to reserved register 'R7'
+ %1 = call i32 asm sideeffect "mov $0, $1", "={r7},r"(i32 %input)
+ ret void
+}
+
+; Writes r11 through an inline-asm output constraint ("={r11}"). The reserved
+; register error for R11 is required only under the RESERVED-R11 prefix; the
+; RESERVED-NONE and RESERVED-R7 prefixes assert that it is absent.
+define void @reserved_r11(i32 %input) {
+; RESERVED-NONE-NOT: error: write to reserved register 'R11'
+; RESERVED-R7-NOT: error: write to reserved register 'R11'
+; RESERVED-R11: error: write to reserved register 'R11'
+ %1 = call i32 asm sideeffect "mov $0, $1", "={r11},r"(i32 %input)
+ ret void
+}
--- /dev/null
+; Checks thumbv6m prologue/epilogue code for each -frame-pointer mode combined
+; with no extra feature, +aapcs-frame-chain and +aapcs-frame-chain-leaf. Every
+; invocation also passes --verify-machineinstrs.
+;
+; Each invocation selects two prefixes, one for @non_leaf/@required_fp and one
+; for @leaf
+;   all      + no feature        -> FP,         LEAF-FP
+;   all      + chain             -> FP-AAPCS,   LEAF-FP
+;   all      + chain-leaf        -> FP-AAPCS,   LEAF-FP-AAPCS
+;   non-leaf + no feature        -> FP,         LEAF-NOFP
+;   non-leaf + chain             -> FP-AAPCS,   LEAF-NOFP
+;   non-leaf + chain-leaf        -> FP-AAPCS,   LEAF-NOFP-AAPCS
+;   none     + no feature        -> NOFP,       LEAF-NOFP
+;   none     + chain             -> NOFP-AAPCS, LEAF-NOFP
+;   none     + chain-leaf        -> NOFP-AAPCS, LEAF-NOFP-AAPCS
+; RUN: llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=all --verify-machineinstrs | FileCheck %s --check-prefixes=FP,LEAF-FP
+; RUN: llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain --verify-machineinstrs | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-FP
+; RUN: llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain-leaf --verify-machineinstrs | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-FP-AAPCS
+; RUN: llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf --verify-machineinstrs | FileCheck %s --check-prefixes=FP,LEAF-NOFP
+; RUN: llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain --verify-machineinstrs | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-NOFP
+; RUN: llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain-leaf --verify-machineinstrs | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-NOFP-AAPCS
+; RUN: llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=none --verify-machineinstrs | FileCheck %s --check-prefixes=NOFP,LEAF-NOFP
+; RUN: llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain --verify-machineinstrs | FileCheck %s --check-prefixes=NOFP-AAPCS,LEAF-NOFP
+; RUN: llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain-leaf --verify-machineinstrs | FileCheck %s --check-prefixes=NOFP-AAPCS,LEAF-NOFP-AAPCS
+
+; Leaf function. Stores its argument to a 4-byte stack slot, reloads it and
+; returns the value plus 4.
+;
+; Only the LEAF-FP-AAPCS expectations contain a frame record. There lr is
+; pushed, r11 is copied into lr and pushed as well, and r11 is then set to sp.
+; The epilogue restores r11 through r1 and returns with pop {pc}.
+; The other three leaf prefixes expect nothing beyond the 4-byte stack
+; adjustment and a plain bx lr return.
+define dso_local noundef i32 @leaf(i32 noundef %0) {
+; LEAF-FP-LABEL: leaf:
+; LEAF-FP: @ %bb.0:
+; LEAF-FP-NEXT: .pad #4
+; LEAF-FP-NEXT: sub sp, #4
+; LEAF-FP-NEXT: str r0, [sp]
+; LEAF-FP-NEXT: adds r0, r0, #4
+; LEAF-FP-NEXT: add sp, #4
+; LEAF-FP-NEXT: bx lr
+;
+; LEAF-FP-AAPCS-LABEL: leaf:
+; LEAF-FP-AAPCS: @ %bb.0:
+; LEAF-FP-AAPCS-NEXT: .save {lr}
+; LEAF-FP-AAPCS-NEXT: push {lr}
+; LEAF-FP-AAPCS-NEXT: mov lr, r11
+; LEAF-FP-AAPCS-NEXT: .save {r11}
+; LEAF-FP-AAPCS-NEXT: push {lr}
+; LEAF-FP-AAPCS-NEXT: .setfp r11, sp
+; LEAF-FP-AAPCS-NEXT: mov r11, sp
+; LEAF-FP-AAPCS-NEXT: .pad #4
+; LEAF-FP-AAPCS-NEXT: sub sp, #4
+; LEAF-FP-AAPCS-NEXT: str r0, [sp]
+; LEAF-FP-AAPCS-NEXT: adds r0, r0, #4
+; LEAF-FP-AAPCS-NEXT: add sp, #4
+; LEAF-FP-AAPCS-NEXT: pop {r1}
+; LEAF-FP-AAPCS-NEXT: mov r11, r1
+; LEAF-FP-AAPCS-NEXT: pop {pc}
+;
+; LEAF-NOFP-LABEL: leaf:
+; LEAF-NOFP: @ %bb.0:
+; LEAF-NOFP-NEXT: .pad #4
+; LEAF-NOFP-NEXT: sub sp, #4
+; LEAF-NOFP-NEXT: str r0, [sp]
+; LEAF-NOFP-NEXT: adds r0, r0, #4
+; LEAF-NOFP-NEXT: add sp, #4
+; LEAF-NOFP-NEXT: bx lr
+;
+; LEAF-NOFP-AAPCS-LABEL: leaf:
+; LEAF-NOFP-AAPCS: @ %bb.0:
+; LEAF-NOFP-AAPCS-NEXT: .pad #4
+; LEAF-NOFP-AAPCS-NEXT: sub sp, #4
+; LEAF-NOFP-AAPCS-NEXT: str r0, [sp]
+; LEAF-NOFP-AAPCS-NEXT: adds r0, r0, #4
+; LEAF-NOFP-AAPCS-NEXT: add sp, #4
+; LEAF-NOFP-AAPCS-NEXT: bx lr
+ %2 = alloca i32, align 4
+ store i32 %0, i32* %2, align 4
+ %3 = load i32, i32* %2, align 4
+ %4 = add nsw i32 %3, 4
+ ret i32 %4
+}
+
+; Non-leaf function. Spills its argument, reloads it, calls @leaf with it and
+; returns the call result plus 1.
+;
+; Expected frame setup per prefix
+;   FP          push {r7, lr} followed by r7 being set from sp
+;   FP-AAPCS    lr pushed, r11 copied into lr and pushed, r11 set to sp;
+;               the epilogue restores r11 through r1
+;   NOFP        push {r7, lr} with no frame-pointer setup
+;   NOFP-AAPCS  same sequence as NOFP
+define dso_local noundef i32 @non_leaf(i32 noundef %0) {
+; FP-LABEL: non_leaf:
+; FP: @ %bb.0:
+; FP-NEXT: .save {r7, lr}
+; FP-NEXT: push {r7, lr}
+; FP-NEXT: .setfp r7, sp
+; FP-NEXT: add r7, sp, #0
+; FP-NEXT: .pad #8
+; FP-NEXT: sub sp, #8
+; FP-NEXT: str r0, [sp, #4]
+; FP-NEXT: bl leaf
+; FP-NEXT: adds r0, r0, #1
+; FP-NEXT: add sp, #8
+; FP-NEXT: pop {r7, pc}
+;
+; FP-AAPCS-LABEL: non_leaf:
+; FP-AAPCS: @ %bb.0:
+; FP-AAPCS-NEXT: .save {lr}
+; FP-AAPCS-NEXT: push {lr}
+; FP-AAPCS-NEXT: mov lr, r11
+; FP-AAPCS-NEXT: .save {r11}
+; FP-AAPCS-NEXT: push {lr}
+; FP-AAPCS-NEXT: .setfp r11, sp
+; FP-AAPCS-NEXT: mov r11, sp
+; FP-AAPCS-NEXT: .pad #8
+; FP-AAPCS-NEXT: sub sp, #8
+; FP-AAPCS-NEXT: str r0, [sp, #4]
+; FP-AAPCS-NEXT: bl leaf
+; FP-AAPCS-NEXT: adds r0, r0, #1
+; FP-AAPCS-NEXT: add sp, #8
+; FP-AAPCS-NEXT: pop {r1}
+; FP-AAPCS-NEXT: mov r11, r1
+; FP-AAPCS-NEXT: pop {pc}
+;
+; NOFP-LABEL: non_leaf:
+; NOFP: @ %bb.0:
+; NOFP-NEXT: .save {r7, lr}
+; NOFP-NEXT: push {r7, lr}
+; NOFP-NEXT: .pad #8
+; NOFP-NEXT: sub sp, #8
+; NOFP-NEXT: str r0, [sp, #4]
+; NOFP-NEXT: bl leaf
+; NOFP-NEXT: adds r0, r0, #1
+; NOFP-NEXT: add sp, #8
+; NOFP-NEXT: pop {r7, pc}
+;
+; NOFP-AAPCS-LABEL: non_leaf:
+; NOFP-AAPCS: @ %bb.0:
+; NOFP-AAPCS-NEXT: .save {r7, lr}
+; NOFP-AAPCS-NEXT: push {r7, lr}
+; NOFP-AAPCS-NEXT: .pad #8
+; NOFP-AAPCS-NEXT: sub sp, #8
+; NOFP-AAPCS-NEXT: str r0, [sp, #4]
+; NOFP-AAPCS-NEXT: bl leaf
+; NOFP-AAPCS-NEXT: adds r0, r0, #1
+; NOFP-AAPCS-NEXT: add sp, #8
+; NOFP-AAPCS-NEXT: pop {r7, pc}
+ %2 = alloca i32, align 4
+ store i32 %0, i32* %2, align 4
+ %3 = load i32, i32* %2, align 4
+ %4 = call noundef i32 @leaf(i32 noundef %3)
+ %5 = add nsw i32 %4, 1
+ ret i32 %5
+}
+
+; Function containing a dynamically sized alloca (%10, sized by the first
+; argument) plus a call to llvm.stacksave whose result is only stored.
+;
+; Every prefix, including the two selected by -frame-pointer=none, expects a
+; frame pointer to be set up here
+;   FP / NOFP              r7 set to sp+8 after push {r4, r6, r7, lr}; sp is
+;                          recomputed from r7 before the final pop
+;   FP-AAPCS / NOFP-AAPCS  lr and r11 pushed first, r11 set to sp, then r4 and
+;                          r6 pushed below that pair; sp is recomputed from
+;                          r11 and r11 is restored through r0
+; In all four variants r6 is loaded from sp after the fixed-size stack
+; adjustment and is used as the base for the local stores.
+declare i8* @llvm.stacksave()
+define dso_local void @required_fp(i32 %0, i32 %1) {
+; FP-LABEL: required_fp:
+; FP: @ %bb.0:
+; FP-NEXT: .save {r4, r6, r7, lr}
+; FP-NEXT: push {r4, r6, r7, lr}
+; FP-NEXT: .setfp r7, sp, #8
+; FP-NEXT: add r7, sp, #8
+; FP-NEXT: .pad #24
+; FP-NEXT: sub sp, #24
+; FP-NEXT: mov r6, sp
+; FP-NEXT: mov r2, r6
+; FP-NEXT: str r1, [r2, #16]
+; FP-NEXT: str r0, [r2, #20]
+; FP-NEXT: mov r1, sp
+; FP-NEXT: str r1, [r2, #8]
+; FP-NEXT: lsls r1, r0, #2
+; FP-NEXT: adds r1, r1, #7
+; FP-NEXT: movs r3, #7
+; FP-NEXT: bics r1, r3
+; FP-NEXT: mov r3, sp
+; FP-NEXT: subs r1, r3, r1
+; FP-NEXT: mov sp, r1
+; FP-NEXT: movs r1, #0
+; FP-NEXT: str r1, [r6, #4]
+; FP-NEXT: str r0, [r2]
+; FP-NEXT: subs r4, r7, #7
+; FP-NEXT: subs r4, #1
+; FP-NEXT: mov sp, r4
+; FP-NEXT: pop {r4, r6, r7, pc}
+;
+; FP-AAPCS-LABEL: required_fp:
+; FP-AAPCS: @ %bb.0:
+; FP-AAPCS-NEXT: .save {lr}
+; FP-AAPCS-NEXT: push {lr}
+; FP-AAPCS-NEXT: mov lr, r11
+; FP-AAPCS-NEXT: .save {r11}
+; FP-AAPCS-NEXT: push {lr}
+; FP-AAPCS-NEXT: .setfp r11, sp
+; FP-AAPCS-NEXT: mov r11, sp
+; FP-AAPCS-NEXT: .save {r4, r6}
+; FP-AAPCS-NEXT: push {r4, r6}
+; FP-AAPCS-NEXT: .pad #24
+; FP-AAPCS-NEXT: sub sp, #24
+; FP-AAPCS-NEXT: mov r6, sp
+; FP-AAPCS-NEXT: mov r2, r6
+; FP-AAPCS-NEXT: str r1, [r2, #16]
+; FP-AAPCS-NEXT: str r0, [r2, #20]
+; FP-AAPCS-NEXT: mov r1, sp
+; FP-AAPCS-NEXT: str r1, [r2, #8]
+; FP-AAPCS-NEXT: lsls r1, r0, #2
+; FP-AAPCS-NEXT: adds r1, r1, #7
+; FP-AAPCS-NEXT: movs r3, #7
+; FP-AAPCS-NEXT: bics r1, r3
+; FP-AAPCS-NEXT: mov r3, sp
+; FP-AAPCS-NEXT: subs r1, r3, r1
+; FP-AAPCS-NEXT: mov sp, r1
+; FP-AAPCS-NEXT: movs r1, #0
+; FP-AAPCS-NEXT: str r1, [r6, #4]
+; FP-AAPCS-NEXT: str r0, [r2]
+; FP-AAPCS-NEXT: mov r4, r11
+; FP-AAPCS-NEXT: subs r4, #8
+; FP-AAPCS-NEXT: mov sp, r4
+; FP-AAPCS-NEXT: pop {r4, r6}
+; FP-AAPCS-NEXT: pop {r0}
+; FP-AAPCS-NEXT: mov r11, r0
+; FP-AAPCS-NEXT: pop {pc}
+;
+; NOFP-LABEL: required_fp:
+; NOFP: @ %bb.0:
+; NOFP-NEXT: .save {r4, r6, r7, lr}
+; NOFP-NEXT: push {r4, r6, r7, lr}
+; NOFP-NEXT: .setfp r7, sp, #8
+; NOFP-NEXT: add r7, sp, #8
+; NOFP-NEXT: .pad #24
+; NOFP-NEXT: sub sp, #24
+; NOFP-NEXT: mov r6, sp
+; NOFP-NEXT: mov r2, r6
+; NOFP-NEXT: str r1, [r2, #16]
+; NOFP-NEXT: str r0, [r2, #20]
+; NOFP-NEXT: mov r1, sp
+; NOFP-NEXT: str r1, [r2, #8]
+; NOFP-NEXT: lsls r1, r0, #2
+; NOFP-NEXT: adds r1, r1, #7
+; NOFP-NEXT: movs r3, #7
+; NOFP-NEXT: bics r1, r3
+; NOFP-NEXT: mov r3, sp
+; NOFP-NEXT: subs r1, r3, r1
+; NOFP-NEXT: mov sp, r1
+; NOFP-NEXT: movs r1, #0
+; NOFP-NEXT: str r1, [r6, #4]
+; NOFP-NEXT: str r0, [r2]
+; NOFP-NEXT: subs r4, r7, #7
+; NOFP-NEXT: subs r4, #1
+; NOFP-NEXT: mov sp, r4
+; NOFP-NEXT: pop {r4, r6, r7, pc}
+;
+; NOFP-AAPCS-LABEL: required_fp:
+; NOFP-AAPCS: @ %bb.0:
+; NOFP-AAPCS-NEXT: .save {lr}
+; NOFP-AAPCS-NEXT: push {lr}
+; NOFP-AAPCS-NEXT: mov lr, r11
+; NOFP-AAPCS-NEXT: .save {r11}
+; NOFP-AAPCS-NEXT: push {lr}
+; NOFP-AAPCS-NEXT: .setfp r11, sp
+; NOFP-AAPCS-NEXT: mov r11, sp
+; NOFP-AAPCS-NEXT: .save {r4, r6}
+; NOFP-AAPCS-NEXT: push {r4, r6}
+; NOFP-AAPCS-NEXT: .pad #24
+; NOFP-AAPCS-NEXT: sub sp, #24
+; NOFP-AAPCS-NEXT: mov r6, sp
+; NOFP-AAPCS-NEXT: mov r2, r6
+; NOFP-AAPCS-NEXT: str r1, [r2, #16]
+; NOFP-AAPCS-NEXT: str r0, [r2, #20]
+; NOFP-AAPCS-NEXT: mov r1, sp
+; NOFP-AAPCS-NEXT: str r1, [r2, #8]
+; NOFP-AAPCS-NEXT: lsls r1, r0, #2
+; NOFP-AAPCS-NEXT: adds r1, r1, #7
+; NOFP-AAPCS-NEXT: movs r3, #7
+; NOFP-AAPCS-NEXT: bics r1, r3
+; NOFP-AAPCS-NEXT: mov r3, sp
+; NOFP-AAPCS-NEXT: subs r1, r3, r1
+; NOFP-AAPCS-NEXT: mov sp, r1
+; NOFP-AAPCS-NEXT: movs r1, #0
+; NOFP-AAPCS-NEXT: str r1, [r6, #4]
+; NOFP-AAPCS-NEXT: str r0, [r2]
+; NOFP-AAPCS-NEXT: mov r4, r11
+; NOFP-AAPCS-NEXT: subs r4, #8
+; NOFP-AAPCS-NEXT: mov sp, r4
+; NOFP-AAPCS-NEXT: pop {r4, r6}
+; NOFP-AAPCS-NEXT: pop {r0}
+; NOFP-AAPCS-NEXT: mov r11, r0
+; NOFP-AAPCS-NEXT: pop {pc}
+ %3 = alloca i32, align 4
+ %4 = alloca i32, align 4
+ %5 = alloca i8*, align 8
+ %6 = alloca i64, align 8
+ store i32 %0, i32* %3, align 4
+ store i32 %1, i32* %4, align 4
+ %7 = load i32, i32* %3, align 4
+ %8 = zext i32 %7 to i64
+ %9 = call i8* @llvm.stacksave()
+ store i8* %9, i8** %5, align 8
+ %10 = alloca i32, i64 %8, align 4
+ store i64 %8, i64* %6, align 8
+ ret void
+}