From 6119053dab67129eb1700dbf36db3524dd3e421f Mon Sep 17 00:00:00 2001 From: Lucas Prates Date: Fri, 6 May 2022 10:31:11 +0100 Subject: [PATCH] [ARM][Thumb] Command-line option to ensure AAPCS compliant Frame Records Currently, an AAPCS compliant frame record is not always created for functions when it should be. Although a consistent frame record might not be required in some cases, there are still scenarios where applications may want to make use of the call hierarchy made available through it. In order to enable the use of AAPCS compliant frame records whilst keeping backwards compatibility, this patch introduces a new command-line option (`-mframe-chain=[none|aapcs|aapcs+leaf]`) for AArch32 and Thumb backends. The option allows users to explicitly select when to use it, and is also useful to ensure the extra overhead introduced by the frame records is only introduced when necessary, in particular for Thumb targets. Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D125094 --- clang/include/clang/Driver/Options.td | 4 +- clang/lib/Driver/ToolChains/Arch/ARM.cpp | 9 + llvm/lib/Target/ARM/ARM.td | 10 + llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp | 24 +- llvm/lib/Target/ARM/ARMCallingConv.td | 17 +- llvm/lib/Target/ARM/ARMFrameLowering.cpp | 59 +- llvm/lib/Target/ARM/ARMFrameLowering.h | 1 + llvm/lib/Target/ARM/ARMMachineFunctionInfo.h | 3 + llvm/lib/Target/ARM/ARMSubtarget.h | 3 +- llvm/lib/Target/ARM/Thumb1FrameLowering.cpp | 675 ++++++++++++--------- llvm/lib/Target/ARM/ThumbRegisterInfo.cpp | 54 +- llvm/test/CodeGen/ARM/frame-chain-reserved-fp.ll | 25 + llvm/test/CodeGen/ARM/frame-chain.ll | 223 +++++++ llvm/test/CodeGen/Thumb/frame-access.ll | 206 +++++-- llvm/test/CodeGen/Thumb/frame-chain-reserved-fp.ll | 27 + llvm/test/CodeGen/Thumb/frame-chain.ll | 288 +++++++++ 16 files changed, 1273 insertions(+), 355 deletions(-) create mode 100644 llvm/test/CodeGen/ARM/frame-chain-reserved-fp.ll create mode 100644 llvm/test/CodeGen/ARM/frame-chain.ll 
create mode 100644 llvm/test/CodeGen/Thumb/frame-chain-reserved-fp.ll create mode 100644 llvm/test/CodeGen/Thumb/frame-chain.ll diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 002cd6c..a311781 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3467,7 +3467,9 @@ defm aapcs_bitfield_width : BoolOption<"f", "aapcs-bitfield-width", BothFlags<[NoXarchOption, CC1Option], " the AAPCS standard requirement stating that" " volatile bit-field width is dictated by the field container type. (ARM only).">>, Group; - +def mframe_chain : Joined<["-"], "mframe-chain=">, + Group, Values<"none,aapcs,aapcs+leaf">, + HelpText<"Select the frame chain model used to emit frame records (Arm only).">; def mgeneral_regs_only : Flag<["-"], "mgeneral-regs-only">, Group, HelpText<"Generate code which only uses the general purpose registers (AArch64/x86 only)">; def mfix_cmse_cve_2021_35465 : Flag<["-"], "mfix-cmse-cve-2021-35465">, diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.cpp b/clang/lib/Driver/ToolChains/Arch/ARM.cpp index dc6b35e..b79d1f0 100644 --- a/clang/lib/Driver/ToolChains/Arch/ARM.cpp +++ b/clang/lib/Driver/ToolChains/Arch/ARM.cpp @@ -718,6 +718,15 @@ fp16_fml_fallthrough: } } + // Propagate frame-chain model selection + if (Arg *A = Args.getLastArg(options::OPT_mframe_chain)) { + StringRef FrameChainOption = A->getValue(); + if (FrameChainOption.startswith("aapcs")) + Features.push_back("+aapcs-frame-chain"); + if (FrameChainOption == "aapcs+leaf") + Features.push_back("+aapcs-frame-chain-leaf"); + } + // CMSE: Check for target 8M (for -mcmse to be applicable) is performed later. 
if (Args.getLastArg(options::OPT_mcmse)) Features.push_back("+8msecext"); diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td index e8970b9..48559a8 100644 --- a/llvm/lib/Target/ARM/ARM.td +++ b/llvm/lib/Target/ARM/ARM.td @@ -546,6 +546,16 @@ def FeatureFixCortexA57AES1742098 : SubtargetFeature<"fix-cortex-a57-aes-1742098 "FixCortexA57AES1742098", "true", "Work around Cortex-A57 Erratum 1742098 / Cortex-A72 Erratum 1655431 (AES)">; +def FeatureAAPCSFrameChain : SubtargetFeature<"aapcs-frame-chain", + "CreateAAPCSFrameChain", "true", + "Create an AAPCS compliant frame chain">; + +def FeatureAAPCSFrameChainLeaf : SubtargetFeature<"aapcs-frame-chain-leaf", + "CreateAAPCSFrameChainLeaf", "true", + "Create an AAPCS compliant frame chain " + "for leaf functions", + [FeatureAAPCSFrameChain]>; + //===----------------------------------------------------------------------===// // ARM architecture class // diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp index cae72e4..1d0e743 100644 --- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -63,12 +63,8 @@ const MCPhysReg* ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const ARMSubtarget &STI = MF->getSubtarget(); bool UseSplitPush = STI.splitFramePushPop(*MF); - const MCPhysReg *RegList = - STI.isTargetDarwin() - ? CSR_iOS_SaveList - : (UseSplitPush ? CSR_AAPCS_SplitPush_SaveList : CSR_AAPCS_SaveList); - const Function &F = MF->getFunction(); + if (F.getCallingConv() == CallingConv::GHC) { // GHC set of callee saved regs is empty as all those regs are // used for passing STG regs around @@ -80,13 +76,13 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { } else if (F.getCallingConv() == CallingConv::SwiftTail) { return STI.isTargetDarwin() ? CSR_iOS_SwiftTail_SaveList - : (UseSplitPush ? CSR_AAPCS_SplitPush_SwiftTail_SaveList + : (UseSplitPush ? 
CSR_ATPCS_SplitPush_SwiftTail_SaveList : CSR_AAPCS_SwiftTail_SaveList); } else if (F.hasFnAttribute("interrupt")) { if (STI.isMClass()) { // M-class CPUs have hardware which saves the registers needed to allow a // function conforming to the AAPCS to function as a handler. - return UseSplitPush ? CSR_AAPCS_SplitPush_SaveList : CSR_AAPCS_SaveList; + return UseSplitPush ? CSR_ATPCS_SplitPush_SaveList : CSR_AAPCS_SaveList; } else if (F.getFnAttribute("interrupt").getValueAsString() == "FIQ") { // Fast interrupt mode gives the handler a private copy of R8-R14, so less // need to be saved to restore user-mode state. @@ -103,7 +99,7 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { if (STI.isTargetDarwin()) return CSR_iOS_SwiftError_SaveList; - return UseSplitPush ? CSR_AAPCS_SplitPush_SwiftError_SaveList : + return UseSplitPush ? CSR_ATPCS_SplitPush_SwiftError_SaveList : CSR_AAPCS_SwiftError_SaveList; } @@ -111,7 +107,15 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return MF->getInfo()->isSplitCSR() ? CSR_iOS_CXX_TLS_PE_SaveList : CSR_iOS_CXX_TLS_SaveList; - return RegList; + + if (STI.isTargetDarwin()) + return CSR_iOS_SaveList; + + if (UseSplitPush) + return STI.createAAPCSFrameChain() ? 
CSR_AAPCS_SplitPush_SaveList + : CSR_ATPCS_SplitPush_SaveList; + + return CSR_AAPCS_SaveList; } const MCPhysReg *ARMBaseRegisterInfo::getCalleeSavedRegsViaCopy( @@ -240,7 +244,7 @@ bool ARMBaseRegisterInfo::isInlineAsmReadOnlyReg(const MachineFunction &MF, BitVector Reserved(getNumRegs()); markSuperRegs(Reserved, ARM::PC); - if (TFI->hasFP(MF)) + if (TFI->isFPReserved(MF)) markSuperRegs(Reserved, STI.getFramePointerReg()); if (hasBasePointer(MF)) markSuperRegs(Reserved, BasePtr); diff --git a/llvm/lib/Target/ARM/ARMCallingConv.td b/llvm/lib/Target/ARM/ARMCallingConv.td index 45b9e48..d14424c 100644 --- a/llvm/lib/Target/ARM/ARMCallingConv.td +++ b/llvm/lib/Target/ARM/ARMCallingConv.td @@ -284,8 +284,8 @@ def CSR_AAPCS_SwiftTail : CalleeSavedRegs<(sub CSR_AAPCS, R10)>; // The order of callee-saved registers needs to match the order we actually push // them in FrameLowering, because this order is what's used by // PrologEpilogInserter to allocate frame index slots. So when R7 is the frame -// pointer, we use this AAPCS alternative. -def CSR_AAPCS_SplitPush : CalleeSavedRegs<(add LR, R7, R6, R5, R4, +// pointer, we use this ATPCS alternative. +def CSR_ATPCS_SplitPush : CalleeSavedRegs<(add LR, R7, R6, R5, R4, R11, R10, R9, R8, (sequence "D%u", 15, 8))>; @@ -294,13 +294,22 @@ def CSR_Win_SplitFP : CalleeSavedRegs<(add R10, R9, R8, R7, R6, R5, R4, LR, R11)>; // R8 is used to pass swifterror, remove it from CSR. -def CSR_AAPCS_SplitPush_SwiftError : CalleeSavedRegs<(sub CSR_AAPCS_SplitPush, +def CSR_ATPCS_SplitPush_SwiftError : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush, R8)>; // R10 is used to pass swifterror, remove it from CSR. -def CSR_AAPCS_SplitPush_SwiftTail : CalleeSavedRegs<(sub CSR_AAPCS_SplitPush, +def CSR_ATPCS_SplitPush_SwiftTail : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush, R10)>; +// When enforcing an AAPCS compliant frame chain, R11 is used as the frame +// pointer even for Thumb targets, where split pushes are necessary. 
+// This AAPCS alternative makes sure the frame index slots match the push +// order in that case. +def CSR_AAPCS_SplitPush : CalleeSavedRegs<(add LR, R11, + R7, R6, R5, R4, + R10, R9, R8, + (sequence "D%u", 15, 8))>; + // Constructors and destructors return 'this' in the ARM C++ ABI; since 'this' // and the pointer return value are both passed in R0 in these cases, this can // be partially modelled by treating R0 as a callee-saved register diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp index 014b81c..48b4d26 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -47,7 +47,8 @@ // | | // |-----------------------------------| // | | -// | prev_fp, prev_lr | +// | prev_lr | +// | prev_fp | // | (a.k.a. "frame record") | // | | // |- - - - - - - - - - - - - - - - - -| <- fp (r7 or r11) @@ -211,6 +212,12 @@ bool ARMFrameLowering::hasFP(const MachineFunction &MF) const { MFI.isFrameAddressTaken()); } +/// isFPReserved - Return true if the frame pointer register should be +/// considered a reserved register in the scope of the specified function. +bool ARMFrameLowering::isFPReserved(const MachineFunction &MF) const { + return hasFP(MF) || MF.getSubtarget().createAAPCSFrameChain(); +} + /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is /// not required, we reserve argument space for call sites in the function /// immediately on entry to the current function. This eliminates the need for @@ -1033,6 +1040,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, // into spill area 1, including the FP in R11. In either case, it // is in area one and the adjustment needs to take place just after // that push. + // FIXME: The above is not necessarily true when PACBTI is enabled. + // AAPCS requires use of R11, and PACBTI gets in the way of regular pushes, + // so FP ends up in area two. 
MachineBasicBlock::iterator AfterPush; if (HasFP) { AfterPush = std::next(GPRCS1Push); @@ -2196,6 +2206,34 @@ bool ARMFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { return true; } +static bool requiresAAPCSFrameRecord(const MachineFunction &MF) { + const auto &Subtarget = MF.getSubtarget(); + return Subtarget.createAAPCSFrameChainLeaf() || + (Subtarget.createAAPCSFrameChain() && MF.getFrameInfo().hasCalls()); +} + +// Thumb1 may require a spill when storing to a frame index through FP, for +// cases where FP is a high register (R11). This scans the function for cases +// where this may happen. +static bool canSpillOnFrameIndexAccess(const MachineFunction &MF, + const TargetFrameLowering &TFI) { + const ARMFunctionInfo *AFI = MF.getInfo(); + if (!AFI->isThumb1OnlyFunction()) + return false; + + for (const auto &MBB : MF) + for (const auto &MI : MBB) + if (MI.getOpcode() == ARM::tSTRspi || MI.getOpcode() == ARM::tSTRi) + for (const auto &Op : MI.operands()) + if (Op.isFI()) { + Register Reg; + TFI.getFrameIndexReference(MF, Op.getIndex(), Reg); + if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::SP) + return true; + } + return false; +} + void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const { @@ -2204,7 +2242,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, // to take advantage the eliminateFrameIndex machinery. This also ensures it // is spilled in the order specified by getCalleeSavedRegs() to make it easier // to combine multiple loads / stores. 
- bool CanEliminateFrame = true; + bool CanEliminateFrame = !(requiresAAPCSFrameRecord(MF) && hasFP(MF)); bool CS1Spilled = false; bool LRSpilled = false; unsigned NumGPRSpills = 0; @@ -2399,6 +2437,11 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, // Functions with VLAs or extremely large call frames are rare, and // if a function is allocating more than 1KB of stack, an extra 4-byte // slot probably isn't relevant. + // + // A special case is the scenario where r11 is used as FP, where accesses + // to a frame index will require its value to be moved into a low reg. + // This is handled later on, once we are able to determine if we have any + // fp-relative accesses. if (RegInfo->hasBasePointer(MF)) EstimatedRSStackSizeLimit = (1U << 5) * 4; else @@ -2445,7 +2488,9 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, SavedRegs.set(FramePtr); // If the frame pointer is required by the ABI, also spill LR so that we // emit a complete frame record. - if (MF.getTarget().Options.DisableFramePointerElim(MF) && !LRSpilled) { + if ((requiresAAPCSFrameRecord(MF) || + MF.getTarget().Options.DisableFramePointerElim(MF)) && + !LRSpilled) { SavedRegs.set(ARM::LR); LRSpilled = true; NumGPRSpills++; @@ -2527,7 +2572,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, } // r7 can be used if it is not being used as the frame pointer. - if (!HasFP) { + if (!HasFP || FramePtr != ARM::R7) { if (SavedRegs.test(ARM::R7)) { --RegDeficit; LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = " @@ -2648,8 +2693,10 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, // to materialize a stack offset. If so, either spill one additional // callee-saved register or reserve a special spill slot to facilitate // register scavenging. Thumb1 needs a spill slot for stack pointer - // adjustments also, even when the frame itself is small. 
- if (BigFrameOffsets && !ExtraCSSpill) { + // adjustments and for frame index accesses when FP is high register, + // even when the frame itself is small. + if (!ExtraCSSpill && + (BigFrameOffsets || canSpillOnFrameIndexAccess(MF, *this))) { // If any non-reserved CS register isn't spilled, just spill one or two // extra. That should take care of it! unsigned NumExtras = TargetAlign.value() / 4; diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.h b/llvm/lib/Target/ARM/ARMFrameLowering.h index 9822e23..16f2ce6 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.h +++ b/llvm/lib/Target/ARM/ARMFrameLowering.h @@ -46,6 +46,7 @@ public: bool enableCalleeSaveSkip(const MachineFunction &MF) const override; bool hasFP(const MachineFunction &MF) const override; + bool isFPReserved(const MachineFunction &MF) const; bool hasReservedCallFrame(const MachineFunction &MF) const override; bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override; StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, diff --git a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h index eaf682f..e906fea 100644 --- a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -86,6 +86,7 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// GPRCS1Size, GPRCS2Size, DPRCSSize - Sizes of callee saved register spills /// areas. 
unsigned FPCXTSaveSize = 0; + unsigned FRSaveSize = 0; unsigned GPRCS1Size = 0; unsigned GPRCS2Size = 0; unsigned DPRCSAlignGapSize = 0; @@ -203,12 +204,14 @@ public: void setDPRCalleeSavedAreaOffset(unsigned o) { DPRCSOffset = o; } unsigned getFPCXTSaveAreaSize() const { return FPCXTSaveSize; } + unsigned getFrameRecordSavedAreaSize() const { return FRSaveSize; } unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; } unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; } unsigned getDPRCalleeSavedGapSize() const { return DPRCSAlignGapSize; } unsigned getDPRCalleeSavedAreaSize() const { return DPRCSSize; } void setFPCXTSaveAreaSize(unsigned s) { FPCXTSaveSize = s; } + void setFrameRecordSavedAreaSize(unsigned s) { FRSaveSize = s; } void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; } void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; } void setDPRCalleeSavedGapSize(unsigned s) { DPRCSAlignGapSize = s; } diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h index 3f978f5..460ec62 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -430,7 +430,8 @@ public: } MCPhysReg getFramePointerReg() const { - if (isTargetDarwin() || (!isTargetWindows() && isThumb())) + if (isTargetDarwin() || + (!isTargetWindows() && isThumb() && !createAAPCSFrameChain())) return ARM::R7; return ARM::R11; } diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp index 48688cb..98bf3e8 100644 --- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -176,7 +176,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, // Determine the sizes of each callee-save spill areas and record which frame // belongs to which callee-save spill areas. 
- unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; + unsigned FRSize = 0, GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; int FramePtrSpillFI = 0; if (ArgRegsSaveSize) { @@ -205,26 +205,38 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, return; } + bool HasFrameRecordArea = hasFP(MF) && ARM::hGPRRegClass.contains(FramePtr); + for (const CalleeSavedInfo &I : CSI) { Register Reg = I.getReg(); int FI = I.getFrameIdx(); + if (Reg == FramePtr) + FramePtrSpillFI = FI; switch (Reg) { + case ARM::R11: + if (HasFrameRecordArea) { + FRSize += 4; + break; + } + LLVM_FALLTHROUGH; case ARM::R8: case ARM::R9: case ARM::R10: - case ARM::R11: if (STI.splitFramePushPop(MF)) { GPRCS2Size += 4; break; } LLVM_FALLTHROUGH; + case ARM::LR: + if (HasFrameRecordArea) { + FRSize += 4; + break; + } + LLVM_FALLTHROUGH; case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7: - case ARM::LR: - if (Reg == FramePtr) - FramePtrSpillFI = FI; GPRCS1Size += 4; break; default: @@ -232,18 +244,53 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, } } + MachineBasicBlock::iterator FRPush, GPRCS1Push, GPRCS2Push; + if (HasFrameRecordArea) { + // Skip Frame Record setup: + // push {lr} + // mov lr, r11 + // push {lr} + std::advance(MBBI, 2); + FRPush = MBBI++; + } + if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { + GPRCS1Push = MBBI; ++MBBI; } + // Find last push instruction for GPRCS2 - spilling of high registers + // (r8-r11) could consist of multiple tPUSH and tMOVr instructions. 
+ while (true) { + MachineBasicBlock::iterator OldMBBI = MBBI; + // Skip a run of tMOVr instructions + while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr && + MBBI->getFlag(MachineInstr::FrameSetup)) + MBBI++; + if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH && + MBBI->getFlag(MachineInstr::FrameSetup)) { + GPRCS2Push = MBBI; + MBBI++; + } else { + // We have reached an instruction which is not a push, so the previous + // run of tMOVr instructions (which may have been empty) was not part of + // the prologue. Reset MBBI back to the last PUSH of the prologue. + MBBI = OldMBBI; + break; + } + } + // Determine starting offsets of spill areas. - unsigned DPRCSOffset = NumBytes - ArgRegsSaveSize - (GPRCS1Size + GPRCS2Size + DPRCSSize); + unsigned DPRCSOffset = NumBytes - ArgRegsSaveSize - + (FRSize + GPRCS1Size + GPRCS2Size + DPRCSSize); unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; bool HasFP = hasFP(MF); if (HasFP) AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) + NumBytes); + if (HasFrameRecordArea) + AFI->setFrameRecordSavedAreaSize(FRSize); AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); @@ -252,71 +299,45 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, int FramePtrOffsetInBlock = 0; unsigned adjustedGPRCS1Size = GPRCS1Size; if (GPRCS1Size > 0 && GPRCS2Size == 0 && - tryFoldSPUpdateIntoPushPop(STI, MF, &*std::prev(MBBI), NumBytes)) { + tryFoldSPUpdateIntoPushPop(STI, MF, &*(GPRCS1Push), NumBytes)) { FramePtrOffsetInBlock = NumBytes; adjustedGPRCS1Size += NumBytes; NumBytes = 0; } - - if (adjustedGPRCS1Size) { - CFAOffset += adjustedGPRCS1Size; - unsigned CFIIndex = - MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset)); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex) - .setMIFlags(MachineInstr::FrameSetup); 
- } - for (const CalleeSavedInfo &I : CSI) { - Register Reg = I.getReg(); - int FI = I.getFrameIdx(); - switch (Reg) { - case ARM::R8: - case ARM::R9: - case ARM::R10: - case ARM::R11: - case ARM::R12: - if (STI.splitFramePushPop(MF)) - break; - LLVM_FALLTHROUGH; - case ARM::R0: - case ARM::R1: - case ARM::R2: - case ARM::R3: - case ARM::R4: - case ARM::R5: - case ARM::R6: - case ARM::R7: - case ARM::LR: - unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( - nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI))); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex) - .setMIFlags(MachineInstr::FrameSetup); - break; - } - } + CFAOffset += adjustedGPRCS1Size; // Adjust FP so it point to the stack slot that contains the previous FP. if (HasFP) { - FramePtrOffsetInBlock += - MFI.getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize; - BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) - .addReg(ARM::SP) - .addImm(FramePtrOffsetInBlock / 4) - .setMIFlags(MachineInstr::FrameSetup) - .add(predOps(ARMCC::AL)); + MachineBasicBlock::iterator AfterPush = + HasFrameRecordArea ? std::next(FRPush) : std::next(GPRCS1Push); + if (HasFrameRecordArea) { + // We have just finished pushing the previous FP into the stack, + // so simply capture the SP value as the new Frame Pointer. 
+ BuildMI(MBB, AfterPush, dl, TII.get(ARM::tMOVr), FramePtr) + .addReg(ARM::SP) + .setMIFlags(MachineInstr::FrameSetup) + .add(predOps(ARMCC::AL)); + } else { + FramePtrOffsetInBlock += + MFI.getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize; + BuildMI(MBB, AfterPush, dl, TII.get(ARM::tADDrSPi), FramePtr) + .addReg(ARM::SP) + .addImm(FramePtrOffsetInBlock / 4) + .setMIFlags(MachineInstr::FrameSetup) + .add(predOps(ARMCC::AL)); + } + if(FramePtrOffsetInBlock) { - CFAOffset -= FramePtrOffsetInBlock; unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa( - nullptr, MRI->getDwarfRegNum(FramePtr, true), CFAOffset)); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + nullptr, MRI->getDwarfRegNum(FramePtr, true), (CFAOffset - FramePtrOffsetInBlock))); + BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } else { unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaRegister( nullptr, MRI->getDwarfRegNum(FramePtr, true))); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } @@ -326,45 +347,69 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, AFI->setShouldRestoreSPFromFP(true); } - // Skip past the spilling of r8-r11, which could consist of multiple tPUSH - // and tMOVr instructions. We don't need to add any call frame information - // in-between these instructions, because they do not modify the high - // registers. 
- while (true) { - MachineBasicBlock::iterator OldMBBI = MBBI; - // Skip a run of tMOVr instructions - while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr) - MBBI++; - if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { - MBBI++; - } else { - // We have reached an instruction which is not a push, so the previous - // run of tMOVr instructions (which may have been empty) was not part of - // the prologue. Reset MBBI back to the last PUSH of the prologue. - MBBI = OldMBBI; - break; + // Emit call frame information for the callee-saved low registers. + if (GPRCS1Size > 0) { + MachineBasicBlock::iterator Pos = std::next(GPRCS1Push); + if (adjustedGPRCS1Size) { + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset)); + BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + } + for (const CalleeSavedInfo &I : CSI) { + Register Reg = I.getReg(); + int FI = I.getFrameIdx(); + switch (Reg) { + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R11: + case ARM::R12: + if (STI.splitFramePushPop(MF)) + break; + LLVM_FALLTHROUGH; + case ARM::R0: + case ARM::R1: + case ARM::R2: + case ARM::R3: + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + case ARM::LR: + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( + nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI))); + BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + break; + } } } // Emit call frame information for the callee-saved high registers. 
- for (auto &I : CSI) { - Register Reg = I.getReg(); - int FI = I.getFrameIdx(); - switch (Reg) { - case ARM::R8: - case ARM::R9: - case ARM::R10: - case ARM::R11: - case ARM::R12: { - unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( - nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI))); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex) - .setMIFlags(MachineInstr::FrameSetup); - break; - } - default: - break; + if (GPRCS2Size > 0) { + MachineBasicBlock::iterator Pos = std::next(GPRCS2Push); + for (auto &I : CSI) { + Register Reg = I.getReg(); + int FI = I.getFrameIdx(); + switch (Reg) { + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R11: + case ARM::R12: { + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( + nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI))); + BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + break; + } + default: + break; + } } } @@ -487,7 +532,8 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, } // Move SP to start of FP callee save spill area. - NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + + NumBytes -= (AFI->getFrameRecordSavedAreaSize() + + AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() + AFI->getDPRCalleeSavedAreaSize() + ArgRegsSaveSize); @@ -790,65 +836,53 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, return true; } -using ARMRegSet = std::bitset; - -// Return the first iteraror after CurrentReg which is present in EnabledRegs, -// or OrderEnd if no further registers are in that set. This does not advance -// the iterator fiorst, so returns CurrentReg if it is in EnabledRegs. 
-static const unsigned *findNextOrderedReg(const unsigned *CurrentReg, - const ARMRegSet &EnabledRegs, - const unsigned *OrderEnd) { - while (CurrentReg != OrderEnd && !EnabledRegs[*CurrentReg]) - ++CurrentReg; - return CurrentReg; -} - -bool Thumb1FrameLowering::spillCalleeSavedRegisters( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - ArrayRef CSI, const TargetRegisterInfo *TRI) const { - if (CSI.empty()) - return false; - - DebugLoc DL; - const TargetInstrInfo &TII = *STI.getInstrInfo(); - MachineFunction &MF = *MBB.getParent(); - const ARMBaseRegisterInfo *RegInfo = static_cast( - MF.getSubtarget().getRegisterInfo()); - - ARMRegSet LoRegsToSave; // r0-r7, lr - ARMRegSet HiRegsToSave; // r8-r11 - ARMRegSet CopyRegs; // Registers which can be used after pushing - // LoRegs for saving HiRegs. - - for (const CalleeSavedInfo &I : llvm::reverse(CSI)) { - Register Reg = I.getReg(); - +static const SmallVector OrderedLowRegs = {ARM::R4, ARM::R5, ARM::R6, + ARM::R7, ARM::LR}; +static const SmallVector OrderedHighRegs = {ARM::R8, ARM::R9, + ARM::R10, ARM::R11}; +static const SmallVector OrderedCopyRegs = { + ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4, + ARM::R5, ARM::R6, ARM::R7, ARM::LR}; + +static void splitLowAndHighRegs(const std::set &Regs, + std::set &LowRegs, + std::set &HighRegs) { + for (Register Reg : Regs) { if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) { - LoRegsToSave[Reg] = true; + LowRegs.insert(Reg); } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) { - HiRegsToSave[Reg] = true; + HighRegs.insert(Reg); } else { llvm_unreachable("callee-saved register of unexpected class"); } - - if ((ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) && - !MF.getRegInfo().isLiveIn(Reg) && - !(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF))) - CopyRegs[Reg] = true; } +} - // Unused argument registers can be used for the high register saving. 
- for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) - if (!MF.getRegInfo().isLiveIn(ArgReg)) - CopyRegs[ArgReg] = true; +template +It getNextOrderedReg(It OrderedStartIt, It OrderedEndIt, + const std::set &RegSet) { + return std::find_if(OrderedStartIt, OrderedEndIt, + [&](Register Reg) { return RegSet.count(Reg); }); +} - // Push the low registers and lr +static void pushRegsToStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const TargetInstrInfo &TII, + const std::set &RegsToSave, + const std::set &CopyRegs) { + MachineFunction &MF = *MBB.getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); - if (!LoRegsToSave.none()) { + DebugLoc DL; + + std::set LowRegs, HighRegs; + splitLowAndHighRegs(RegsToSave, LowRegs, HighRegs); + + // Push low regs first + if (!LowRegs.empty()) { MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL)); - for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6, ARM::R7, ARM::LR}) { - if (LoRegsToSave[Reg]) { + for (unsigned Reg : OrderedLowRegs) { + if (LowRegs.count(Reg)) { bool isKill = !MRI.isLiveIn(Reg); if (isKill && !MRI.isReserved(Reg)) MBB.addLiveIn(Reg); @@ -859,31 +893,26 @@ bool Thumb1FrameLowering::spillCalleeSavedRegisters( MIB.setMIFlags(MachineInstr::FrameSetup); } - // Push the high registers. There are no store instructions that can access - // these registers directly, so we have to move them to low registers, and - // push them. This might take multiple pushes, as it is possible for there to + // Now push the high registers + // There are no store instructions that can access high registers directly, + // so we have to move them to low registers, and push them. + // This might take multiple pushes, as it is possible for there to // be fewer low registers available than high registers which need saving. - // These are in reverse order so that in the case where we need to use + // Find the first register to save. 
+ // Registers must be processed in reverse order so that in case we need to use // multiple PUSH instructions, the order of the registers on the stack still // matches the unwind info. They need to be swicthed back to ascending order // before adding to the PUSH instruction. - static const unsigned AllCopyRegs[] = {ARM::LR, ARM::R7, ARM::R6, - ARM::R5, ARM::R4, ARM::R3, - ARM::R2, ARM::R1, ARM::R0}; - static const unsigned AllHighRegs[] = {ARM::R11, ARM::R10, ARM::R9, ARM::R8}; + auto HiRegToSave = getNextOrderedReg(OrderedHighRegs.rbegin(), + OrderedHighRegs.rend(), + HighRegs); - const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs); - const unsigned *AllHighRegsEnd = std::end(AllHighRegs); - - // Find the first register to save. - const unsigned *HiRegToSave = findNextOrderedReg( - std::begin(AllHighRegs), HiRegsToSave, AllHighRegsEnd); - - while (HiRegToSave != AllHighRegsEnd) { + while (HiRegToSave != OrderedHighRegs.rend()) { // Find the first low register to use. - const unsigned *CopyReg = - findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd); + auto CopyRegIt = getNextOrderedReg(OrderedCopyRegs.rbegin(), + OrderedCopyRegs.rend(), + CopyRegs); // Create the PUSH, but don't insert it yet (the MOVs need to come first). MachineInstrBuilder PushMIB = BuildMI(MF, DL, TII.get(ARM::tPUSH)) @@ -891,25 +920,29 @@ bool Thumb1FrameLowering::spillCalleeSavedRegisters( .setMIFlags(MachineInstr::FrameSetup); SmallVector RegsToPush; - while (HiRegToSave != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) { - if (HiRegsToSave[*HiRegToSave]) { + while (HiRegToSave != OrderedHighRegs.rend() && + CopyRegIt != OrderedCopyRegs.rend()) { + if (HighRegs.count(*HiRegToSave)) { bool isKill = !MRI.isLiveIn(*HiRegToSave); if (isKill && !MRI.isReserved(*HiRegToSave)) MBB.addLiveIn(*HiRegToSave); // Emit a MOV from the high reg to the low reg. 
BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) - .addReg(*CopyReg, RegState::Define) + .addReg(*CopyRegIt, RegState::Define) .addReg(*HiRegToSave, getKillRegState(isKill)) .add(predOps(ARMCC::AL)) .setMIFlags(MachineInstr::FrameSetup); // Record the register that must be added to the PUSH. - RegsToPush.push_back(*CopyReg); - - CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd); - HiRegToSave = - findNextOrderedReg(++HiRegToSave, HiRegsToSave, AllHighRegsEnd); + RegsToPush.push_back(*CopyRegIt); + + CopyRegIt = getNextOrderedReg(std::next(CopyRegIt), + OrderedCopyRegs.rend(), + CopyRegs); + HiRegToSave = getNextOrderedReg(std::next(HiRegToSave), + OrderedHighRegs.rend(), + HighRegs); } } @@ -920,85 +953,60 @@ bool Thumb1FrameLowering::spillCalleeSavedRegisters( // Insert the PUSH instruction after the MOVs. MBB.insert(MI, PushMIB); } - - return true; } -bool Thumb1FrameLowering::restoreCalleeSavedRegisters( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - MutableArrayRef CSI, const TargetRegisterInfo *TRI) const { - if (CSI.empty()) - return false; - +static void popRegsFromStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const TargetInstrInfo &TII, + const std::set &RegsToRestore, + const std::set &AvailableCopyRegs, + bool IsVarArg, bool HasV5Ops) { MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo(); - const TargetInstrInfo &TII = *STI.getInstrInfo(); - const ARMBaseRegisterInfo *RegInfo = static_cast( - MF.getSubtarget().getRegisterInfo()); - - bool isVarArg = AFI->getArgRegsSaveSize() > 0; DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc(); - ARMRegSet LoRegsToRestore; - ARMRegSet HiRegsToRestore; - // Low registers (r0-r7) which can be used to restore the high registers. 
- ARMRegSet CopyRegs; - - for (CalleeSavedInfo I : CSI) { - Register Reg = I.getReg(); - - if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) { - LoRegsToRestore[Reg] = true; - } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) { - HiRegsToRestore[Reg] = true; - } else { - llvm_unreachable("callee-saved register of unexpected class"); - } - - // If this is a low register not used as the frame pointer, we may want to - // use it for restoring the high registers. - if ((ARM::tGPRRegClass.contains(Reg)) && - !(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF))) - CopyRegs[Reg] = true; - } - - // If this is a return block, we may be able to use some unused return value - // registers for restoring the high regs. - auto Terminator = MBB.getFirstTerminator(); - if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET) { - CopyRegs[ARM::R0] = true; - CopyRegs[ARM::R1] = true; - CopyRegs[ARM::R2] = true; - CopyRegs[ARM::R3] = true; - for (auto Op : Terminator->implicit_operands()) { - if (Op.isReg()) - CopyRegs[Op.getReg()] = false; - } - } - - static const unsigned AllCopyRegs[] = {ARM::R0, ARM::R1, ARM::R2, ARM::R3, - ARM::R4, ARM::R5, ARM::R6, ARM::R7}; - static const unsigned AllHighRegs[] = {ARM::R8, ARM::R9, ARM::R10, ARM::R11}; + std::set LowRegs, HighRegs; + splitLowAndHighRegs(RegsToRestore, LowRegs, HighRegs); - const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs); - const unsigned *AllHighRegsEnd = std::end(AllHighRegs); + // Pop the high registers first + // There are no load instructions that can access high registers directly, + // so we have to pop into low registers and then move to the high registers. + // This might take multiple pops, as it is possible for there to + // be fewer low registers available than high registers which need restoring. // Find the first register to restore.
- auto HiRegToRestore = findNextOrderedReg(std::begin(AllHighRegs), - HiRegsToRestore, AllHighRegsEnd); + auto HiRegToRestore = getNextOrderedReg(OrderedHighRegs.begin(), + OrderedHighRegs.end(), + HighRegs); + + std::set CopyRegs = AvailableCopyRegs; + Register LowScratchReg; + if (!HighRegs.empty() && CopyRegs.empty()) { + // No copy regs are available to pop high regs. Let's make use of a return + // register and the scratch register (IP/R12) to copy things around. + LowScratchReg = ARM::R0; + BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) + .addReg(ARM::R12, RegState::Define) + .addReg(LowScratchReg, RegState::Kill) + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::FrameDestroy); + CopyRegs.insert(LowScratchReg); + } - while (HiRegToRestore != AllHighRegsEnd) { - assert(!CopyRegs.none()); + while (HiRegToRestore != OrderedHighRegs.end()) { + assert(!CopyRegs.empty()); // Find the first low register to use. - auto CopyReg = - findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd); + auto CopyReg = getNextOrderedReg(OrderedCopyRegs.begin(), + OrderedCopyRegs.end(), + CopyRegs); // Create the POP instruction. MachineInstrBuilder PopMIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPOP)) .add(predOps(ARMCC::AL)) .setMIFlag(MachineInstr::FrameDestroy); - while (HiRegToRestore != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) { + while (HiRegToRestore != OrderedHighRegs.end() && + CopyReg != OrderedCopyRegs.end()) { // Add the low register to the POP. 
PopMIB.addReg(*CopyReg, RegState::Define); @@ -1009,63 +1017,178 @@ bool Thumb1FrameLowering::restoreCalleeSavedRegisters( .add(predOps(ARMCC::AL)) .setMIFlag(MachineInstr::FrameDestroy); - CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd); - HiRegToRestore = - findNextOrderedReg(++HiRegToRestore, HiRegsToRestore, AllHighRegsEnd); + CopyReg = getNextOrderedReg(std::next(CopyReg), + OrderedCopyRegs.end(), + CopyRegs); + HiRegToRestore = getNextOrderedReg(std::next(HiRegToRestore), + OrderedHighRegs.end(), + HighRegs); } } - MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP)) - .add(predOps(ARMCC::AL)) - .setMIFlag(MachineInstr::FrameDestroy); - - bool NeedsPop = false; - for (CalleeSavedInfo &Info : llvm::reverse(CSI)) { - Register Reg = Info.getReg(); - - // High registers (excluding lr) have already been dealt with - if (!(ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR)) - continue; - - if (Reg == ARM::LR) { - Info.setRestored(false); - if (!MBB.succ_empty() || - MI->getOpcode() == ARM::TCRETURNdi || - MI->getOpcode() == ARM::TCRETURNri) - // LR may only be popped into PC, as part of return sequence. - // If this isn't the return sequence, we'll need emitPopSpecialFixUp - // to restore LR the hard way. - // FIXME: if we don't pass any stack arguments it would be actually - // advantageous *and* correct to do the conversion to an ordinary call - // instruction here. - continue; - // Special epilogue for vararg functions. 
See emitEpilogue - if (isVarArg) - continue; - // ARMv4T requires BX, see emitEpilogue - if (!STI.hasV5TOps()) - continue; + // Restore low register used as scratch if necessary + if (LowScratchReg.isValid()) { + BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) + .addReg(LowScratchReg, RegState::Define) + .addReg(ARM::R12, RegState::Kill) + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::FrameDestroy); + } + + // Now pop the low registers + if (!LowRegs.empty()) { + MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP)) + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::FrameDestroy); - // CMSE entry functions must return via BXNS, see emitEpilogue. - if (AFI->isCmseNSEntryFunction()) + bool NeedsPop = false; + for (Register Reg : OrderedLowRegs) { + if (!LowRegs.count(Reg)) continue; - // Pop LR into PC. - Reg = ARM::PC; - (*MIB).setDesc(TII.get(ARM::tPOP_RET)); - if (MI != MBB.end()) - MIB.copyImplicitOps(*MI); - MI = MBB.erase(MI); + if (Reg == ARM::LR) { + if (!MBB.succ_empty() || + MI->getOpcode() == ARM::TCRETURNdi || + MI->getOpcode() == ARM::TCRETURNri) + // LR may only be popped into PC, as part of return sequence. + // If this isn't the return sequence, we'll need emitPopSpecialFixUp + // to restore LR the hard way. + // FIXME: if we don't pass any stack arguments it would be actually + // advantageous *and* correct to do the conversion to an ordinary call + // instruction here. + continue; + // Special epilogue for vararg functions. See emitEpilogue + if (IsVarArg) + continue; + // ARMv4T requires BX, see emitEpilogue + if (!HasV5Ops) + continue; + + // CMSE entry functions must return via BXNS, see emitEpilogue. + if (AFI->isCmseNSEntryFunction()) + continue; + + // Pop LR into PC. + Reg = ARM::PC; + (*MIB).setDesc(TII.get(ARM::tPOP_RET)); + if (MI != MBB.end()) + MIB.copyImplicitOps(*MI); + MI = MBB.erase(MI); + } + MIB.addReg(Reg, getDefRegState(true)); + NeedsPop = true; + } + + // It's illegal to emit pop instruction without operands. 
+ if (NeedsPop) + MBB.insert(MI, &*MIB); + else + MF.deleteMachineInstr(MIB); + } +} + +bool Thumb1FrameLowering::spillCalleeSavedRegisters( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + ArrayRef CSI, const TargetRegisterInfo *TRI) const { + if (CSI.empty()) + return false; + + const TargetInstrInfo &TII = *STI.getInstrInfo(); + MachineFunction &MF = *MBB.getParent(); + const ARMBaseRegisterInfo *RegInfo = static_cast( + MF.getSubtarget().getRegisterInfo()); + Register FPReg = RegInfo->getFrameRegister(MF); + + // In case FP is a high reg, we need a separate push sequence to generate + // a correct Frame Record + bool NeedsFrameRecordPush = hasFP(MF) && ARM::hGPRRegClass.contains(FPReg); + + std::set FrameRecord; + std::set SpilledGPRs; + for (const CalleeSavedInfo &I : CSI) { + Register Reg = I.getReg(); + if (NeedsFrameRecordPush && (Reg == FPReg || Reg == ARM::LR)) + FrameRecord.insert(Reg); + else + SpilledGPRs.insert(Reg); + } + + pushRegsToStack(MBB, MI, TII, FrameRecord, {ARM::LR}); + + // Determine intermediate registers which can be used for pushing high regs: + // - Spilled low regs + // - Unused argument registers + std::set CopyRegs; + for (Register Reg : SpilledGPRs) + if ((ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) && + !MF.getRegInfo().isLiveIn(Reg) && !(hasFP(MF) && Reg == FPReg)) + CopyRegs.insert(Reg); + for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) + if (!MF.getRegInfo().isLiveIn(ArgReg)) + CopyRegs.insert(ArgReg); + + pushRegsToStack(MBB, MI, TII, SpilledGPRs, CopyRegs); + + return true; +} + +bool Thumb1FrameLowering::restoreCalleeSavedRegisters( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + MutableArrayRef CSI, const TargetRegisterInfo *TRI) const { + if (CSI.empty()) + return false; + + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo(); + const TargetInstrInfo &TII = *STI.getInstrInfo(); + const ARMBaseRegisterInfo *RegInfo = static_cast( + 
MF.getSubtarget().getRegisterInfo()); + bool IsVarArg = AFI->getArgRegsSaveSize() > 0; + Register FPReg = RegInfo->getFrameRegister(MF); + + // In case FP is a high reg, we need a separate pop sequence to generate + // a correct Frame Record + bool NeedsFrameRecordPop = hasFP(MF) && ARM::hGPRRegClass.contains(FPReg); + + std::set FrameRecord; + std::set SpilledGPRs; + for (CalleeSavedInfo &I : CSI) { + Register Reg = I.getReg(); + if (NeedsFrameRecordPop && (Reg == FPReg || Reg == ARM::LR)) + FrameRecord.insert(Reg); + else + SpilledGPRs.insert(Reg); + + if (Reg == ARM::LR) + I.setRestored(false); + } + + // Determine intermediate registers which can be used for popping high regs: + // - Spilled low regs + // - Unused return registers + std::set CopyRegs; + std::set UnusedReturnRegs; + for (Register Reg : SpilledGPRs) + if ((ARM::tGPRRegClass.contains(Reg)) && !(hasFP(MF) && Reg == FPReg)) + CopyRegs.insert(Reg); + auto Terminator = MBB.getFirstTerminator(); + if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET) { + UnusedReturnRegs.insert(ARM::R0); + UnusedReturnRegs.insert(ARM::R1); + UnusedReturnRegs.insert(ARM::R2); + UnusedReturnRegs.insert(ARM::R3); + for (auto Op : Terminator->implicit_operands()) { + if (Op.isReg()) + UnusedReturnRegs.erase(Op.getReg()); } - MIB.addReg(Reg, getDefRegState(true)); - NeedsPop = true; } + CopyRegs.insert(UnusedReturnRegs.begin(), UnusedReturnRegs.end()); - // It's illegal to emit pop instruction without operands.
- if (NeedsPop) - MBB.insert(MI, &*MIB); - else - MF.deleteMachineInstr(MIB); + popRegsFromStack(MBB, MI, TII, SpilledGPRs, CopyRegs, IsVarArg, + STI.hasV5TOps()); + // Only unused return registers can be used as copy regs at this point + popRegsFromStack(MBB, MI, TII, FrameRecord, UnusedReturnRegs, IsVarArg, + STI.hasV5TOps()); return true; } diff --git a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp index f76ff10..5dd39e7 100644 --- a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp +++ b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp @@ -361,6 +361,7 @@ bool ThumbRegisterInfo::rewriteFrameIndex(MachineBasicBlock::iterator II, const ARMBaseInstrInfo &TII) const { MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); assert(MBB.getParent()->getSubtarget().isThumb1Only() && "This isn't needed for thumb2!"); DebugLoc dl = MI.getDebugLoc(); @@ -396,7 +397,18 @@ bool ThumbRegisterInfo::rewriteFrameIndex(MachineBasicBlock::iterator II, if ((unsigned)Offset <= Mask * Scale) { // Replace the FrameIndex with the frame register (e.g., sp). - MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); + Register DestReg = FrameReg; + + // In case FrameReg is a high register, move it to a low reg to ensure it + // can be used as an operand. + if (ARM::hGPRRegClass.contains(FrameReg) && FrameReg != ARM::SP) { + DestReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass); + BuildMI(MBB, II, dl, TII.get(ARM::tMOVr), DestReg) + .addReg(FrameReg) + .add(predOps(ARMCC::AL)); + } + + MI.getOperand(FrameRegIdx).ChangeToRegister(DestReg, false); ImmOp.ChangeToImmediate(ImmedOffset); // If we're using a register where sp was stored, convert the instruction @@ -526,11 +538,21 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.setDesc(TII.get(UseRR ? 
ARM::tLDRr : ARM::tLDRi)); MI.getOperand(FIOperandNum).ChangeToRegister(TmpReg, false, false, true); - if (UseRR) - // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame - // register. The offset is already handled in the vreg value. - MI.getOperand(FIOperandNum+1).ChangeToRegister(FrameReg, false, false, - false); + if (UseRR) { + if (!ARM::hGPRRegClass.contains(FrameReg)) { + // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame + // register. The offset is already handled in the vreg value. + MI.getOperand(FIOperandNum+1).ChangeToRegister(FrameReg, false, false, + false); + } else { + // If FrameReg is a high register, add the reg values in a separate + // instruction as the load won't be able to access it. + BuildMI(MBB, II, dl, TII.get(ARM::tADDhirr), TmpReg) + .addReg(TmpReg) + .addReg(FrameReg) + .add(predOps(ARMCC::AL)); + } + } } else if (MI.mayStore()) { VReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass); bool UseRR = false; @@ -548,11 +570,21 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, *this); MI.setDesc(TII.get(UseRR ? ARM::tSTRr : ARM::tSTRi)); MI.getOperand(FIOperandNum).ChangeToRegister(VReg, false, false, true); - if (UseRR) - // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame - // register. The offset is already handled in the vreg value. - MI.getOperand(FIOperandNum+1).ChangeToRegister(FrameReg, false, false, - false); + if (UseRR) { + if (!ARM::hGPRRegClass.contains(FrameReg)) { + // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame + // register. The offset is already handled in the vreg value. + MI.getOperand(FIOperandNum+1).ChangeToRegister(FrameReg, false, false, + false); + } else { + // If FrameReg is a high register, add the reg values in a separate + // instruction as the store won't be able to access it.
+ BuildMI(MBB, II, dl, TII.get(ARM::tADDhirr), VReg) + .addReg(VReg) + .addReg(FrameReg) + .add(predOps(ARMCC::AL)); + } + } } else { llvm_unreachable("Unexpected opcode!"); } diff --git a/llvm/test/CodeGen/ARM/frame-chain-reserved-fp.ll b/llvm/test/CodeGen/ARM/frame-chain-reserved-fp.ll new file mode 100644 index 0000000..6540381 --- /dev/null +++ b/llvm/test/CodeGen/ARM/frame-chain-reserved-fp.ll @@ -0,0 +1,25 @@ +; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=all 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 +; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 +; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 +; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-NONE +; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 +; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 +; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=none 2>&1 | FileCheck %s --check-prefix=RESERVED-NONE +; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 +; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 + +declare void @leaf(i32 %input) + +define void @reserved_r7(i32 %input) { +; RESERVED-NONE-NOT: error: write to reserved register 'R7' +; RESERVED-R11-NOT: error: write to reserved register 'R7' + %1 = call 
i32 asm sideeffect "mov $0, $1", "={r7},r"(i32 %input) + ret void +} + +define void @reserved_r11(i32 %input) { +; RESERVED-NONE-NOT: error: write to reserved register 'R11' +; RESERVED-R11: error: write to reserved register 'R11' + %1 = call i32 asm sideeffect "mov $0, $1", "={r11},r"(i32 %input) + ret void +} diff --git a/llvm/test/CodeGen/ARM/frame-chain.ll b/llvm/test/CodeGen/ARM/frame-chain.ll new file mode 100644 index 0000000..c6fede4 --- /dev/null +++ b/llvm/test/CodeGen/ARM/frame-chain.ll @@ -0,0 +1,223 @@ +; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=all | FileCheck %s --check-prefixes=FP,LEAF-FP +; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-FP +; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain-leaf | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-FP-AAPCS +; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf | FileCheck %s --check-prefixes=FP,LEAF-NOFP +; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-NOFP +; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain-leaf | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-NOFP-AAPCS +; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=none | FileCheck %s --check-prefixes=NOFP,LEAF-NOFP +; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain | FileCheck %s --check-prefixes=NOFP-AAPCS,LEAF-NOFP +; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain-leaf | FileCheck %s --check-prefixes=NOFP-AAPCS,LEAF-NOFP-AAPCS + +define dso_local noundef i32 @leaf(i32 noundef %0) { +; LEAF-FP-LABEL: leaf: +; LEAF-FP: @ %bb.0: +; 
LEAF-FP-NEXT: .pad #4 +; LEAF-FP-NEXT: sub sp, sp, #4 +; LEAF-FP-NEXT: str r0, [sp] +; LEAF-FP-NEXT: add r0, r0, #4 +; LEAF-FP-NEXT: add sp, sp, #4 +; LEAF-FP-NEXT: mov pc, lr +; +; LEAF-FP-AAPCS-LABEL: leaf: +; LEAF-FP-AAPCS: @ %bb.0: +; LEAF-FP-AAPCS-NEXT: .save {r11, lr} +; LEAF-FP-AAPCS-NEXT: push {r11, lr} +; LEAF-FP-AAPCS-NEXT: .setfp r11, sp +; LEAF-FP-AAPCS-NEXT: mov r11, sp +; LEAF-FP-AAPCS-NEXT: push {r0} +; LEAF-FP-AAPCS-NEXT: add r0, r0, #4 +; LEAF-FP-AAPCS-NEXT: mov sp, r11 +; LEAF-FP-AAPCS-NEXT: pop {r11, lr} +; LEAF-FP-AAPCS-NEXT: mov pc, lr +; +; LEAF-NOFP-LABEL: leaf: +; LEAF-NOFP: @ %bb.0: +; LEAF-NOFP-NEXT: .pad #4 +; LEAF-NOFP-NEXT: sub sp, sp, #4 +; LEAF-NOFP-NEXT: str r0, [sp] +; LEAF-NOFP-NEXT: add r0, r0, #4 +; LEAF-NOFP-NEXT: add sp, sp, #4 +; LEAF-NOFP-NEXT: mov pc, lr +; +; LEAF-NOFP-AAPCS-LABEL: leaf: +; LEAF-NOFP-AAPCS: @ %bb.0: +; LEAF-NOFP-AAPCS-NEXT: .pad #4 +; LEAF-NOFP-AAPCS-NEXT: sub sp, sp, #4 +; LEAF-NOFP-AAPCS-NEXT: str r0, [sp] +; LEAF-NOFP-AAPCS-NEXT: add r0, r0, #4 +; LEAF-NOFP-AAPCS-NEXT: add sp, sp, #4 +; LEAF-NOFP-AAPCS-NEXT: mov pc, lr + %2 = alloca i32, align 4 + store i32 %0, i32* %2, align 4 + %3 = load i32, i32* %2, align 4 + %4 = add nsw i32 %3, 4 + ret i32 %4 +} + +define dso_local noundef i32 @non_leaf(i32 noundef %0) { +; FP-LABEL: non_leaf: +; FP: @ %bb.0: +; FP-NEXT: .save {r11, lr} +; FP-NEXT: push {r11, lr} +; FP-NEXT: .setfp r11, sp +; FP-NEXT: mov r11, sp +; FP-NEXT: .pad #8 +; FP-NEXT: sub sp, sp, #8 +; FP-NEXT: str r0, [sp, #4] +; FP-NEXT: bl leaf +; FP-NEXT: add r0, r0, #1 +; FP-NEXT: mov sp, r11 +; FP-NEXT: pop {r11, lr} +; FP-NEXT: mov pc, lr +; +; FP-AAPCS-LABEL: non_leaf: +; FP-AAPCS: @ %bb.0: +; FP-AAPCS-NEXT: .save {r11, lr} +; FP-AAPCS-NEXT: push {r11, lr} +; FP-AAPCS-NEXT: .setfp r11, sp +; FP-AAPCS-NEXT: mov r11, sp +; FP-AAPCS-NEXT: .pad #8 +; FP-AAPCS-NEXT: sub sp, sp, #8 +; FP-AAPCS-NEXT: str r0, [sp, #4] +; FP-AAPCS-NEXT: bl leaf +; FP-AAPCS-NEXT: add r0, r0, #1 +; FP-AAPCS-NEXT: mov sp, r11 
+; FP-AAPCS-NEXT: pop {r11, lr} +; FP-AAPCS-NEXT: mov pc, lr +; +; NOFP-LABEL: non_leaf: +; NOFP: @ %bb.0: +; NOFP-NEXT: .save {r11, lr} +; NOFP-NEXT: push {r11, lr} +; NOFP-NEXT: .pad #8 +; NOFP-NEXT: sub sp, sp, #8 +; NOFP-NEXT: str r0, [sp, #4] +; NOFP-NEXT: bl leaf +; NOFP-NEXT: add r0, r0, #1 +; NOFP-NEXT: add sp, sp, #8 +; NOFP-NEXT: pop {r11, lr} +; NOFP-NEXT: mov pc, lr +; +; NOFP-AAPCS-LABEL: non_leaf: +; NOFP-AAPCS: @ %bb.0: +; NOFP-AAPCS-NEXT: .save {r11, lr} +; NOFP-AAPCS-NEXT: push {r11, lr} +; NOFP-AAPCS-NEXT: .pad #8 +; NOFP-AAPCS-NEXT: sub sp, sp, #8 +; NOFP-AAPCS-NEXT: str r0, [sp, #4] +; NOFP-AAPCS-NEXT: bl leaf +; NOFP-AAPCS-NEXT: add r0, r0, #1 +; NOFP-AAPCS-NEXT: add sp, sp, #8 +; NOFP-AAPCS-NEXT: pop {r11, lr} +; NOFP-AAPCS-NEXT: mov pc, lr + %2 = alloca i32, align 4 + store i32 %0, i32* %2, align 4 + %3 = load i32, i32* %2, align 4 + %4 = call noundef i32 @leaf(i32 noundef %3) + %5 = add nsw i32 %4, 1 + ret i32 %5 +} + +declare i8* @llvm.stacksave() +define dso_local void @required_fp(i32 %0, i32 %1) { +; LEAF-FP-LABEL: required_fp: +; LEAF-FP: @ %bb.0: +; LEAF-FP-NEXT: .save {r4, r5, r11, lr} +; LEAF-FP-NEXT: push {r4, r5, r11, lr} +; LEAF-FP-NEXT: .setfp r11, sp, #8 +; LEAF-FP-NEXT: add r11, sp, #8 +; LEAF-FP-NEXT: .pad #24 +; LEAF-FP-NEXT: sub sp, sp, #24 +; LEAF-FP-NEXT: str r1, [r11, #-16] +; LEAF-FP-NEXT: mov r1, #7 +; LEAF-FP-NEXT: add r1, r1, r0, lsl #2 +; LEAF-FP-NEXT: str r0, [r11, #-12] +; LEAF-FP-NEXT: bic r1, r1, #7 +; LEAF-FP-NEXT: str sp, [r11, #-24] +; LEAF-FP-NEXT: sub sp, sp, r1 +; LEAF-FP-NEXT: mov r1, #0 +; LEAF-FP-NEXT: str r0, [r11, #-32] +; LEAF-FP-NEXT: str r1, [r11, #-28] +; LEAF-FP-NEXT: sub sp, r11, #8 +; LEAF-FP-NEXT: pop {r4, r5, r11, lr} +; LEAF-FP-NEXT: mov pc, lr +; +; LEAF-FP-AAPCS-LABEL: required_fp: +; LEAF-FP-AAPCS: @ %bb.0: +; LEAF-FP-AAPCS-NEXT: .save {r4, r5, r11, lr} +; LEAF-FP-AAPCS-NEXT: push {r4, r5, r11, lr} +; LEAF-FP-AAPCS-NEXT: .setfp r11, sp, #8 +; LEAF-FP-AAPCS-NEXT: add r11, sp, #8 +; 
LEAF-FP-AAPCS-NEXT: .pad #24 +; LEAF-FP-AAPCS-NEXT: sub sp, sp, #24 +; LEAF-FP-AAPCS-NEXT: str r1, [r11, #-16] +; LEAF-FP-AAPCS-NEXT: mov r1, #7 +; LEAF-FP-AAPCS-NEXT: add r1, r1, r0, lsl #2 +; LEAF-FP-AAPCS-NEXT: str r0, [r11, #-12] +; LEAF-FP-AAPCS-NEXT: bic r1, r1, #7 +; LEAF-FP-AAPCS-NEXT: str sp, [r11, #-24] +; LEAF-FP-AAPCS-NEXT: sub sp, sp, r1 +; LEAF-FP-AAPCS-NEXT: mov r1, #0 +; LEAF-FP-AAPCS-NEXT: str r0, [r11, #-32] +; LEAF-FP-AAPCS-NEXT: str r1, [r11, #-28] +; LEAF-FP-AAPCS-NEXT: sub sp, r11, #8 +; LEAF-FP-AAPCS-NEXT: pop {r4, r5, r11, lr} +; LEAF-FP-AAPCS-NEXT: mov pc, lr +; +; LEAF-NOFP-LABEL: required_fp: +; LEAF-NOFP: @ %bb.0: +; LEAF-NOFP-NEXT: .save {r4, r5, r11} +; LEAF-NOFP-NEXT: push {r4, r5, r11} +; LEAF-NOFP-NEXT: .setfp r11, sp, #8 +; LEAF-NOFP-NEXT: add r11, sp, #8 +; LEAF-NOFP-NEXT: .pad #20 +; LEAF-NOFP-NEXT: sub sp, sp, #20 +; LEAF-NOFP-NEXT: str r1, [r11, #-16] +; LEAF-NOFP-NEXT: mov r1, #7 +; LEAF-NOFP-NEXT: add r1, r1, r0, lsl #2 +; LEAF-NOFP-NEXT: str r0, [r11, #-12] +; LEAF-NOFP-NEXT: bic r1, r1, #7 +; LEAF-NOFP-NEXT: str sp, [r11, #-20] +; LEAF-NOFP-NEXT: sub sp, sp, r1 +; LEAF-NOFP-NEXT: mov r1, #0 +; LEAF-NOFP-NEXT: str r0, [r11, #-28] +; LEAF-NOFP-NEXT: str r1, [r11, #-24] +; LEAF-NOFP-NEXT: sub sp, r11, #8 +; LEAF-NOFP-NEXT: pop {r4, r5, r11} +; LEAF-NOFP-NEXT: mov pc, lr +; +; LEAF-NOFP-AAPCS-LABEL: required_fp: +; LEAF-NOFP-AAPCS: @ %bb.0: +; LEAF-NOFP-AAPCS-NEXT: .save {r4, r5, r11, lr} +; LEAF-NOFP-AAPCS-NEXT: push {r4, r5, r11, lr} +; LEAF-NOFP-AAPCS-NEXT: .setfp r11, sp, #8 +; LEAF-NOFP-AAPCS-NEXT: add r11, sp, #8 +; LEAF-NOFP-AAPCS-NEXT: .pad #24 +; LEAF-NOFP-AAPCS-NEXT: sub sp, sp, #24 +; LEAF-NOFP-AAPCS-NEXT: str r1, [r11, #-16] +; LEAF-NOFP-AAPCS-NEXT: mov r1, #7 +; LEAF-NOFP-AAPCS-NEXT: add r1, r1, r0, lsl #2 +; LEAF-NOFP-AAPCS-NEXT: str r0, [r11, #-12] +; LEAF-NOFP-AAPCS-NEXT: bic r1, r1, #7 +; LEAF-NOFP-AAPCS-NEXT: str sp, [r11, #-24] +; LEAF-NOFP-AAPCS-NEXT: sub sp, sp, r1 +; LEAF-NOFP-AAPCS-NEXT: mov r1, #0 +; 
LEAF-NOFP-AAPCS-NEXT: str r0, [r11, #-32] +; LEAF-NOFP-AAPCS-NEXT: str r1, [r11, #-28] +; LEAF-NOFP-AAPCS-NEXT: sub sp, r11, #8 +; LEAF-NOFP-AAPCS-NEXT: pop {r4, r5, r11, lr} +; LEAF-NOFP-AAPCS-NEXT: mov pc, lr + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i8*, align 8 + %6 = alloca i64, align 8 + store i32 %0, i32* %3, align 4 + store i32 %1, i32* %4, align 4 + %7 = load i32, i32* %3, align 4 + %8 = zext i32 %7 to i64 + %9 = call i8* @llvm.stacksave() + store i8* %9, i8** %5, align 8 + %10 = alloca i32, i64 %8, align 4 + store i64 %8, i64* %6, align 8 + ret void +} diff --git a/llvm/test/CodeGen/Thumb/frame-access.ll b/llvm/test/CodeGen/Thumb/frame-access.ll index d3a5871..6a98d12 100644 --- a/llvm/test/CodeGen/Thumb/frame-access.ll +++ b/llvm/test/CodeGen/Thumb/frame-access.ll @@ -1,4 +1,7 @@ -; RUN: llc -mtriple=thumbv6m-eabi -frame-pointer=none %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv6m-eabi -frame-pointer=none %s -o - --verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP,CHECK-ATPCS +; RUN: llc -mtriple=thumbv6m-eabi -frame-pointer=all %s -o - --verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-FP-ATPCS,CHECK-ATPCS +; RUN: llc -mtriple=thumbv6m-eabi -frame-pointer=none -mattr=+aapcs-frame-chain-leaf %s -o - --verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP,CHECK-AAPCS +; RUN: llc -mtriple=thumbv6m-eabi -frame-pointer=all -mattr=+aapcs-frame-chain-leaf %s -o - --verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-FP-AAPCS,CHECK-AAPCS ; struct S { int x[128]; } s; ; int f(int *, int, int, int, struct S); @@ -12,6 +15,7 @@ @s = common dso_local global %struct.S zeroinitializer, align 4 declare void @llvm.va_start(i8*) +declare dso_local i32 @i(i32) local_unnamed_addr declare dso_local i32 @g(i32*, i32, i32, i32, i32, i32) local_unnamed_addr declare dso_local i32 @f(i32*, i32, i32, i32, %struct.S* byval(%struct.S) align 4) local_unnamed_addr declare dso_local i32 
@h(i32*, i32*, i32*) local_unnamed_addr @@ -21,7 +25,7 @@ declare dso_local i32 @u(i32*, i32*, i32*, %struct.S* byval(%struct.S) align 4, ; Test access to arguments, passed on stack (including varargs) ; -; Usual case, access via SP +; Usual case, access via SP if FP is not available ; int test_args_sp(int a, int b, int c, int d, int e) { ; int v[4]; ; return g(v, a, b, c, d, e); @@ -36,7 +40,10 @@ entry: } ; CHECK-LABEL: test_args_sp ; Load `e` -; CHECK: ldr r0, [sp, #32] +; CHECK-NOFP: ldr r0, [sp, #32] +; CHECK-FP-ATPCS: ldr r0, [r7, #8] +; CHECK-FP-AAPCS: mov r0, r11 +; CHECK-FP-AAPCS: ldr r0, [r0, #8] ; CHECK-NEXT: str r3, [sp] ; Pass `e` on stack ; CHECK-NEXT: str r0, [sp, #4] @@ -63,9 +70,18 @@ entry: ; Three incoming varargs in registers ; CHECK: sub sp, #12 ; CHECK: sub sp, #28 -; Incoming arguments area is accessed via SP -; CHECK: add r0, sp, #36 -; CHECK: stm r0!, {r1, r2, r3} +; Incoming arguments area is accessed via SP if FP is not available +; CHECK-NOFP: add r0, sp, #36 +; CHECK-NOFP: stm r0!, {r1, r2, r3} +; CHECK-FP-ATPCS: mov r0, r7 +; CHECK-FP-ATPCS: adds r0, #8 +; CHECK-FP-ATPCS: stm r0!, {r1, r2, r3} +; CHECK-FP-AAPCS: mov r0, r11 +; CHECK-FP-AAPCS: str r1, [r0, #8] +; CHECK-FP-AAPCS: mov r0, r11 +; CHECK-FP-AAPCS: str r2, [r0, #12] +; CHECK-FP-AAPCS: mov r0, r11 +; CHECK-FP-AAPCS: str r3, [r0, #16] ; Re-aligned stack, access via FP ; int test_args_realign(int a, int b, int c, int d, int e) { @@ -83,14 +99,17 @@ entry: } ; CHECK-LABEL: test_args_realign ; Setup frame pointer -; CHECK: add r7, sp, #8 +; CHECK-ATPCS: add r7, sp, #8 +; CHECK-AAPCS: mov r11, sp ; Align stack ; CHECK: mov r4, sp ; CHECK-NEXT: lsrs r4, r4, #4 ; CHECK-NEXT: lsls r4, r4, #4 ; CHECK-NEXT: mov sp, r4 ; Load `e` via FP -; CHECK: ldr r0, [r7, #8] +; CHECK-ATPCS: ldr r0, [r7, #8] +; CHECK-AAPCS: mov r0, r11 +; CHECK-AAPCS: ldr r0, [r0, #8] ; CHECK-NEXT: str r3, [sp] ; Pass `e` as argument ; CHECK-NEXT: str r0, [sp, #4] @@ -117,16 +136,23 @@ entry: ; Three incoming 
register varargs ; CHECK: sub sp, #12 ; Setup frame pointer -; CHECK: add r7, sp, #8 +; CHECK-ATPCS: add r7, sp, #8 +; CHECK-AAPCS: mov r11, sp ; Align stack ; CHECK: mov r4, sp ; CHECK-NEXT: lsrs r4, r4, #4 ; CHECK-NEXT: lsls r4, r4, #4 ; CHECK-NEXT: mov sp, r4 ; Incoming register varargs stored via FP -; CHECK: mov r0, r7 -; CHECK-NEXT: adds r0, #8 -; CHECK-NEXT: stm r0!, {r1, r2, r3} +; CHECK-ATPCS: mov r0, r7 +; CHECK-ATPCS-NEXT: adds r0, #8 +; CHECK-ATPCS-NEXT: stm r0!, {r1, r2, r3} +; CHECK-AAPCS: mov r0, r11 +; CHECK-AAPCS: str r1, [r0, #8] +; CHECK-AAPCS: mov r0, r11 +; CHECK-AAPCS: str r2, [r0, #12] +; CHECK-AAPCS: mov r0, r11 +; CHECK-AAPCS: str r3, [r0, #16] ; VLAs present, access via FP ; int test_args_vla(int a, int b, int c, int d, int e) { ; int v[a]; @@ -140,11 +166,14 @@ entry: } ; CHECK-LABEL: test_args_vla ; Setup frame pointer -; CHECK: add r7, sp, #12 +; CHECK-ATPCS: add r7, sp, #12 +; CHECK-AAPCS: mov r11, sp ; Allocate outgoing stack arguments space -; CHECK: sub sp, #4 +; CHECK: sub sp, #8 ; Load `e` via FP -; CHECK: ldr r5, [r7, #8] +; CHECK-ATPCS: ldr r5, [r7, #8] +; CHECK-AAPCS: mov r5, r11 +; CHECK-AAPCS: ldr r5, [r5, #8] ; Pass `d` and `e` as arguments ; CHECK-NEXT: str r3, [sp] ; CHECK-NEXT: str r5, [sp, #4] @@ -169,11 +198,18 @@ entry: ; Three incoming register varargs ; CHECK: sub sp, #12 ; Setup frame pointer -; CHECK: add r7, sp, #8 +; CHECK-ATPCS: add r7, sp, #8 +; CHECK-AAPCS: mov r11, sp ; Register varargs stored via FP -; CHECK-DAG: str r3, [r7, #16] -; CHECK-DAG: str r2, [r7, #12] -; CHECK-DAG: str r1, [r7, #8] +; CHECK-ATPCS-DAG: str r3, [r7, #16] +; CHECK-ATPCS-DAG: str r2, [r7, #12] +; CHECK-ATPCS-DAG: str r1, [r7, #8] +; CHECK-AAPCS-DAG: mov r5, r11 +; CHECK-AAPCS-DAG: str r1, [r5, #8] +; CHECK-AAPCS-DAG: mov r1, r11 +; CHECK-AAPCS-DAG: str r3, [r1, #16] +; CHECK-AAPCS-DAG: mov r1, r11 +; CHECK-AAPCS-DAG: str r2, [r1, #12] ; Moving SP, access via SP ; int test_args_moving_sp(int a, int b, int c, int d, int e) { @@ -195,17 
+231,32 @@ entry: ret i32 %add7 } ; CHECK-LABEL: test_args_moving_sp -; 20 bytes callee-saved area -; CHECK: push {r4, r5, r6, r7, lr} -; 20 bytes locals -; CHECK: sub sp, #20 +; 20 bytes callee-saved area without FP +; CHECK-NOFP: push {r4, r5, r6, r7, lr} +; 20 bytes callee-saved area for ATPCS +; CHECK-FP-ATPCS: push {r4, r5, r6, r7, lr} +; 24 bytes callee-saved area for AAPCS as codegen prefers an even number of GPRs spilled +; CHECK-FP-AAPCS: push {lr} +; CHECK-FP-AAPCS: mov lr, r11 +; CHECK-FP-AAPCS: push {lr} +; CHECK-FP-AAPCS: push {r4, r5, r6, r7} +; 20 bytes locals without FP +; CHECK-NOFP: sub sp, #20 +; 28 bytes locals with FP for ATPCS +; CHECK-FP-ATPCS: sub sp, #28 +; 24 bytes locals with FP for AAPCS +; CHECK-FP-AAPCS: sub sp, #24 ; Setup base pointer ; CHECK: mov r6, sp ; Allocate outgoing arguments space ; CHECK: sub sp, #508 ; CHECK: sub sp, #4 -; Load `e` via BP, 40 = 20 + 20 -; CHECK: ldr r3, [r6, #40] +; Load `e` via BP if FP is not present (40 = 20 + 20) +; CHECK-NOFP: ldr r3, [r6, #40] +; Load `e` via FP otherwise +; CHECK-FP-ATPCS: ldr r3, [r7, #8] +; CHECK-FP-AAPCS: mov r0, r11 +; CHECK-FP-AAPCS: ldr r3, [r0, #8] ; CHECK: bl f ; Stack restored before next call ; CHECK-NEXT: add sp, #508 @@ -236,14 +287,53 @@ entry: ; CHECK-LABEL: test_varargs_moving_sp ; Three incoming register varargs ; CHECK: sub sp, #12 -; 16 bytes callee-saves -; CHECK: push {r4, r5, r6, lr} -; 20 bytes locals -; CHECK: sub sp, #20 -; Incoming varargs stored via BP, 36 = 20 + 16 -; CHECK: mov r0, r6 -; CHECK-NEXT: adds r0, #36 -; CHECK-NEXT: stm r0!, {r1, r2, r3} +; 16 bytes callee-saves without FP +; CHECK-NOFP: push {r4, r5, r6, lr} +; 20 bytes callee-saves with FP for ATPCS, 24 bytes for AAPCS +; CHECK-FP-ATPCS: push {r4, r5, r6, r7, lr} +; CHECK-FP-AAPCS: push {lr} +; CHECK-FP-AAPCS: mov lr, r11 +; CHECK-FP-AAPCS: push {lr} +; CHECK-FP-AAPCS: push {r4, r5, r6, r7} +; Locals area +; CHECK-NOFP: sub sp, #20 +; CHECK-FP-ATPCS: sub sp, #24 +; CHECK-FP-AAPCS: sub sp, #20 +; Incoming varargs stored
via BP if FP is not present (36 = 20 + 16) +; CHECK-NOFP: mov r0, r6 +; CHECK-NOFP-NEXT: adds r0, #36 +; CHECK-NOFP-NEXT: stm r0!, {r1, r2, r3} +; Incoming varargs stored via FP otherwise +; CHECK-FP-ATPCS: mov r0, r7 +; CHECK-FP-ATPCS-NEXT: adds r0, #8 +; CHECK-FP-ATPCS-NEXT: stm r0!, {r1, r2, r3} +; CHECK-FP-AAPCS: mov r0, r11 +; CHECK-FP-AAPCS-NEXT: str r1, [r0, #8] +; CHECK-FP-AAPCS-NEXT: mov r0, r11 +; CHECK-FP-AAPCS-NEXT: str r2, [r0, #12] +; CHECK-FP-AAPCS-NEXT: mov r0, r11 +; CHECK-FP-AAPCS-NEXT: str r3, [r0, #16] + +; struct S { int x[128]; } s; +; int test_args_large_offset(struct S a, int b) { +; return i(b); +; } +define dso_local i32 @test_args_large_offset(%struct.S* byval(%struct.S) align 4 %0, i32 %1) local_unnamed_addr { + %3 = alloca i32, align 4 + store i32 %1, i32* %3, align 4 + %4 = load i32, i32* %3, align 4 + %5 = call i32 @i(i32 %4) + ret i32 %5 +} +; CHECK-LABEL: test_args_large_offset +; Without FP: Access to large offset is made using SP +; CHECK-NOFP: ldr r0, [sp, #520] +; With FP: Access to large offset is made through a const pool using FP +; CHECK-FP: ldr r0, .LCPI0_0 +; CHECK-FP-ATPCS: ldr r0, [r0, r7] +; CHECK-FP-AAPCS: add r0, r11 +; CHECK-FP-AAPCS: ldr r0, [r0] +; CHECK: bl i ; ; Access to locals @@ -313,7 +403,8 @@ entry: } ; CHECK-LABEL: test_local_realign ; Setup frame pointer -; CHECK: add r7, sp, #8 +; CHECK-ATPCS: add r7, sp, #8 +; CHECK-AAPCS: mov r11, sp ; Re-align stack ; CHECK: mov r4, sp ; CHECK-NEXT: lsrs r4, r4, #4 @@ -355,15 +446,24 @@ entry: } ; CHECK-LABEL: test_local_vla ; Setup frame pointer -; CHECK: add r7, sp, #12 +; CHECK-ATPCS: add r7, sp, #12 +; CHECK-AAPCS: mov r11, sp +; Locals area +; CHECK-ATPCS: sub sp, #12 +; CHECK-AAPCS: sub sp, #16 ; Setup base pointer ; CHECK: mov r6, sp -; CHECK: mov r5, r6 +; CHECK-ATPCS: mov r5, r6 +; CHECK-AAPCS: adds r5, r6, #4 ; Arguments to `h` compute relative to BP ; CHECK: adds r0, r6, #7 -; CHECK-NEXT: adds r0, #1 -; CHECK-NEXT: adds r1, r6, #4 -; CHECK-NEXT: mov r2, r6 +; CHECK-ATPCS-NEXT: adds
r0, #1 +; CHECK-ATPCS-NEXT: adds r1, r6, #4 +; CHECK-ATPCS-NEXT: mov r2, r6 +; CHECK-AAPCS-NEXT: adds r0, #5 +; CHECK-AAPCS-NEXT: adds r1, r6, #7 +; CHECK-AAPCS-NEXT: adds r1, #1 +; CHECK-AAPCS-NEXT: adds r2, r6, #4 ; CHECK-NEXT: bl h ; Load `x`, `y`, `z` via BP (r5 should still have the value of r6 from the move ; above) @@ -396,7 +496,9 @@ entry: } ; CHECK-LABEL: test_local_moving_sp ; Locals area -; CHECK: sub sp, #36 +; CHECK-NOFP: sub sp, #36 +; CHECK-FP-ATPCS: sub sp, #44 +; CHECK-FP-AAPCS: sub sp, #40 ; Setup BP ; CHECK: mov r6, sp ; Outoging arguments @@ -404,12 +506,24 @@ entry: ; CHECK-NEXT: sub sp, #508 ; CHECK-NEXT: sub sp, #8 ; Argument addresses computed relative to BP -; CHECK: adds r4, r6, #7 -; CHECK-NEXT: adds r4, #13 -; CHECK: adds r1, r6, #7 -; CHECK-NEXT: adds r1, #9 -; CHECK: adds r5, r6, #7 -; CHECK-NEXT: adds r5, #5 +; CHECK-NOFP: adds r4, r6, #7 +; CHECK-NOFP-NEXT: adds r4, #13 +; CHECK-NOFP: adds r1, r6, #7 +; CHECK-NOFP-NEXT: adds r1, #9 +; CHECK-NOFP: adds r5, r6, #7 +; CHECK-NOFP-NEXT: adds r5, #5 +; CHECK-FP-ATPCS: adds r0, r6, #7 +; CHECK-FP-ATPCS-NEXT: adds r0, #21 +; CHECK-FP-ATPCS: adds r1, r6, #7 +; CHECK-FP-ATPCS-NEXT: adds r1, #17 +; CHECK-FP-ATPCS: adds r5, r6, #7 +; CHECK-FP-ATPCS-NEXT: adds r5, #13 +; CHECK-FP-AAPCS: adds r4, r6, #7 +; CHECK-FP-AAPCS-NEXT: adds r4, #17 +; CHECK-FP-AAPCS: adds r1, r6, #7 +; CHECK-FP-AAPCS-NEXT: adds r1, #13 +; CHECK-FP-AAPCS: adds r5, r6, #7 +; CHECK-FP-AAPCS-NEXT: adds r5, #9 ; CHECK: bl u ; Stack restored before next call ; CHECK: add sp, #508 diff --git a/llvm/test/CodeGen/Thumb/frame-chain-reserved-fp.ll b/llvm/test/CodeGen/Thumb/frame-chain-reserved-fp.ll new file mode 100644 index 0000000..37dd16b --- /dev/null +++ b/llvm/test/CodeGen/Thumb/frame-chain-reserved-fp.ll @@ -0,0 +1,27 @@ +; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=all 2>&1 | FileCheck %s --check-prefix=RESERVED-R7 +; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s 
-frame-pointer=all -mattr=+aapcs-frame-chain 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 +; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 +; RUN: llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-NONE +; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 +; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 +; RUN: llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=none 2>&1 | FileCheck %s --check-prefix=RESERVED-NONE +; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 +; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 + +declare void @leaf(i32 %input) + +define void @reserved_r7(i32 %input) { +; RESERVED-NONE-NOT: error: write to reserved register 'R7' +; RESERVED-R7: error: write to reserved register 'R7' +; RESERVED-R11-NOT: error: write to reserved register 'R7' + %1 = call i32 asm sideeffect "mov $0, $1", "={r7},r"(i32 %input) + ret void +} + +define void @reserved_r11(i32 %input) { +; RESERVED-NONE-NOT: error: write to reserved register 'R11' +; RESERVED-R7-NOT: error: write to reserved register 'R11' +; RESERVED-R11: error: write to reserved register 'R11' + %1 = call i32 asm sideeffect "mov $0, $1", "={r11},r"(i32 %input) + ret void +} diff --git a/llvm/test/CodeGen/Thumb/frame-chain.ll b/llvm/test/CodeGen/Thumb/frame-chain.ll new file mode 100644 index 0000000..c8b6b8a --- /dev/null 
+++ b/llvm/test/CodeGen/Thumb/frame-chain.ll @@ -0,0 +1,288 @@ +; RUN: llc -mtriple thumb-arm-none-eabi -filetype asm -o - %s -frame-pointer=all | FileCheck %s --check-prefixes=FP,LEAF-FP +; RUN: llc -mtriple thumb-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-FP +; RUN: llc -mtriple thumb-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain-leaf | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-FP-AAPCS +; RUN: llc -mtriple thumb-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf | FileCheck %s --check-prefixes=FP,LEAF-NOFP +; RUN: llc -mtriple thumb-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-NOFP +; RUN: llc -mtriple thumb-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain-leaf | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-NOFP-AAPCS +; RUN: llc -mtriple thumb-arm-none-eabi -filetype asm -o - %s -frame-pointer=none | FileCheck %s --check-prefixes=NOFP,LEAF-NOFP +; RUN: llc -mtriple thumb-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain | FileCheck %s --check-prefixes=NOFP-AAPCS,LEAF-NOFP +; RUN: llc -mtriple thumb-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain-leaf | FileCheck %s --check-prefixes=NOFP-AAPCS,LEAF-NOFP-AAPCS + +define dso_local noundef i32 @leaf(i32 noundef %0) { +; LEAF-FP-LABEL: leaf: +; LEAF-FP: @ %bb.0: +; LEAF-FP-NEXT: .pad #4 +; LEAF-FP-NEXT: sub sp, #4 +; LEAF-FP-NEXT: str r0, [sp] +; LEAF-FP-NEXT: adds r0, r0, #4 +; LEAF-FP-NEXT: add sp, #4 +; LEAF-FP-NEXT: bx lr +; +; LEAF-FP-AAPCS-LABEL: leaf: +; LEAF-FP-AAPCS: @ %bb.0: +; LEAF-FP-AAPCS-NEXT: .save {lr} +; LEAF-FP-AAPCS-NEXT: push {lr} +; LEAF-FP-AAPCS-NEXT: mov lr, r11 +; LEAF-FP-AAPCS-NEXT: .save {r11} +; LEAF-FP-AAPCS-NEXT: push {lr} +; LEAF-FP-AAPCS-NEXT: .setfp r11, sp +; 
LEAF-FP-AAPCS-NEXT: mov r11, sp +; LEAF-FP-AAPCS-NEXT: .pad #4 +; LEAF-FP-AAPCS-NEXT: sub sp, #4 +; LEAF-FP-AAPCS-NEXT: str r0, [sp] +; LEAF-FP-AAPCS-NEXT: adds r0, r0, #4 +; LEAF-FP-AAPCS-NEXT: add sp, #4 +; LEAF-FP-AAPCS-NEXT: pop {r1} +; LEAF-FP-AAPCS-NEXT: mov r11, r1 +; LEAF-FP-AAPCS-NEXT: pop {r1} +; LEAF-FP-AAPCS-NEXT: bx r1 +; +; LEAF-NOFP-LABEL: leaf: +; LEAF-NOFP: @ %bb.0: +; LEAF-NOFP-NEXT: .pad #4 +; LEAF-NOFP-NEXT: sub sp, #4 +; LEAF-NOFP-NEXT: str r0, [sp] +; LEAF-NOFP-NEXT: adds r0, r0, #4 +; LEAF-NOFP-NEXT: add sp, #4 +; LEAF-NOFP-NEXT: bx lr +; +; LEAF-NOFP-AAPCS-LABEL: leaf: +; LEAF-NOFP-AAPCS: @ %bb.0: +; LEAF-NOFP-AAPCS-NEXT: .pad #4 +; LEAF-NOFP-AAPCS-NEXT: sub sp, #4 +; LEAF-NOFP-AAPCS-NEXT: str r0, [sp] +; LEAF-NOFP-AAPCS-NEXT: adds r0, r0, #4 +; LEAF-NOFP-AAPCS-NEXT: add sp, #4 +; LEAF-NOFP-AAPCS-NEXT: bx lr + %2 = alloca i32, align 4 + store i32 %0, i32* %2, align 4 + %3 = load i32, i32* %2, align 4 + %4 = add nsw i32 %3, 4 + ret i32 %4 +} + +define dso_local noundef i32 @non_leaf(i32 noundef %0) { +; FP-LABEL: non_leaf: +; FP: @ %bb.0: +; FP-NEXT: .save {r7, lr} +; FP-NEXT: push {r7, lr} +; FP-NEXT: .setfp r7, sp +; FP-NEXT: add r7, sp, #0 +; FP-NEXT: .pad #8 +; FP-NEXT: sub sp, #8 +; FP-NEXT: str r0, [sp, #4] +; FP-NEXT: bl leaf +; FP-NEXT: adds r0, r0, #1 +; FP-NEXT: add sp, #8 +; FP-NEXT: pop {r7} +; FP-NEXT: pop {r1} +; FP-NEXT: bx r1 +; +; FP-AAPCS-LABEL: non_leaf: +; FP-AAPCS: @ %bb.0: +; FP-AAPCS-NEXT: .save {lr} +; FP-AAPCS-NEXT: push {lr} +; FP-AAPCS-NEXT: mov lr, r11 +; FP-AAPCS-NEXT: .save {r11} +; FP-AAPCS-NEXT: push {lr} +; FP-AAPCS-NEXT: .setfp r11, sp +; FP-AAPCS-NEXT: mov r11, sp +; FP-AAPCS-NEXT: .pad #8 +; FP-AAPCS-NEXT: sub sp, #8 +; FP-AAPCS-NEXT: str r0, [sp, #4] +; FP-AAPCS-NEXT: bl leaf +; FP-AAPCS-NEXT: adds r0, r0, #1 +; FP-AAPCS-NEXT: add sp, #8 +; FP-AAPCS-NEXT: pop {r1} +; FP-AAPCS-NEXT: mov r11, r1 +; FP-AAPCS-NEXT: pop {r1} +; FP-AAPCS-NEXT: bx r1 +; +; NOFP-LABEL: non_leaf: +; NOFP: @ %bb.0: +; NOFP-NEXT: 
.save {r7, lr} +; NOFP-NEXT: push {r7, lr} +; NOFP-NEXT: .pad #8 +; NOFP-NEXT: sub sp, #8 +; NOFP-NEXT: str r0, [sp, #4] +; NOFP-NEXT: bl leaf +; NOFP-NEXT: adds r0, r0, #1 +; NOFP-NEXT: add sp, #8 +; NOFP-NEXT: pop {r7} +; NOFP-NEXT: pop {r1} +; NOFP-NEXT: bx r1 +; +; NOFP-AAPCS-LABEL: non_leaf: +; NOFP-AAPCS: @ %bb.0: +; NOFP-AAPCS-NEXT: .save {r7, lr} +; NOFP-AAPCS-NEXT: push {r7, lr} +; NOFP-AAPCS-NEXT: .pad #8 +; NOFP-AAPCS-NEXT: sub sp, #8 +; NOFP-AAPCS-NEXT: str r0, [sp, #4] +; NOFP-AAPCS-NEXT: bl leaf +; NOFP-AAPCS-NEXT: adds r0, r0, #1 +; NOFP-AAPCS-NEXT: add sp, #8 +; NOFP-AAPCS-NEXT: pop {r7} +; NOFP-AAPCS-NEXT: pop {r1} +; NOFP-AAPCS-NEXT: bx r1 + %2 = alloca i32, align 4 + store i32 %0, i32* %2, align 4 + %3 = load i32, i32* %2, align 4 + %4 = call noundef i32 @leaf(i32 noundef %3) + %5 = add nsw i32 %4, 1 + ret i32 %5 +} + +declare i8* @llvm.stacksave() +define dso_local void @required_fp(i32 %0, i32 %1) { +; FP-LABEL: required_fp: +; FP: @ %bb.0: +; FP-NEXT: .save {r4, r6, r7, lr} +; FP-NEXT: push {r4, r6, r7, lr} +; FP-NEXT: .setfp r7, sp, #8 +; FP-NEXT: add r7, sp, #8 +; FP-NEXT: .pad #24 +; FP-NEXT: sub sp, #24 +; FP-NEXT: mov r6, sp +; FP-NEXT: mov r2, r6 +; FP-NEXT: str r1, [r2, #16] +; FP-NEXT: str r0, [r2, #20] +; FP-NEXT: mov r1, sp +; FP-NEXT: str r1, [r2, #8] +; FP-NEXT: lsls r1, r0, #2 +; FP-NEXT: adds r1, r1, #7 +; FP-NEXT: movs r3, #7 +; FP-NEXT: bics r1, r3 +; FP-NEXT: mov r3, sp +; FP-NEXT: subs r1, r3, r1 +; FP-NEXT: mov sp, r1 +; FP-NEXT: movs r1, #0 +; FP-NEXT: str r1, [r6, #4] +; FP-NEXT: str r0, [r2] +; FP-NEXT: subs r4, r7, #7 +; FP-NEXT: subs r4, #1 +; FP-NEXT: mov sp, r4 +; FP-NEXT: pop {r4, r6, r7} +; FP-NEXT: pop {r0} +; FP-NEXT: bx r0 +; +; FP-AAPCS-LABEL: required_fp: +; FP-AAPCS: @ %bb.0: +; FP-AAPCS-NEXT: .save {lr} +; FP-AAPCS-NEXT: push {lr} +; FP-AAPCS-NEXT: mov lr, r11 +; FP-AAPCS-NEXT: .save {r11} +; FP-AAPCS-NEXT: push {lr} +; FP-AAPCS-NEXT: .setfp r11, sp +; FP-AAPCS-NEXT: mov r11, sp +; FP-AAPCS-NEXT: .save {r4, 
r6} +; FP-AAPCS-NEXT: push {r4, r6} +; FP-AAPCS-NEXT: .pad #24 +; FP-AAPCS-NEXT: sub sp, #24 +; FP-AAPCS-NEXT: mov r6, sp +; FP-AAPCS-NEXT: mov r2, r6 +; FP-AAPCS-NEXT: str r1, [r2, #16] +; FP-AAPCS-NEXT: str r0, [r2, #20] +; FP-AAPCS-NEXT: mov r1, sp +; FP-AAPCS-NEXT: str r1, [r2, #8] +; FP-AAPCS-NEXT: lsls r1, r0, #2 +; FP-AAPCS-NEXT: adds r1, r1, #7 +; FP-AAPCS-NEXT: movs r3, #7 +; FP-AAPCS-NEXT: bics r1, r3 +; FP-AAPCS-NEXT: mov r3, sp +; FP-AAPCS-NEXT: subs r1, r3, r1 +; FP-AAPCS-NEXT: mov sp, r1 +; FP-AAPCS-NEXT: movs r1, #0 +; FP-AAPCS-NEXT: str r1, [r6, #4] +; FP-AAPCS-NEXT: str r0, [r2] +; FP-AAPCS-NEXT: mov r4, r11 +; FP-AAPCS-NEXT: subs r4, #8 +; FP-AAPCS-NEXT: mov sp, r4 +; FP-AAPCS-NEXT: pop {r4, r6} +; FP-AAPCS-NEXT: pop {r0} +; FP-AAPCS-NEXT: mov r11, r0 +; FP-AAPCS-NEXT: pop {r0} +; FP-AAPCS-NEXT: bx r0 +; +; NOFP-LABEL: required_fp: +; NOFP: @ %bb.0: +; NOFP-NEXT: .save {r4, r6, r7, lr} +; NOFP-NEXT: push {r4, r6, r7, lr} +; NOFP-NEXT: .setfp r7, sp, #8 +; NOFP-NEXT: add r7, sp, #8 +; NOFP-NEXT: .pad #24 +; NOFP-NEXT: sub sp, #24 +; NOFP-NEXT: mov r6, sp +; NOFP-NEXT: mov r2, r6 +; NOFP-NEXT: str r1, [r2, #16] +; NOFP-NEXT: str r0, [r2, #20] +; NOFP-NEXT: mov r1, sp +; NOFP-NEXT: str r1, [r2, #8] +; NOFP-NEXT: lsls r1, r0, #2 +; NOFP-NEXT: adds r1, r1, #7 +; NOFP-NEXT: movs r3, #7 +; NOFP-NEXT: bics r1, r3 +; NOFP-NEXT: mov r3, sp +; NOFP-NEXT: subs r1, r3, r1 +; NOFP-NEXT: mov sp, r1 +; NOFP-NEXT: movs r1, #0 +; NOFP-NEXT: str r1, [r6, #4] +; NOFP-NEXT: str r0, [r2] +; NOFP-NEXT: subs r4, r7, #7 +; NOFP-NEXT: subs r4, #1 +; NOFP-NEXT: mov sp, r4 +; NOFP-NEXT: pop {r4, r6, r7} +; NOFP-NEXT: pop {r0} +; NOFP-NEXT: bx r0 +; +; NOFP-AAPCS-LABEL: required_fp: +; NOFP-AAPCS: @ %bb.0: +; NOFP-AAPCS-NEXT: .save {lr} +; NOFP-AAPCS-NEXT: push {lr} +; NOFP-AAPCS-NEXT: mov lr, r11 +; NOFP-AAPCS-NEXT: .save {r11} +; NOFP-AAPCS-NEXT: push {lr} +; NOFP-AAPCS-NEXT: .setfp r11, sp +; NOFP-AAPCS-NEXT: mov r11, sp +; NOFP-AAPCS-NEXT: .save {r4, r6} +; 
NOFP-AAPCS-NEXT: push {r4, r6} +; NOFP-AAPCS-NEXT: .pad #24 +; NOFP-AAPCS-NEXT: sub sp, #24 +; NOFP-AAPCS-NEXT: mov r6, sp +; NOFP-AAPCS-NEXT: mov r2, r6 +; NOFP-AAPCS-NEXT: str r1, [r2, #16] +; NOFP-AAPCS-NEXT: str r0, [r2, #20] +; NOFP-AAPCS-NEXT: mov r1, sp +; NOFP-AAPCS-NEXT: str r1, [r2, #8] +; NOFP-AAPCS-NEXT: lsls r1, r0, #2 +; NOFP-AAPCS-NEXT: adds r1, r1, #7 +; NOFP-AAPCS-NEXT: movs r3, #7 +; NOFP-AAPCS-NEXT: bics r1, r3 +; NOFP-AAPCS-NEXT: mov r3, sp +; NOFP-AAPCS-NEXT: subs r1, r3, r1 +; NOFP-AAPCS-NEXT: mov sp, r1 +; NOFP-AAPCS-NEXT: movs r1, #0 +; NOFP-AAPCS-NEXT: str r1, [r6, #4] +; NOFP-AAPCS-NEXT: str r0, [r2] +; NOFP-AAPCS-NEXT: mov r4, r11 +; NOFP-AAPCS-NEXT: subs r4, #8 +; NOFP-AAPCS-NEXT: mov sp, r4 +; NOFP-AAPCS-NEXT: pop {r4, r6} +; NOFP-AAPCS-NEXT: pop {r0} +; NOFP-AAPCS-NEXT: mov r11, r0 +; NOFP-AAPCS-NEXT: pop {r0} +; NOFP-AAPCS-NEXT: bx r0 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i8*, align 8 + %6 = alloca i64, align 8 + store i32 %0, i32* %3, align 4 + store i32 %1, i32* %4, align 4 + %7 = load i32, i32* %3, align 4 + %8 = zext i32 %7 to i64 + %9 = call i8* @llvm.stacksave() + store i8* %9, i8** %5, align 8 + %10 = alloca i32, i64 %8, align 4 + store i64 %8, i64* %6, align 8 + ret void +} -- 2.7.4