cl::desc("sort stack allocations"),
cl::init(true), cl::Hidden);
+cl::opt<bool> EnableHomogeneousPrologEpilog(
+ "homogeneous-prolog-epilog", cl::init(false), cl::ZeroOrMore, cl::Hidden,
+ cl::desc("Emit homogeneous prologue and epilogue for the size "
+ "optimization (default = off)"));
+
STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
/// Returns the argument pop size.
return ArgumentPopSize;
}
+static bool produceCompactUnwindFrame(MachineFunction &MF);
+static bool needsWinCFI(const MachineFunction &MF);
+static StackOffset getSVEStackSize(const MachineFunction &MF);
+
+/// Returns true if homogeneous prolog or epilog code can be emitted
+/// for the size optimization. If possible, a frame helper call is injected.
+/// When an Exit block is given, this check is for the epilog.
+bool AArch64FrameLowering::homogeneousPrologEpilog(
+ MachineFunction &MF, MachineBasicBlock *Exit) const {
+ if (!MF.getFunction().hasMinSize())
+ return false;
+ if (!EnableHomogeneousPrologEpilog)
+ return false;
+ if (ReverseCSRRestoreSeq)
+ return false;
+ if (EnableRedZone)
+ return false;
+
+ // TODO: Windows is not supported yet.
+ if (needsWinCFI(MF))
+ return false;
+ // TODO: SVE is not supported yet.
+ if (getSVEStackSize(MF))
+ return false;
+
+ // For simplicity, bail out if a stack adjustment is needed on return.
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+ if (MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(MF))
+ return false;
+ if (Exit && getArgumentPopSize(MF, *Exit))
+ return false;
+
+ return true;
+}
+
+/// Returns true if CSRs should be paired.
+bool AArch64FrameLowering::producePairRegisters(MachineFunction &MF) const {
+ return produceCompactUnwindFrame(MF) || homogeneousPrologEpilog(MF);
+}
+
/// This is the biggest offset to the stack pointer we can encode in aarch64
/// instructions (without using a separate calculation and a temp register).
/// Note that the exception here are vector stores/loads which cannot encode any
const MachineFrameInfo &MFI = MF.getFrameInfo();
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ if (homogeneousPrologEpilog(MF))
+ return false;
if (AFI->getLocalStackSize() == 0)
return false;
// All of the remaining stack allocations are for locals.
AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
+ bool HomPrologEpilog = homogeneousPrologEpilog(MF);
if (CombineSPBump) {
assert(!SVEStackSize && "Cannot combine SP bump with SVE");
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(-NumBytes), TII,
MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
NumBytes = 0;
+ } else if (HomPrologEpilog) {
+ // The stack has already been adjusted.
+ NumBytes -= PrologueSaveSize;
} else if (PrologueSaveSize != 0) {
MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(
MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI, &HasWinCFI);
if (CombineSPBump)
FPOffset += AFI->getLocalStackSize();
- // Issue sub fp, sp, FPOffset or
- // mov fp,sp when FPOffset is zero.
- // Note: All stores of callee-saved registers are marked as "FrameSetup".
- // This code marks the instruction(s) that set the FP also.
- emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
- StackOffset::getFixed(FPOffset), TII,
- MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
+ if (HomPrologEpilog) {
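+ // The HOM_Prolog pseudo was emitted just before this point while spilling
+ // the callee-saves; append the FP offset so the lowering pass can
+ // materialize the frame-pointer setup.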
+ auto Prolog = MBBI;
+ --Prolog;
+ assert(Prolog->getOpcode() == AArch64::HOM_Prolog);
+ Prolog->addOperand(MachineOperand::CreateImm(FPOffset));
+ } else {
+ // Issue sub fp, sp, FPOffset or
+ // mov fp,sp when FPOffset is zero.
+ // Note: All stores of callee-saved registers are marked as "FrameSetup".
+ // This code marks the instruction(s) that set the FP also.
+ emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
+ StackOffset::getFixed(FPOffset), TII,
+ MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
+ }
}
if (windowsRequiresStackProbe(MF, NumBytes)) {
// function.
if (MF.hasEHFunclets())
AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
+ if (homogeneousPrologEpilog(MF, &MBB)) {
+ assert(!NeedsWinCFI);
+ auto LastPopI = MBB.getFirstTerminator();
+ if (LastPopI != MBB.begin()) {
+ auto HomogeneousEpilog = std::prev(LastPopI);
+ if (HomogeneousEpilog->getOpcode() == AArch64::HOM_Epilog)
+ LastPopI = HomogeneousEpilog;
+ }
+
+ // Adjust the local stack.
+ emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
+ StackOffset::getFixed(-AFI->getLocalStackSize()), TII,
+ MachineInstr::FrameDestroy, false, NeedsWinCFI);
+
+ // SP has already been adjusted while restoring the callee-save registers.
+ // We have already bailed out on the case that needs to adjust SP for
+ // arguments.
+ assert(AfterCSRPopSize == 0);
+ return;
+ }
bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);
// Assume we can't combine the last pop with the sp restore.
MBB.addLiveIn(AArch64::X18);
}
+ if (homogeneousPrologEpilog(MF)) {
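+ // Emit a single HOM_Prolog pseudo listing the callee-save register pairs.
+ // It is expanded later by the AArch64LowerHomogeneousPrologEpilog pass; an
+ // FP offset operand may be appended in emitPrologue.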
+ auto MIB = BuildMI(MBB, MI, DL, TII.get(AArch64::HOM_Prolog))
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ for (auto &RPI : RegPairs) {
+ MIB.addReg(RPI.Reg1);
+ MIB.addReg(RPI.Reg2);
+
+ // Update the register live-ins.
+ if (!MRI.isReserved(RPI.Reg1))
+ MBB.addLiveIn(RPI.Reg1);
+ if (!MRI.isReserved(RPI.Reg2))
+ MBB.addLiveIn(RPI.Reg2);
+ }
+ return true;
+ }
for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE;
++RPII) {
RegPairInfo RPI = *RPII;
for (const RegPairInfo &RPI : reverse(RegPairs))
if (!RPI.isScalable())
EmitMI(RPI);
+ } else if (homogeneousPrologEpilog(MF, &MBB)) {
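+ // Emit a single HOM_Epilog pseudo that defines the restored callee-save
+ // register pairs; it is expanded later by the
+ // AArch64LowerHomogeneousPrologEpilog pass.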
+ auto MIB = BuildMI(MBB, MI, DL, TII.get(AArch64::HOM_Epilog))
+ .setMIFlag(MachineInstr::FrameDestroy);
+ for (auto &RPI : RegPairs) {
+ MIB.addReg(RPI.Reg1, RegState::Define);
+ MIB.addReg(RPI.Reg2, RegState::Define);
+ }
+ return true;
} else
for (const RegPairInfo &RPI : RegPairs)
if (!RPI.isScalable())
// MachO's compact unwind format relies on all registers being stored in
// pairs.
// FIXME: the usual format is actually better if unwinding isn't needed.
- if (produceCompactUnwindFrame(MF) && PairedReg != AArch64::NoRegister &&
+ if (producePairRegisters(MF) && PairedReg != AArch64::NoRegister &&
!SavedRegs.test(PairedReg)) {
SavedRegs.set(PairedReg);
if (AArch64::GPR64RegClass.contains(PairedReg) &&
// MachO's compact unwind format relies on all registers being stored in
// pairs, so if we need to spill one extra for BigStack, then we need to
// store the pair.
- if (produceCompactUnwindFrame(MF))
+ if (producePairRegisters(MF))
SavedRegs.set(UnspilledCSGPRPaired);
ExtraCSSpill = UnspilledCSGPR;
}
--- /dev/null
+//===- AArch64LowerHomogeneousPrologEpilog.cpp ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that lowers homogeneous prolog/epilog instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
+#include "MCTargetDesc/AArch64InstPrinter.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/raw_ostream.h"
+#include <sstream>
+
+using namespace llvm;
+
+#define AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME \
+ "AArch64 homogeneous prolog/epilog lowering pass"
+
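+// A frame helper call is emitted only when it would outline at least this
+// many instructions; see shouldUseFrameHelper below.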
+cl::opt<int> FrameHelperSizeThreshold(
+ "frame-helper-size-threshold", cl::init(2), cl::Hidden,
+ cl::desc("The minimum number of instructions that are outlined in a frame "
+ "helper (default = 2)"));
+
+namespace {
+
+class AArch64LowerHomogeneousPE {
+public:
+ const AArch64InstrInfo *TII;
+
+ AArch64LowerHomogeneousPE(Module *M, MachineModuleInfo *MMI)
+ : M(M), MMI(MMI) {}
+
+ bool run();
+ bool runOnMachineFunction(MachineFunction &Fn);
+
+private:
+ Module *M;
+ MachineModuleInfo *MMI;
+
+ bool runOnMBB(MachineBasicBlock &MBB);
+ bool runOnMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI);
+
+ /// Lower a HOM_Prolog pseudo instruction into a helper call
+ /// or a sequence of homogeneous stores.
+ /// When an FP setup follows, it can be optimized.
+ bool lowerProlog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI);
+ /// Lower a HOM_Epilog pseudo instruction into a helper call
+ /// or a sequence of homogeneous loads.
+ /// When a return follows, it can be optimized.
+ bool lowerEpilog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI);
+};
+
+class AArch64LowerHomogeneousPrologEpilog : public ModulePass {
+public:
+ static char ID;
+
+ AArch64LowerHomogeneousPrologEpilog() : ModulePass(ID) {
+ initializeAArch64LowerHomogeneousPrologEpilogPass(
+ *PassRegistry::getPassRegistry());
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineModuleInfoWrapperPass>();
+ AU.addPreserved<MachineModuleInfoWrapperPass>();
+ AU.setPreservesAll();
+ ModulePass::getAnalysisUsage(AU);
+ }
+ bool runOnModule(Module &M) override;
+
+ StringRef getPassName() const override {
+ return AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME;
+ }
+};
+
+} // end anonymous namespace
+
+char AArch64LowerHomogeneousPrologEpilog::ID = 0;
+
+INITIALIZE_PASS(AArch64LowerHomogeneousPrologEpilog,
+ "aarch64-lower-homogeneous-prolog-epilog",
+ AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME, false, false)
+
+bool AArch64LowerHomogeneousPrologEpilog::runOnModule(Module &M) {
+ if (skipModule(M))
+ return false;
+
+ MachineModuleInfo *MMI =
+ &getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
+ return AArch64LowerHomogeneousPE(&M, MMI).run();
+}
+
+bool AArch64LowerHomogeneousPE::run() {
+ bool Changed = false;
+ for (auto &F : *M) {
+ if (F.empty())
+ continue;
+
+ MachineFunction *MF = MMI->getMachineFunction(F);
+ if (!MF)
+ continue;
+ Changed |= runOnMachineFunction(*MF);
+ }
+
+ return Changed;
+}
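+
+/// The kinds of frame helpers that can be outlined:
+/// - Prolog: saves the callee-saved registers (FP/LR are pushed by the
+///   caller before the helper call).
+/// - PrologFrame: like Prolog, but also sets up the frame pointer.
+/// - Epilog: restores the callee-saved registers and returns to the call
+///   site via X16.
+/// - EpilogTail: reached via a tail call; restores the callee-saved
+///   registers and performs the function's return.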
+enum FrameHelperType { Prolog, PrologFrame, Epilog, EpilogTail };
+
+/// Return a frame helper name with the given CSRs and the helper type.
+/// For instance, a prolog helper that saves x19 and x20 is named
+/// OUTLINED_FUNCTION_PROLOG_x19x20.
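+/// A PrologFrame helper additionally encodes the FP offset, e.g.
+/// OUTLINED_FUNCTION_PROLOG_FRAME32_x19x20.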
+static std::string getFrameHelperName(SmallVectorImpl<unsigned> &Regs,
+ FrameHelperType Type, unsigned FpOffset) {
+ std::ostringstream RegStream;
+ switch (Type) {
+ case FrameHelperType::Prolog:
+ RegStream << "OUTLINED_FUNCTION_PROLOG_";
+ break;
+ case FrameHelperType::PrologFrame:
+ RegStream << "OUTLINED_FUNCTION_PROLOG_FRAME" << FpOffset << "_";
+ break;
+ case FrameHelperType::Epilog:
+ RegStream << "OUTLINED_FUNCTION_EPILOG_";
+ break;
+ case FrameHelperType::EpilogTail:
+ RegStream << "OUTLINED_FUNCTION_EPILOG_TAIL_";
+ break;
+ }
+
+ for (auto Reg : Regs)
+ RegStream << AArch64InstPrinter::getRegisterName(Reg);
+
+ return RegStream.str();
+}
+
+/// Create a Function for the unique frame helper with the given name.
+/// Return a newly created MachineFunction with an empty MachineBasicBlock.
+static MachineFunction &createFrameHelperMachineFunction(Module *M,
+ MachineModuleInfo *MMI,
+ StringRef Name) {
+ LLVMContext &C = M->getContext();
+ Function *F = M->getFunction(Name);
+ assert(F == nullptr && "Function has been created before");
+ F = Function::Create(FunctionType::get(Type::getVoidTy(C), false),
+ Function::ExternalLinkage, Name, M);
+ assert(F && "Function was null!");
+
+ // Use ODR linkage to avoid duplication.
+ F->setLinkage(GlobalValue::LinkOnceODRLinkage);
+ F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+
+ // Set no-opt/minsize, so we don't insert padding between outlined
+ // functions.
+ F->addFnAttr(Attribute::OptimizeNone);
+ F->addFnAttr(Attribute::NoInline);
+ F->addFnAttr(Attribute::MinSize);
+ F->addFnAttr(Attribute::Naked);
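+ // Naked suppresses compiler-generated prologue/epilogue code for the
+ // helper; its body is exactly the machine basic block built below.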
+
+ MachineFunction &MF = MMI->getOrCreateMachineFunction(*F);
+ // Remove unnecessary register liveness and set NoVRegs.
+ MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness);
+ MF.getProperties().reset(MachineFunctionProperties::Property::IsSSA);
+ MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
+ MF.getRegInfo().freezeReservedRegs(MF);
+
+ // Create entry block.
+ BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
+ IRBuilder<> Builder(EntryBB);
+ Builder.CreateRetVoid();
+
+ // Insert the new block into the function.
+ MachineBasicBlock *MBB = MF.CreateMachineBasicBlock();
+ MF.insert(MF.begin(), MBB);
+
+ return MF;
+}
+
+/// Emit a store-pair instruction for frame-setup.
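+/// Offset is in units of 8-byte stack slots (the scaled STP immediate).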
+static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator Pos,
+ const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2,
+ int Offset, bool IsPreDec) {
+ bool IsFloat = AArch64::FPR64RegClass.contains(Reg1);
+ assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2)));
+ unsigned Opc;
+ if (IsPreDec)
+ Opc = IsFloat ? AArch64::STPDpre : AArch64::STPXpre;
+ else
+ Opc = IsFloat ? AArch64::STPDi : AArch64::STPXi;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc));
+ if (IsPreDec)
+ MIB.addDef(AArch64::SP);
+ MIB.addReg(Reg2)
+ .addReg(Reg1)
+ .addReg(AArch64::SP)
+ .addImm(Offset)
+ .setMIFlag(MachineInstr::FrameSetup);
+}
+
+/// Emit a load-pair instruction for frame-destroy.
+static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator Pos,
+ const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2,
+ int Offset, bool IsPostDec) {
+ bool IsFloat = AArch64::FPR64RegClass.contains(Reg1);
+ assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2)));
+ unsigned Opc;
+ if (IsPostDec)
+ Opc = IsFloat ? AArch64::LDPDpost : AArch64::LDPXpost;
+ else
+ Opc = IsFloat ? AArch64::LDPDi : AArch64::LDPXi;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc));
+ if (IsPostDec)
+ MIB.addDef(AArch64::SP);
+ MIB.addReg(Reg2, getDefRegState(true))
+ .addReg(Reg1, getDefRegState(true))
+ .addReg(AArch64::SP)
+ .addImm(Offset)
+ .setMIFlag(MachineInstr::FrameDestroy);
+}
+
+/// Return a unique function if a helper can be formed with the given Regs
+/// and frame type.
+/// 1) _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22:
+/// stp x22, x21, [sp, #-32]! ; x29/x30 has been stored at the caller
+/// stp x20, x19, [sp, #16]
+/// ret
+///
+/// 2) _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22:
+/// stp x22, x21, [sp, #-32]! ; x29/x30 has been stored at the caller
+/// stp x20, x19, [sp, #16]
+/// add fp, sp, #32
+/// ret
+///
+/// 3) _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22:
+/// mov x16, x30
+/// ldp x29, x30, [sp, #32]
+/// ldp x20, x19, [sp, #16]
+/// ldp x22, x21, [sp], #48
+/// ret x16
+///
+/// 4) _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22:
+/// ldp x29, x30, [sp, #32]
+/// ldp x20, x19, [sp, #16]
+/// ldp x22, x21, [sp], #48
+/// ret
+/// @param M module
+/// @param MMI machine module info
+/// @param Regs callee save regs that the helper will handle
+/// @param Type frame helper type
+/// @return a helper function
+static Function *getOrCreateFrameHelper(Module *M, MachineModuleInfo *MMI,
+ SmallVectorImpl<unsigned> &Regs,
+ FrameHelperType Type,
+ unsigned FpOffset = 0) {
+ assert(Regs.size() >= 2);
+ auto Name = getFrameHelperName(Regs, Type, FpOffset);
+ auto *F = M->getFunction(Name);
+ if (F)
+ return F;
+
+ auto &MF = createFrameHelperMachineFunction(M, MMI, Name);
+ MachineBasicBlock &MBB = *MF.begin();
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+
+ int Size = (int)Regs.size();
+ switch (Type) {
+ case FrameHelperType::Prolog:
+ case FrameHelperType::PrologFrame: {
+ // Compute the remaining SP adjustment beyond FP/LR.
+ auto LRIdx = std::distance(
+ Regs.begin(), std::find(Regs.begin(), Regs.end(), AArch64::LR));
+
+ // If the register stored to the lowest address is not LR, we must subtract
+ // more from SP here.
+ if (LRIdx != Size - 2) {
+ assert(Regs[Size - 2] != AArch64::LR);
+ emitStore(MF, MBB, MBB.end(), TII, Regs[Size - 2], Regs[Size - 1],
+ LRIdx - Size + 2, true);
+ }
+
+ // Store CSRs in the reverse order.
+ for (int I = Size - 3; I >= 0; I -= 2) {
+ // FP/LR have been stored at the call site.
+ if (Regs[I - 1] == AArch64::LR)
+ continue;
+ emitStore(MF, MBB, MBB.end(), TII, Regs[I - 1], Regs[I], Size - I - 1,
+ false);
+ }
+ if (Type == FrameHelperType::PrologFrame)
+ BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::ADDXri))
+ .addDef(AArch64::FP)
+ .addUse(AArch64::SP)
+ .addImm(FpOffset)
+ .addImm(0)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::RET))
+ .addReg(AArch64::LR);
+ break;
+ }
+ case FrameHelperType::Epilog:
+ case FrameHelperType::EpilogTail:
+ if (Type == FrameHelperType::Epilog)
+ // Stash LR in X16.
+ BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::ORRXrs))
+ .addDef(AArch64::X16)
+ .addReg(AArch64::XZR)
+ .addUse(AArch64::LR)
+ .addImm(0);
+
+ for (int I = 0; I < Size - 2; I += 2)
+ emitLoad(MF, MBB, MBB.end(), TII, Regs[I], Regs[I + 1], Size - I - 2,
+ false);
+ // Restore the last CSR with post-increment of SP.
+ emitLoad(MF, MBB, MBB.end(), TII, Regs[Size - 2], Regs[Size - 1], Size,
+ true);
+
+ BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::RET))
+ .addReg(Type == FrameHelperType::Epilog ? AArch64::X16 : AArch64::LR);
+ break;
+ }
+
+ return M->getFunction(Name);
+}
+
+/// This function checks if a frame helper should be used for
+/// HOM_Prolog/HOM_Epilog pseudo instruction expansion.
+/// @param MBB machine basic block
+/// @param NextMBBI next instruction following HOM_Prolog/HOM_Epilog
+/// @param Regs callee save registers that are saved or restored.
+/// @param Type frame helper type
+/// @return True if using a helper is worthwhile.
+static bool shouldUseFrameHelper(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &NextMBBI,
+ SmallVectorImpl<unsigned> &Regs,
+ FrameHelperType Type) {
+ const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
+ auto RegCount = Regs.size();
+ assert(RegCount > 0 && (RegCount % 2 == 0));
+ // # of instructions that will be outlined.
+ int InstCount = RegCount / 2;
+
+ // Do not use a helper call when not saving LR.
+ if (std::find(Regs.begin(), Regs.end(), AArch64::LR) == Regs.end())
+ return false;
+
+ switch (Type) {
+ case FrameHelperType::Prolog:
+ // Prolog helper cannot save FP/LR.
+ InstCount--;
+ break;
+ case FrameHelperType::PrologFrame: {
+ // Effectively no change in InstCount since the FP adjustment is included.
+ break;
+ }
+ case FrameHelperType::Epilog:
+ // Bail out if X16 is live across the epilog helper because it is used in
+ // the helper to handle X30.
+ for (auto NextMI = NextMBBI; NextMI != MBB.end(); NextMI++) {
+ if (NextMI->readsRegister(AArch64::W16, TRI))
+ return false;
+ }
+ // Epilog may not be in the last block. Check the liveness in successors.
+ for (const MachineBasicBlock *SuccMBB : MBB.successors()) {
+ if (SuccMBB->isLiveIn(AArch64::W16) || SuccMBB->isLiveIn(AArch64::X16))
+ return false;
+ }
+ // No change in InstCount for the regular epilog case.
+ break;
+ case FrameHelperType::EpilogTail: {
+ // EpilogTail helper includes the caller's return.
+ if (NextMBBI == MBB.end())
+ return false;
+ if (NextMBBI->getOpcode() != AArch64::RET_ReallyLR)
+ return false;
+ InstCount++;
+ break;
+ }
+ }
+
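+ // For example, with 6 CSRs (FP/LR plus two GPR pairs) InstCount starts at
+ // 3; the Prolog case drops it to 2 because FP/LR stay at the call site,
+ // which still meets the default threshold of 2.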
+ return InstCount >= FrameHelperSizeThreshold;
+}
+
+/// Lower a HOM_Epilog pseudo instruction into a helper call while creating
+/// the helper on demand, or emit a sequence of loads in place when a helper
+/// call is not used.
+///
+/// 1. With a helper including ret
+/// HOM_Epilog x30, x29, x19, x20, x21, x22 ; MBBI
+/// ret ; NextMBBI
+/// =>
+/// b _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22
+/// ... ; NextMBBI
+///
+/// 2. With a helper
+/// HOM_Epilog x30, x29, x19, x20, x21, x22
+/// =>
+/// bl _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22
+///
+/// 3. Without a helper
+/// HOM_Epilog x30, x29, x19, x20, x21, x22
+/// =>
+/// ldp x29, x30, [sp, #32]
+/// ldp x20, x19, [sp, #16]
+/// ldp x22, x21, [sp], #48
+bool AArch64LowerHomogeneousPE::lowerEpilog(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI) {
+ auto &MF = *MBB.getParent();
+ MachineInstr &MI = *MBBI;
+
+ DebugLoc DL = MI.getDebugLoc();
+ SmallVector<unsigned, 8> Regs;
+ for (auto &MO : MI.operands())
+ if (MO.isReg())
+ Regs.push_back(MO.getReg());
+ int Size = (int)Regs.size();
+ if (Size == 0)
+ return false;
+ // Registers come in pairs.
+ assert(Size % 2 == 0);
+ assert(MI.getOpcode() == AArch64::HOM_Epilog);
+
+ auto Return = NextMBBI;
+ if (shouldUseFrameHelper(MBB, NextMBBI, Regs, FrameHelperType::EpilogTail)) {
+ // When the MBB ends with a return, emit a tail call to the epilog helper.
+ auto *EpilogTailHelper =
+ getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::EpilogTail);
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::TCRETURNdi))
+ .addGlobalAddress(EpilogTailHelper)
+ .addImm(0)
+ .setMIFlag(MachineInstr::FrameDestroy)
+ .copyImplicitOps(MI)
+ .copyImplicitOps(*Return);
+ NextMBBI = std::next(Return);
+ Return->removeFromParent();
+ } else if (shouldUseFrameHelper(MBB, NextMBBI, Regs,
+ FrameHelperType::Epilog)) {
+ // The default epilog helper case.
+ auto *EpilogHelper =
+ getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::Epilog);
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
+ .addGlobalAddress(EpilogHelper)
+ .setMIFlag(MachineInstr::FrameDestroy)
+ .copyImplicitOps(MI);
+ } else {
+ // Fall back to no-helper.
+ for (int I = 0; I < Size - 2; I += 2)
+ emitLoad(MF, MBB, MBBI, *TII, Regs[I], Regs[I + 1], Size - I - 2, false);
+ // Restore the last CSR with post-increment of SP.
+ emitLoad(MF, MBB, MBBI, *TII, Regs[Size - 2], Regs[Size - 1], Size, true);
+ }
+
+ MBBI->removeFromParent();
+ return true;
+}
+
+/// Lower a HOM_Prolog pseudo instruction into a helper call while creating
+/// the helper on demand, or emit a sequence of stores in place when a helper
+/// call is not used.
+///
+/// 1. With a helper including frame-setup
+/// HOM_Prolog x30, x29, x19, x20, x21, x22, 32
+/// =>
+/// stp x29, x30, [sp, #-16]!
+/// bl _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22
+///
+/// 2. With a helper
+/// HOM_Prolog x30, x29, x19, x20, x21, x22
+/// =>
+/// stp x29, x30, [sp, #-16]!
+/// bl _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22
+///
+/// 3. Without a helper
+/// HOM_Prolog x30, x29, x19, x20, x21, x22
+/// =>
+/// stp x22, x21, [sp, #-48]!
+/// stp x20, x19, [sp, #16]
+/// stp x29, x30, [sp, #32]
+bool AArch64LowerHomogeneousPE::lowerProlog(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI) {
+ auto &MF = *MBB.getParent();
+ MachineInstr &MI = *MBBI;
+
+ DebugLoc DL = MI.getDebugLoc();
+ SmallVector<unsigned, 8> Regs;
+ int LRIdx = 0;
+ Optional<int> FpOffset;
+ for (auto &MO : MI.operands()) {
+ if (MO.isReg()) {
+ if (MO.getReg() == AArch64::LR)
+ LRIdx = Regs.size();
+ Regs.push_back(MO.getReg());
+ } else if (MO.isImm()) {
+ FpOffset = MO.getImm();
+ }
+ }
+ int Size = (int)Regs.size();
+ if (Size == 0)
+ return false;
+ // Registers come in pairs, as required by the compact unwind format.
+ assert(Size % 2 == 0);
+ assert(MI.getOpcode() == AArch64::HOM_Prolog);
+
+ if (FpOffset &&
+ shouldUseFrameHelper(MBB, NextMBBI, Regs, FrameHelperType::PrologFrame)) {
+ // FP/LR are stored at the top of the stack before the prolog helper call.
+ emitStore(MF, MBB, MBBI, *TII, AArch64::LR, AArch64::FP, -LRIdx - 2, true);
+ auto *PrologFrameHelper = getOrCreateFrameHelper(
+ M, MMI, Regs, FrameHelperType::PrologFrame, *FpOffset);
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
+ .addGlobalAddress(PrologFrameHelper)
+ .setMIFlag(MachineInstr::FrameSetup)
+ .copyImplicitOps(MI)
+ .addReg(AArch64::FP, RegState::Implicit | RegState::Define)
+ .addReg(AArch64::SP, RegState::Implicit);
+ } else if (!FpOffset && shouldUseFrameHelper(MBB, NextMBBI, Regs,
+ FrameHelperType::Prolog)) {
+ // FP/LR are stored at the top of the stack before the prolog helper call.
+ emitStore(MF, MBB, MBBI, *TII, AArch64::LR, AArch64::FP, -LRIdx - 2, true);
+ auto *PrologHelper =
+ getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::Prolog);
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
+ .addGlobalAddress(PrologHelper)
+ .setMIFlag(MachineInstr::FrameSetup)
+ .copyImplicitOps(MI);
+ } else {
+ // Fall back to no-helper.
+ emitStore(MF, MBB, MBBI, *TII, Regs[Size - 2], Regs[Size - 1], -Size, true);
+ for (int I = Size - 3; I >= 0; I -= 2)
+ emitStore(MF, MBB, MBBI, *TII, Regs[I - 1], Regs[I], Size - I - 1, false);
+ if (FpOffset) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri))
+ .addDef(AArch64::FP)
+ .addUse(AArch64::SP)
+ .addImm(*FpOffset)
+ .addImm(0)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ }
+
+ MBBI->removeFromParent();
+ return true;
+}
+
+/// Process each machine instruction
+/// @param MBB machine basic block
+/// @param MBBI current instruction iterator
+/// @param NextMBBI next instruction iterator which can be updated
+/// @return True when IR is changed.
+bool AArch64LowerHomogeneousPE::runOnMI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ default:
+ break;
+ case AArch64::HOM_Prolog:
+ return lowerProlog(MBB, MBBI, NextMBBI);
+ case AArch64::HOM_Epilog:
+ return lowerEpilog(MBB, MBBI, NextMBBI);
+ }
+ return false;
+}
+
+bool AArch64LowerHomogeneousPE::runOnMBB(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ MachineBasicBlock::iterator NMBBI = std::next(MBBI);
+ Modified |= runOnMI(MBB, MBBI, NMBBI);
+ MBBI = NMBBI;
+ }
+
+ return Modified;
+}
+
+bool AArch64LowerHomogeneousPE::runOnMachineFunction(MachineFunction &MF) {
+ TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+ bool Modified = false;
+ for (auto &MBB : MF)
+ Modified |= runOnMBB(MBB);
+ return Modified;
+}
+
+ModulePass *llvm::createAArch64LowerHomogeneousPrologEpilogPass() {
+ return new AArch64LowerHomogeneousPrologEpilog();
+}