// Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
// memory.
-static bool allStackObjectsAreDeadOrSGPR(const MachineFrameInfo &MFI) {
+static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
I != E; ++I) {
- if (!MFI.isDeadObjectIndex(I) &&
- MFI.getStackID(I) != TargetStackID::SGPRSpill)
+ if (!MFI.isDeadObjectIndex(I))
return false;
}
const SIRegisterInfo &TRI = TII->getRegisterInfo();
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
- if (TRI.spillSGPRToVGPR() && FuncInfo->hasSpilledSGPRs()) {
- // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
- // are spilled to VGPRs, in which case we can eliminate the stack usage.
- //
- // XXX - This operates under the assumption that only other SGPR spills are
- // users of the frame index. I'm not 100% sure this is correct. The
- // StackColoring pass has a comment saying a future improvement would be to
- // merging of allocas with spill slots, but for now according to
- // MachineFrameInfo isSpillSlot can't alias any other object.
- for (MachineBasicBlock &MBB : MF) {
- MachineBasicBlock::iterator Next;
- for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
- MachineInstr &MI = *I;
- Next = std::next(I);
-
- if (TII->isSGPRSpill(MI)) {
- int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
- assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
- if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) {
- bool Spilled = TRI.eliminateSGPRToVGPRSpillFrameIndex(MI, FI, RS);
- (void)Spilled;
- assert(Spilled && "failed to spill SGPR to VGPR when allocated");
- }
- }
- }
- }
- }
-
FuncInfo->removeSGPRToVGPRFrameIndices(MFI);
- if (!allStackObjectsAreDeadOrSGPR(MFI)) {
+ // FIXME: The other checks should be redundant with allStackObjectsAreDead,
+ // but currently hasNonSpillStackObjects is set only from source
+ // allocas. Stack temps produced from legalization are not counted currently.
+ if (!allStackObjectsAreDead(MFI)) {
assert(RS && "RegScavenger required if spilling");
if (FuncInfo->isEntryFunction()) {
}
}
-void SIFrameLowering::determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
+// Only report VGPRs to generic code.
+void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
+ BitVector &SavedRegs,
RegScavenger *RS) const {
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ SavedRegs.clearBitsNotInMask(TRI->getAllVGPRRegMask());
+
+ // VGPRs used for SGPR spilling need to be specially inserted in the prolog.
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ for (auto SSpill : MFI->getSGPRSpillVGPRs())
+ SavedRegs.reset(SSpill.VGPR);
+}
+
+void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
+ BitVector &SavedRegs,
+ RegScavenger *RS) const {
+ TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
// The SP is specifically managed and we don't want extra spills of it.
SavedRegs.reset(MFI->getStackPtrOffsetReg());
+ SavedRegs.clearBitsInMask(TRI->getAllVGPRRegMask());
}
MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
--- /dev/null
+//===-- SILowerSGPRSPills.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Handle SGPR spills. This pass takes the place of PrologEpilogInserter for all
+// SGPR spills, so must insert CSR SGPR spills as well as expand them.
+//
+// This pass must never create new SGPR virtual registers.
+//
+// FIXME: Must stop RegScavenger spills in later passes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "si-lower-sgpr-spills"
+
+using MBBVector = SmallVector<MachineBasicBlock *, 4>;
+
+namespace {
+
+class SILowerSGPRSpills : public MachineFunctionPass {
+private:
+ const SIRegisterInfo *TRI = nullptr;
+ const SIInstrInfo *TII = nullptr;
+ VirtRegMap *VRM = nullptr;
+ LiveIntervals *LIS = nullptr;
+
+ // Save and Restore blocks of the current function. Typically there is a
+ // single save block, unless Windows EH funclets are involved.
+ MBBVector SaveBlocks;
+ MBBVector RestoreBlocks;
+
+public:
+ static char ID;
+
+ SILowerSGPRSpills() : MachineFunctionPass(ID) {}
+
+ void calculateSaveRestoreBlocks(MachineFunction &MF);
+ bool spillCalleeSavedRegs(MachineFunction &MF);
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // end anonymous namespace
+
+char SILowerSGPRSpills::ID = 0;
+
+INITIALIZE_PASS_BEGIN(SILowerSGPRSpills, DEBUG_TYPE,
+ "SI lower SGPR spill instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_END(SILowerSGPRSpills, DEBUG_TYPE,
+ "SI lower SGPR spill instructions", false, false)
+
+char &llvm::SILowerSGPRSpillsID = SILowerSGPRSpills::ID;
+
+/// Insert restore code for the callee-saved registers used in the function.
+static void insertCSRSaves(MachineBasicBlock &SaveBlock,
+ ArrayRef<CalleeSavedInfo> CSI,
+ LiveIntervals *LIS) {
+ MachineFunction &MF = *SaveBlock.getParent();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+
+ MachineBasicBlock::iterator I = SaveBlock.begin();
+ if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) {
+ for (const CalleeSavedInfo &CS : CSI) {
+ // Insert the spill to the stack frame.
+ unsigned Reg = CS.getReg();
+
+ MachineInstrSpan MIS(I);
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+
+ TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC,
+ TRI);
+
+ if (LIS) {
+ assert(std::distance(MIS.begin(), I) == 1);
+ MachineInstr &Inst = *std::prev(I);
+
+ LIS->InsertMachineInstrInMaps(Inst);
+ LIS->removeAllRegUnitsForPhysReg(Reg);
+ }
+ }
+ }
+}
+
+/// Insert restore code for the callee-saved registers used in the function.
+static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
+ std::vector<CalleeSavedInfo> &CSI,
+ LiveIntervals *LIS) {
+ MachineFunction &MF = *RestoreBlock.getParent();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+
+ // Restore all registers immediately before the return and any
+ // terminators that precede it.
+ MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator();
+
+ // FIXME: Just emit the readlane/writelane directly
+ if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
+ for (const CalleeSavedInfo &CI : reverse(CSI)) {
+ unsigned Reg = CI.getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+
+ TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI);
+ assert(I != RestoreBlock.begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
+ // Insert in reverse order. loadRegFromStackSlot can insert
+ // multiple instructions.
+
+ if (LIS) {
+ MachineInstr &Inst = *std::prev(I);
+ LIS->InsertMachineInstrInMaps(Inst);
+ LIS->removeAllRegUnitsForPhysReg(Reg);
+ }
+ }
+ }
+}
+
+/// Compute the sets of entry and return blocks for saving and restoring
+/// callee-saved registers, and placing prolog and epilog code.
+void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ // Even when we do not change any CSR, we still want to insert the
+ // prologue and epilogue of the function.
+ // So set the save points for those.
+
+ // Use the points found by shrink-wrapping, if any.
+ if (MFI.getSavePoint()) {
+ SaveBlocks.push_back(MFI.getSavePoint());
+ assert(MFI.getRestorePoint() && "Both restore and save must be set");
+ MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
+ // If RestoreBlock does not have any successor and is not a return block
+ // then the end point is unreachable and we do not need to insert any
+ // epilogue.
+ if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock())
+ RestoreBlocks.push_back(RestoreBlock);
+ return;
+ }
+
+ // Save refs to entry and return blocks.
+ SaveBlocks.push_back(&MF.front());
+ for (MachineBasicBlock &MBB : MF) {
+ if (MBB.isEHFuncletEntry())
+ SaveBlocks.push_back(&MBB);
+ if (MBB.isReturnBlock())
+ RestoreBlocks.push_back(&MBB);
+ }
+}
+
+bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const Function &F = MF.getFunction();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const SIFrameLowering *TFI = ST.getFrameLowering();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ RegScavenger *RS = nullptr;
+
+ // Determine which of the registers in the callee save list should be saved.
+ BitVector SavedRegs;
+ TFI->determineCalleeSavesSGPR(MF, SavedRegs, RS);
+
+ // Add the code to save and restore the callee saved registers.
+ if (!F.hasFnAttribute(Attribute::Naked)) {
+ // FIXME: This is a lie. The CalleeSavedInfo is incomplete, but this is
+ // necessary for verifier liveness checks.
+ MFI.setCalleeSavedInfoValid(true);
+
+ std::vector<CalleeSavedInfo> CSI;
+ const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
+
+ for (unsigned I = 0; CSRegs[I]; ++I) {
+ unsigned Reg = CSRegs[I];
+ if (SavedRegs.test(Reg)) {
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC),
+ TRI->getSpillAlignment(*RC),
+ true);
+
+ CSI.push_back(CalleeSavedInfo(Reg, JunkFI));
+ }
+ }
+
+ if (!CSI.empty()) {
+ for (MachineBasicBlock *SaveBlock : SaveBlocks)
+ insertCSRSaves(*SaveBlock, CSI, LIS);
+
+ for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
+ insertCSRRestores(*RestoreBlock, CSI, LIS);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ TII = ST.getInstrInfo();
+ TRI = &TII->getRegisterInfo();
+
+ VRM = getAnalysisIfAvailable<VirtRegMap>();
+
+ assert(SaveBlocks.empty() && RestoreBlocks.empty());
+
+ // First, expose any CSR SGPR spills. This is mostly the same as what PEI
+ // does, but somewhat simpler.
+ calculateSaveRestoreBlocks(MF);
+ bool HasCSRs = spillCalleeSavedRegs(MF);
+
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (!MFI.hasStackObjects() && !HasCSRs) {
+ SaveBlocks.clear();
+ RestoreBlocks.clear();
+ return false;
+ }
+
+ SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+ bool MadeChange = false;
+
+ if (TRI->spillSGPRToVGPR() && (HasCSRs || FuncInfo->hasSpilledSGPRs())) {
+ // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
+ // are spilled to VGPRs, in which case we can eliminate the stack usage.
+ //
+ // This operates under the assumption that only other SGPR spills are users
+ // of the frame index.
+ for (MachineBasicBlock &MBB : MF) {
+ MachineBasicBlock::iterator Next;
+ for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
+ MachineInstr &MI = *I;
+ Next = std::next(I);
+
+ if (!TII->isSGPRSpill(MI))
+ continue;
+
+ int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
+ assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
+ if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) {
+ bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(MI, FI, nullptr);
+ (void)Spilled;
+ assert(Spilled && "failed to spill SGPR to VGPR when allocated");
+ }
+
+ }
+ }
+
+ for (MachineBasicBlock &MBB : MF) {
+ for (auto SSpill : FuncInfo->getSGPRSpillVGPRs())
+ MBB.addLiveIn(SSpill.VGPR);
+ MBB.sortUniqueLiveIns();
+ }
+
+ FuncInfo->removeSGPRToVGPRFrameIndices(MFI);
+ MadeChange = true;
+ }
+
+ SaveBlocks.clear();
+ RestoreBlocks.clear();
+
+ return MadeChange;
+}