From 59e128266c9de11ba334450e1c3b7101155bd55b Mon Sep 17 00:00:00 2001 From: Eugene Zelenko Date: Tue, 8 Aug 2017 00:47:13 +0000 Subject: [PATCH] [AMDGPU] Fix some Clang-tidy modernize-use-using and Include What You Use warnings; other minor fixes (NFC). llvm-svn: 310328 --- llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp | 36 ++++--- llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp | 51 ++++++---- llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 121 +++++++++++++---------- llvm/lib/Target/AMDGPU/SIInsertWaits.cpp | 21 ++-- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 72 +++++++++----- llvm/lib/Target/AMDGPU/SIInstrInfo.h | 45 ++++++--- llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp | 11 +-- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 35 ++----- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h | 75 +++++++------- llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp | 42 +++++--- 10 files changed, 294 insertions(+), 215 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp index 8cb35c5..150d8c3 100644 --- a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp @@ -1,4 +1,4 @@ -//===-- SIAnnotateControlFlow.cpp - ------------------===// +//===- SIAnnotateControlFlow.cpp ------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -14,16 +14,32 @@ #include "AMDGPU.h" #include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/DivergenceAnalysis.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/SSAUpdater.h" +#include +#include using namespace llvm; @@ -32,8 +48,8 @@ using namespace llvm; namespace { // Complex types used in this pass -typedef std::pair StackEntry; -typedef SmallVector StackVector; +using StackEntry = std::pair; +using StackVector = SmallVector; class SIAnnotateControlFlow : public FunctionPass { DivergenceAnalysis *DA; @@ -89,8 +105,7 @@ class SIAnnotateControlFlow : public FunctionPass { public: static char ID; - SIAnnotateControlFlow(): - FunctionPass(ID) { } + SIAnnotateControlFlow() : FunctionPass(ID) {} bool doInitialization(Module &M) override; @@ -105,7 +120,6 @@ public: AU.addPreserved(); FunctionPass::getAnalysisUsage(AU); } - }; } // end anonymous namespace @@ -186,7 +200,7 @@ bool SIAnnotateControlFlow::isElse(PHINode *Phi) { // \brief Erase "Phi" if it is not used any more void SIAnnotateControlFlow::eraseIfUnused(PHINode *Phi) { - if (llvm::RecursivelyDeleteDeadPHINode(Phi)) { + if (RecursivelyDeleteDeadPHINode(Phi)) { DEBUG(dbgs() << "Erased unused condition phi\n"); } } @@ -215,7 +229,6 @@ void SIAnnotateControlFlow::insertElse(BranchInst *Term) { Value *SIAnnotateControlFlow::handleLoopCondition( Value *Cond, PHINode *Broken, llvm::Loop *L, BranchInst *Term, SmallVectorImpl &LoopPhiConditions) { - // Only search through PHI nodes which are inside the loop. If we try this // with PHI nodes that are outside of the loop, we end up inserting new PHI // nodes outside of the loop which depend on values defined inside the loop. @@ -223,7 +236,6 @@ Value *SIAnnotateControlFlow::handleLoopCondition( // 'Instruction does not dominate all users!' errors. PHINode *Phi = nullptr; if ((Phi = dyn_cast(Cond)) && L->contains(Phi)) { - BasicBlock *Parent = Phi->getParent(); PHINode *NewPhi = PHINode::Create(Int64, 0, "loop.phi", &Parent->front()); Value *Ret = NewPhi; @@ -333,7 +345,7 @@ void SIAnnotateControlFlow::handleLoop(BranchInst *Term) { Term->setCondition(CallInst::Create(Loop, Arg, "", Term)); - for (WeakTrackingVH Val : reverse(LoopPhiConditions)) { + for (WeakTrackingVH Val : llvm::reverse(LoopPhiConditions)) { if (PHINode *Cond = cast_or_null(Val)) eraseIfUnused(Cond); } @@ -360,7 +372,7 @@ void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) { Preds.push_back(Pred); } - BB = llvm::SplitBlockPredecessors(BB, Preds, "endcf.split", DT, LI, false); + BB = SplitBlockPredecessors(BB, Preds, "endcf.split", DT, LI, false); } Value *Exec = popSaved(); diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index 86ad8df..73a9158 100644 --- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -1,4 +1,4 @@ -//===-- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies --------===// +//===- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies ---------===// // // The LLVM Compiler Infrastructure // @@ -68,14 +68,33 @@ #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" +#include "SIRegisterInfo.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Pass.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include +#include +#include +#include +#include +#include +#include using namespace llvm; @@ -89,13 +108,12 @@ static cl::opt EnableM0Merge( namespace { class SIFixSGPRCopies : public MachineFunctionPass { - MachineDominatorTree *MDT; public: static char ID; - SIFixSGPRCopies() : MachineFunctionPass(ID) { } + SIFixSGPRCopies() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override; @@ -109,7 +127,7 @@ public: } }; -} // End anonymous namespace +} // end anonymous namespace INITIALIZE_PASS_BEGIN(SIFixSGPRCopies, DEBUG_TYPE, "SI Fix SGPR copies", false, false) @@ -117,7 +135,6 @@ INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_END(SIFixSGPRCopies, DEBUG_TYPE, "SI Fix SGPR copies", false, false) - char SIFixSGPRCopies::ID = 0; char &llvm::SIFixSGPRCopiesID = SIFixSGPRCopies::ID; @@ -287,7 +304,6 @@ static bool phiHasVGPROperands(const MachineInstr &PHI, const MachineRegisterInfo &MRI, const SIRegisterInfo *TRI, const SIInstrInfo *TII) { - for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) { unsigned Reg = PHI.getOperand(i).getReg(); if (TRI->hasVGPRs(MRI.getRegClass(Reg))) @@ -295,10 +311,10 @@ static bool phiHasVGPROperands(const MachineInstr &PHI, } return false; } + static bool phiHasBreakDef(const MachineInstr &PHI, const MachineRegisterInfo &MRI, SmallSet &Visited) { - for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) { unsigned Reg = PHI.getOperand(i).getReg(); if (Visited.count(Reg)) @@ -337,7 +353,6 @@ static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy, const SIInstrInfo *TII, unsigned &SMovOp, int64_t &Imm) { - if (Copy->getOpcode() != AMDGPU::COPY) return false; @@ -371,13 +386,12 @@ template bool searchPredecessors(const MachineBasicBlock *MBB, const MachineBasicBlock *CutOff, UnaryPredicate Predicate) { - if (MBB == CutOff) return false; - DenseSet Visited; - SmallVector Worklist(MBB->pred_begin(), - MBB->pred_end()); + DenseSet Visited; + SmallVector Worklist(MBB->pred_begin(), + MBB->pred_end()); while (!Worklist.empty()) { MachineBasicBlock *MBB = Worklist.pop_back_val(); @@ -433,7 +447,7 @@ static bool hoistAndMergeSGPRInits(unsigned Reg, const MachineRegisterInfo &MRI, MachineDominatorTree &MDT) { // List of inits by immediate value. - typedef std::map> InitListMap; + using InitListMap = std::map>; InitListMap Inits; // List of clobbering instructions. SmallVector Clobbers; @@ -490,9 +504,10 @@ static bool hoistAndMergeSGPRInits(unsigned Reg, MDT.properlyDominates(Clobber->getParent(), MBBTo)); }; - return (any_of(Clobbers, interferes)) || - (any_of(Inits, [&](InitListMap::value_type &C) { - return C.first != Init.first && any_of(C.second, interferes); + return (llvm::any_of(Clobbers, interferes)) || + (llvm::any_of(Inits, [&](InitListMap::value_type &C) { + return C.first != Init.first && + llvm::any_of(C.second, interferes); })); }; @@ -558,7 +573,6 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) { - MachineBasicBlock &MBB = *BI; for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { @@ -661,7 +675,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { } break; } - case AMDGPU::REG_SEQUENCE: { + case AMDGPU::REG_SEQUENCE: if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) || !hasVGPROperands(MI, TRI)) { foldVGPRCopyIntoRegSequence(MI, TRI, TII, MRI); @@ -672,7 +686,6 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { TII->moveToVALU(MI); break; - } case AMDGPU::INSERT_SUBREG: { const TargetRegisterClass *DstRC, *Src0RC, *Src1RC; DstRC = MRI.getRegClass(MI.getOperand(0).getReg()); diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 44185f4..1cb502d 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -1,4 +1,4 @@ -//===-- SIInsertWaitcnts.cpp - Insert Wait Instructions --------------------===/ +//===- SIInsertWaitcnts.cpp - Insert Wait Instructions --------------------===// // // The LLVM Compiler Infrastructure // @@ -21,12 +21,34 @@ #include "SIDefines.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" +#include "SIRegisterInfo.h" #include "Utils/AMDGPUBaseInfo.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include +#include +#include #define DEBUG_TYPE "si-insert-waitcnts" @@ -42,7 +64,7 @@ namespace { enum InstCounterType { VM_CNT = 0, LGKM_CNT, EXP_CNT, NUM_INST_CNTS }; -typedef std::pair RegInterval; +using RegInterval = std::pair; struct { int32_t VmcntMax; @@ -101,6 +123,15 @@ enum RegisterMapping { // "s_waitcnt 0" before use. class BlockWaitcntBrackets { public: + BlockWaitcntBrackets() { + for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS; + T = (enum InstCounterType)(T + 1)) { + memset(VgprScores[T], 0, sizeof(VgprScores[T])); + } + } + + ~BlockWaitcntBrackets() = default; + static int32_t getWaitCountMax(InstCounterType T) { switch (T) { case VM_CNT: @@ -113,14 +144,14 @@ public: break; } return 0; - }; + } void setScoreLB(InstCounterType T, int32_t Val) { assert(T < NUM_INST_CNTS); if (T >= NUM_INST_CNTS) return; ScoreLBs[T] = Val; - }; + } void setScoreUB(InstCounterType T, int32_t Val) { assert(T < NUM_INST_CNTS); @@ -132,21 +163,21 @@ public: if (ScoreLBs[T] < UB) ScoreLBs[T] = UB; } - }; + } int32_t getScoreLB(InstCounterType T) { assert(T < NUM_INST_CNTS); if (T >= NUM_INST_CNTS) return 0; return ScoreLBs[T]; - }; + } int32_t getScoreUB(InstCounterType T) { assert(T < NUM_INST_CNTS); if (T >= NUM_INST_CNTS) return 0; return ScoreUBs[T]; - }; + } // Mapping from event to counter. InstCounterType eventCounter(WaitEventType E) { @@ -218,26 +249,18 @@ public: void setEventUB(enum WaitEventType W, int32_t Val) { EventUBs[W] = Val; } int32_t getMaxVGPR() const { return VgprUB; } int32_t getMaxSGPR() const { return SgprUB; } + int32_t getEventUB(enum WaitEventType W) const { assert(W < NUM_WAIT_EVENTS); return EventUBs[W]; } + bool counterOutOfOrder(InstCounterType T); unsigned int updateByWait(InstCounterType T, int ScoreToWait); void updateByEvent(const SIInstrInfo *TII, const SIRegisterInfo *TRI, const MachineRegisterInfo *MRI, WaitEventType E, MachineInstr &MI); - BlockWaitcntBrackets() - : WaitAtBeginning(false), RevisitLoop(false), ValidLoop(false), MixedExpTypes(false), - LoopRegion(NULL), PostOrder(0), Waitcnt(NULL), VgprUB(0), SgprUB(0) { - for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS; - T = (enum InstCounterType)(T + 1)) { - memset(VgprScores[T], 0, sizeof(VgprScores[T])); - } - } - ~BlockWaitcntBrackets(){}; - bool hasPendingSMEM() const { return (EventUBs[SMEM_ACCESS] > ScoreLBs[LGKM_CNT] && EventUBs[SMEM_ACCESS] <= ScoreUBs[LGKM_CNT]); @@ -266,7 +289,7 @@ public: int32_t getPostOrder() const { return PostOrder; } void setWaitcnt(MachineInstr *WaitcntIn) { Waitcnt = WaitcntIn; } - void clearWaitcnt() { Waitcnt = NULL; } + void clearWaitcnt() { Waitcnt = nullptr; } MachineInstr *getWaitcnt() const { return Waitcnt; } bool mixedExpTypes() const { return MixedExpTypes; } @@ -278,13 +301,13 @@ public: void dump() { print(dbgs()); } private: - bool WaitAtBeginning; - bool RevisitLoop; - bool ValidLoop; - bool MixedExpTypes; - MachineLoop *LoopRegion; - int32_t PostOrder; - MachineInstr *Waitcnt; + bool WaitAtBeginning = false; + bool RevisitLoop = false; + bool ValidLoop = false; + bool MixedExpTypes = false; + MachineLoop *LoopRegion = nullptr; + int32_t PostOrder = 0; + MachineInstr *Waitcnt = nullptr; int32_t ScoreLBs[NUM_INST_CNTS] = {0}; int32_t ScoreUBs[NUM_INST_CNTS] = {0}; int32_t EventUBs[NUM_WAIT_EVENTS] = {0}; @@ -292,8 +315,8 @@ private: int32_t LastFlat[NUM_INST_CNTS] = {0}; // wait_cnt scores for every vgpr. // Keep track of the VgprUB and SgprUB to make merge at join efficient. - int32_t VgprUB; - int32_t SgprUB; + int32_t VgprUB = 0; + int32_t SgprUB = 0; int32_t VgprScores[NUM_INST_CNTS][NUM_ALL_VGPRS]; // Wait cnt scores for every sgpr, only lgkmcnt is relevant. int32_t SgprScores[SQ_MAX_PGM_SGPRS] = {0}; @@ -306,38 +329,36 @@ private: // at the end of the loop footer. class LoopWaitcntData { public: + LoopWaitcntData() = default; + ~LoopWaitcntData() = default; + void incIterCnt() { IterCnt++; } void resetIterCnt() { IterCnt = 0; } int32_t getIterCnt() { return IterCnt; } - LoopWaitcntData() : LfWaitcnt(NULL), IterCnt(0) {} - ~LoopWaitcntData(){}; - void setWaitcnt(MachineInstr *WaitcntIn) { LfWaitcnt = WaitcntIn; } MachineInstr *getWaitcnt() const { return LfWaitcnt; } void print() { DEBUG(dbgs() << " iteration " << IterCnt << '\n';); - return; } private: // s_waitcnt added at the end of loop footer to stablize wait scores // at the end of the loop footer. - MachineInstr *LfWaitcnt; + MachineInstr *LfWaitcnt = nullptr; // Number of iterations the loop has been visited, not including the initial // walk over. - int32_t IterCnt; + int32_t IterCnt = 0; }; class SIInsertWaitcnts : public MachineFunctionPass { - private: - const SISubtarget *ST; - const SIInstrInfo *TII; - const SIRegisterInfo *TRI; - const MachineRegisterInfo *MRI; - const MachineLoopInfo *MLI; + const SISubtarget *ST = nullptr; + const SIInstrInfo *TII = nullptr; + const SIRegisterInfo *TRI = nullptr; + const MachineRegisterInfo *MRI = nullptr; + const MachineLoopInfo *MLI = nullptr; AMDGPU::IsaInfo::IsaVersion IV; AMDGPUAS AMDGPUASI; @@ -357,9 +378,7 @@ private: public: static char ID; - SIInsertWaitcnts() - : MachineFunctionPass(ID), ST(nullptr), TII(nullptr), TRI(nullptr), - MRI(nullptr), MLI(nullptr) {} + SIInsertWaitcnts() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override; @@ -376,7 +395,8 @@ public: void addKillWaitBracket(BlockWaitcntBrackets *Bracket) { // The waitcnt information is copied because it changes as the block is // traversed. - KillWaitBrackets.push_back(make_unique(*Bracket)); + KillWaitBrackets.push_back( + llvm::make_unique(*Bracket)); } bool mayAccessLDSThroughFlat(const MachineInstr &MI) const; @@ -390,7 +410,7 @@ public: void insertWaitcntBeforeCF(MachineBasicBlock &Block, MachineInstr *Inst); }; -} // End anonymous namespace. +} // end anonymous namespace RegInterval BlockWaitcntBrackets::getRegInterval(const MachineInstr *MI, const SIInstrInfo *TII, @@ -643,7 +663,6 @@ void BlockWaitcntBrackets::print(raw_ostream &OS) { OS << '\n'; } OS << '\n'; - return; } unsigned int BlockWaitcntBrackets::updateByWait(InstCounterType T, @@ -1098,7 +1117,8 @@ MachineInstr *SIInsertWaitcnts::generateSWaitCntInstBefore( BlockWaitcntBracketsMap[TBB].get(); if (!ScoreBracket) { assert(BlockVisitedSet.find(TBB) == BlockVisitedSet.end()); - BlockWaitcntBracketsMap[TBB] = make_unique(); + BlockWaitcntBracketsMap[TBB] = + llvm::make_unique(); ScoreBracket = BlockWaitcntBracketsMap[TBB].get(); } ScoreBracket->setRevisitLoop(true); @@ -1145,8 +1165,6 @@ void SIInsertWaitcnts::insertWaitcntBeforeCF(MachineBasicBlock &MBB, } else { MBB.push_back(Waitcnt); } - - return; } // This is a flat memory operation. Check to see if it has memory @@ -1764,13 +1782,13 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { BlockWaitcntBrackets *ScoreBrackets = BlockWaitcntBracketsMap[&MBB].get(); if (!ScoreBrackets) { - BlockWaitcntBracketsMap[&MBB] = make_unique(); + BlockWaitcntBracketsMap[&MBB] = llvm::make_unique(); ScoreBrackets = BlockWaitcntBracketsMap[&MBB].get(); } ScoreBrackets->setPostOrder(MBB.getNumber()); MachineLoop *ContainingLoop = MLI->getLoopFor(&MBB); if (ContainingLoop && LoopWaitcntDataMap[ContainingLoop] == nullptr) - LoopWaitcntDataMap[ContainingLoop] = make_unique(); + LoopWaitcntDataMap[ContainingLoop] = llvm::make_unique(); // If we are walking into the block from before the loop, then guarantee // at least 1 re-walk over the loop to propagate the information, even if @@ -1831,12 +1849,10 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) { - MachineBasicBlock &MBB = *BI; for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { - if (!HaveScalarStores && TII->isScalarStore(*I)) HaveScalarStores = true; @@ -1859,7 +1875,6 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) { - if (I->getOpcode() == AMDGPU::S_DCACHE_WB) SeenDCacheWB = true; else if (TII->isScalarStore(*I)) diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp index bc86515..b074b95 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp @@ -1,4 +1,4 @@ -//===-- SILowerControlFlow.cpp - Use predicates for control flow ----------===// +//===- SILowerControlFlow.cpp - Use predicates for control flow -----------===// // // The LLVM Compiler Infrastructure // @@ -33,15 +33,14 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/DebugLoc.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetRegisterInfo.h" #include #include #include #include -#include #include #define DEBUG_TYPE "si-insert-waits" @@ -51,23 +50,23 @@ using namespace llvm; namespace { /// \brief One variable for each of the hardware counters -typedef union { +using Counters = union { struct { unsigned VM; unsigned EXP; unsigned LGKM; } Named; unsigned Array[3]; -} Counters; +}; -typedef enum { +using InstType = enum { OTHER, SMEM, VMEM -} InstType; +}; -typedef Counters RegCounters[512]; -typedef std::pair RegInterval; +using RegCounters = Counters[512]; +using RegInterval = std::pair; class SIInsertWaits : public MachineFunctionPass { private: @@ -409,7 +408,6 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB, // Adjust the value to the real hardware possibilities. Counts.Array[i] = std::min(Value, HardwareLimits.Array[i]); - } else Counts.Array[i] = 0; @@ -568,12 +566,10 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) { for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) { - MachineBasicBlock &MBB = *BI; for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { - if (!HaveScalarStores && TII->isScalarStore(*I)) HaveScalarStores = true; @@ -671,7 +667,6 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) { for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) { - if (I->getOpcode() == AMDGPU::S_DCACHE_WB) SeenDCacheWB = true; else if (TII->isScalarStore(*I)) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index e9492c4..ad9deef 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1,4 +1,4 @@ -//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===// +//===- SIInstrInfo.cpp - SI Instruction Information ----------------------===// // // The LLVM Compiler Infrastructure // @@ -13,19 +13,51 @@ //===----------------------------------------------------------------------===// #include "SIInstrInfo.h" -#include "AMDGPUTargetMachine.h" +#include "AMDGPU.h" +#include "AMDGPUSubtarget.h" #include "GCNHazardRecognizer.h" #include "SIDefines.h" #include "SIMachineFunctionInfo.h" +#include "SIRegisterInfo.h" +#include "Utils/AMDGPUBaseInfo.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/MC/MCInstrDesc.h" -#include "llvm/Support/Debug.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOpcodes.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include +#include +#include +#include using namespace llvm; @@ -462,7 +494,6 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, } } - ArrayRef SubIndices = RI.getRegSplitParts(RC, EltSize); bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg); @@ -933,7 +964,6 @@ unsigned SIInstrInfo::calculateLDSSpillAddress( if (!AMDGPU::isShader(MF->getFunction()->getCallingConv()) && WorkGroupSize > WavefrontSize) { - unsigned TIDIGXReg = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_X); unsigned TIDIGYReg @@ -1053,24 +1083,24 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { DebugLoc DL = MBB.findDebugLoc(MI); switch (MI.getOpcode()) { default: return AMDGPUInstrInfo::expandPostRAPseudo(MI); - case AMDGPU::S_MOV_B64_term: { + case AMDGPU::S_MOV_B64_term: // This is only a terminator to get the correct spill code placement during // register allocation. MI.setDesc(get(AMDGPU::S_MOV_B64)); break; - } - case AMDGPU::S_XOR_B64_term: { + + case AMDGPU::S_XOR_B64_term: // This is only a terminator to get the correct spill code placement during // register allocation. MI.setDesc(get(AMDGPU::S_XOR_B64)); break; - } - case AMDGPU::S_ANDN2_B64_term: { + + case AMDGPU::S_ANDN2_B64_term: // This is only a terminator to get the correct spill code placement during // register allocation. MI.setDesc(get(AMDGPU::S_ANDN2_B64)); break; - } + case AMDGPU::V_MOV_B64_PSEUDO: { unsigned Dst = MI.getOperand(0).getReg(); unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0); @@ -1173,7 +1203,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MIB.add(MI.getOperand(2)); Bundler.append(MIB); - llvm::finalizeBundle(MBB, Bundler.begin()); + finalizeBundle(MBB, Bundler.begin()); MI.eraseFromParent(); break; @@ -1271,7 +1301,6 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, return nullptr; } - if (CommutedMI) { swapSourceModifiers(MI, Src0, AMDGPU::OpName::src0_modifiers, Src1, AMDGPU::OpName::src1_modifiers); @@ -1581,7 +1610,6 @@ unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB, ArrayRef Cond, const DebugLoc &DL, int *BytesAdded) const { - if (!FBB && Cond.empty()) { BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH)) .addMBB(TBB); @@ -2172,10 +2200,9 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO, case AMDGPU::OPERAND_REG_IMM_INT64: case AMDGPU::OPERAND_REG_IMM_FP64: case AMDGPU::OPERAND_REG_INLINE_C_INT64: - case AMDGPU::OPERAND_REG_INLINE_C_FP64: { + case AMDGPU::OPERAND_REG_INLINE_C_FP64: return AMDGPU::isInlinableLiteral64(MO.getImm(), ST.hasInv2PiInlineImm()); - } case AMDGPU::OPERAND_REG_IMM_INT16: case AMDGPU::OPERAND_REG_IMM_FP16: case AMDGPU::OPERAND_REG_INLINE_C_INT16: @@ -2478,7 +2505,6 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, // Verify SDWA if (isSDWA(MI)) { - if (!ST.hasSDWA()) { ErrInfo = "SDWA is not supported on this target"; return false; @@ -3131,7 +3157,6 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const { - unsigned OpReg = Op.getReg(); unsigned OpSubReg = Op.getSubReg(); @@ -3489,11 +3514,10 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { Inst.eraseFromParent(); continue; - case AMDGPU::S_BFE_I64: { + case AMDGPU::S_BFE_I64: splitScalar64BitBFE(Worklist, Inst); Inst.eraseFromParent(); continue; - } case AMDGPU::S_LSHL_B32: if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { @@ -3552,12 +3576,11 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { case AMDGPU::S_PACK_LL_B32_B16: case AMDGPU::S_PACK_LH_B32_B16: - case AMDGPU::S_PACK_HH_B32_B16: { + case AMDGPU::S_PACK_HH_B32_B16: movePackToVALU(Worklist, MRI, Inst); Inst.eraseFromParent(); continue; } - } if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) { // We cannot move this instruction to the VALU, so we should try to @@ -3977,8 +4000,8 @@ void SIInstrInfo::addSCCDefUsersToVALUWorklist( // This assumes that all the users of SCC are in the same block // as the SCC def. for (MachineInstr &MI : - llvm::make_range(MachineBasicBlock::iterator(SCCDefInst), - SCCDefInst.getParent()->end())) { + make_range(MachineBasicBlock::iterator(SCCDefInst), + SCCDefInst.getParent()->end())) { // Exit if we find another SCC def. if (MI.findRegisterDefOperandIdx(AMDGPU::SCC) != -1) return; @@ -4166,7 +4189,6 @@ unsigned SIInstrInfo::isSGPRStackAccess(const MachineInstr &MI, unsigned SIInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const { - if (!MI.mayLoad()) return AMDGPU::NoRegister; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 0dbe080..b2df4e0 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -1,4 +1,4 @@ -//===-- SIInstrInfo.h - SI Instruction Info Interface -----------*- C++ -*-===// +//===- SIInstrInfo.h - SI Instruction Info Interface ------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -12,17 +12,33 @@ // //===----------------------------------------------------------------------===// - #ifndef LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H #define LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H #include "AMDGPUInstrInfo.h" #include "SIDefines.h" #include "SIRegisterInfo.h" +#include "Utils/AMDGPUBaseInfo.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SetVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/Support/Compiler.h" +#include +#include namespace llvm { +class APInt; +class MachineRegisterInfo; +class RegScavenger; +class SISubtarget; +class TargetRegisterClass; + class SIInstrInfo final : public AMDGPUInstrInfo { private: const SIRegisterInfo RI; @@ -39,7 +55,7 @@ private: EXECZ = 3 }; - typedef SmallSetVector SetVectorType; + using SetVectorType = SmallSetVector; static unsigned getBranchOpcode(BranchPredicate Cond); static BranchPredicate getBranchPredicate(unsigned Opcode); @@ -76,9 +92,8 @@ private: MachineRegisterInfo &MRI, MachineInstr &Inst) const; - void addUsersToMoveToVALUWorklist( - unsigned Reg, MachineRegisterInfo &MRI, - SetVectorType &Worklist) const; + void addUsersToMoveToVALUWorklist(unsigned Reg, MachineRegisterInfo &MRI, + SetVectorType &Worklist) const; void addSCCDefUsersToVALUWorklist(MachineInstr &SCCDefInst, @@ -101,7 +116,6 @@ protected: unsigned OpIdx1) const override; public: - enum TargetOperandFlags { MO_MASK = 0x7, @@ -120,7 +134,7 @@ public: MO_REL32_HI = 5 }; - explicit SIInstrInfo(const SISubtarget &); + explicit SIInstrInfo(const SISubtarget &ST); const SIRegisterInfo &getRegisterInfo() const { return RI; @@ -159,7 +173,7 @@ public: unsigned insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, - unsigned SrcReg, int Value) const; + unsigned SrcReg, int Value) const; unsigned insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, @@ -228,7 +242,6 @@ public: bool reverseBranchCondition( SmallVectorImpl &Cond) const override; - bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef Cond, unsigned TrueReg, unsigned FalseReg, @@ -827,6 +840,7 @@ public: }; namespace AMDGPU { + LLVM_READONLY int getVOPe64(uint16_t Opcode); @@ -867,7 +881,8 @@ namespace AMDGPU { TF_LONG_BRANCH_FORWARD = 1 << 0, TF_LONG_BRANCH_BACKWARD = 1 << 1 }; -} // End namespace AMDGPU + +} // end namespace AMDGPU namespace SI { namespace KernelInputOffsets { @@ -885,9 +900,9 @@ enum Offsets { LOCAL_SIZE_Z = 32 }; -} // End namespace KernelInputOffsets -} // End namespace SI +} // end namespace KernelInputOffsets +} // end namespace SI -} // End namespace llvm +} // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index c6ad61a..b7ddfe4 100644 --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -1,4 +1,4 @@ -//===-- SILoadStoreOptimizer.cpp ------------------------------------------===// +//===- SILoadStoreOptimizer.cpp -------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -56,8 +56,9 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" +#include #include +#include #include #include @@ -68,8 +69,7 @@ using namespace llvm; namespace { class SILoadStoreOptimizer : public MachineFunctionPass { - - typedef struct { + using CombineInfo = struct { MachineBasicBlock::iterator I; MachineBasicBlock::iterator Paired; unsigned EltSize; @@ -78,7 +78,7 @@ class SILoadStoreOptimizer : public MachineFunctionPass { unsigned BaseOff; bool UseST64; SmallVector InstsToMove; - } CombineInfo; + }; private: const SIInstrInfo *TII = nullptr; @@ -258,7 +258,6 @@ bool SILoadStoreOptimizer::findMatchingDSInst(CombineInfo &CI) { for ( ; MBBI != E; ++MBBI) { if (MBBI->getOpcode() != CI.I->getOpcode()) { - // This is not a matching DS instruction, but we can keep looking as // long as one of these conditions are met: // 1. It is safe to move I down past MBBI. diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index baf2603..b372082 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -1,4 +1,4 @@ -//===-- SIMachineFunctionInfo.cpp -------- SI Machine Function Info -------===// +//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===// // // The LLVM Compiler Infrastructure // @@ -8,13 +8,19 @@ //===----------------------------------------------------------------------===// #include "SIMachineFunctionInfo.h" +#include "AMDGPUArgumentUsageInfo.h" #include "AMDGPUSubtarget.h" -#include "SIInstrInfo.h" +#include "SIRegisterInfo.h" +#include "Utils/AMDGPUBaseInfo.h" +#include "llvm/ADT/Optional.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/CallingConv.h" #include "llvm/IR/Function.h" -#include "llvm/IR/LLVMContext.h" +#include +#include #define MAX_LANES 64 @@ -22,27 +28,6 @@ using namespace llvm; SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) : AMDGPUMachineFunction(MF), - TIDReg(AMDGPU::NoRegister), - ScratchRSrcReg(AMDGPU::PRIVATE_RSRC_REG), - ScratchWaveOffsetReg(AMDGPU::SCRATCH_WAVE_OFFSET_REG), - FrameOffsetReg(AMDGPU::FP_REG), - StackPtrOffsetReg(AMDGPU::SP_REG), - ArgInfo(), - PSInputAddr(0), - PSInputEnable(0), - ReturnsVoid(true), - FlatWorkGroupSizes(0, 0), - WavesPerEU(0, 0), - DebuggerWorkGroupIDStackObjectIndices({{0, 0, 0}}), - DebuggerWorkItemIDStackObjectIndices({{0, 0, 0}}), - LDSWaveSpillSize(0), - NumUserSGPRs(0), - NumSystemSGPRs(0), - HasSpilledSGPRs(false), - HasSpilledVGPRs(false), - HasNonSpillStackObjects(false), - NumSpilledSGPRs(0), - NumSpilledVGPRs(0), PrivateSegmentBuffer(false), DispatchPtr(false), QueuePtr(false), diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 5581fe4..4d87286 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -1,4 +1,4 @@ -//===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==// +//==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -15,23 +15,30 @@ #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H #include "AMDGPUMachineFunction.h" -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "AMDGPUArgumentUsageInfo.h" #include "SIRegisterInfo.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" #include #include -#include #include +#include namespace llvm { +class MachineFrameInfo; +class MachineFunction; +class TargetRegisterClass; + class AMDGPUImagePseudoSourceValue : public PseudoSourceValue { public: explicit AMDGPUImagePseudoSourceValue() : - PseudoSourceValue(PseudoSourceValue::TargetCustom) { } + PseudoSourceValue(PseudoSourceValue::TargetCustom) {} bool isConstant(const MachineFrameInfo *) const override { // This should probably be true for most images, but we will start by being @@ -45,7 +52,7 @@ public: return false; } - bool mayAlias(const MachineFrameInfo*) const override { + bool mayAlias(const MachineFrameInfo *) const override { // FIXME: If we ever change image intrinsics to accept fat pointers, then // this could be true for some cases. return false; @@ -55,7 +62,7 @@ public: class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue { public: explicit AMDGPUBufferPseudoSourceValue() : - PseudoSourceValue(PseudoSourceValue::TargetCustom) { } + PseudoSourceValue(PseudoSourceValue::TargetCustom) {} bool isConstant(const MachineFrameInfo *) const override { // This should probably be true for most images, but we will start by being @@ -69,7 +76,7 @@ public: return false; } - bool mayAlias(const MachineFrameInfo*) const override { + bool mayAlias(const MachineFrameInfo *) const override { // FIXME: If we ever change image intrinsics to accept fat pointers, then // this could be true for some cases. return false; @@ -82,57 +89,58 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { // FIXME: This should be removed and getPreloadedValue moved here. friend class SIRegisterInfo; - unsigned TIDReg; + unsigned TIDReg = AMDGPU::NoRegister; // Registers that may be reserved for spilling purposes. These may be the same // as the input registers. - unsigned ScratchRSrcReg; - unsigned ScratchWaveOffsetReg; + unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG; + unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG; // This is the current function's incremented size from the kernel's scratch // wave offset register. For an entry function, this is exactly the same as // the ScratchWaveOffsetReg. - unsigned FrameOffsetReg; + unsigned FrameOffsetReg = AMDGPU::FP_REG; // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg. - unsigned StackPtrOffsetReg; + unsigned StackPtrOffsetReg = AMDGPU::SP_REG; AMDGPUFunctionArgInfo ArgInfo; // Graphics info. - unsigned PSInputAddr; - unsigned PSInputEnable; + unsigned PSInputAddr = 0; + unsigned PSInputEnable = 0; - bool ReturnsVoid; + bool ReturnsVoid = true; // A pair of default/requested minimum/maximum flat work group sizes. // Minimum - first, maximum - second. - std::pair FlatWorkGroupSizes; + std::pair FlatWorkGroupSizes = {0, 0}; // A pair of default/requested minimum/maximum number of waves per execution // unit. Minimum - first, maximum - second. - std::pair WavesPerEU; + std::pair WavesPerEU = {0, 0}; // Stack object indices for work group IDs. - std::array DebuggerWorkGroupIDStackObjectIndices; + std::array DebuggerWorkGroupIDStackObjectIndices = {{0, 0, 0}}; + // Stack object indices for work item IDs. - std::array DebuggerWorkItemIDStackObjectIndices; + std::array DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}}; AMDGPUBufferPseudoSourceValue BufferPSV; AMDGPUImagePseudoSourceValue ImagePSV; private: - unsigned LDSWaveSpillSize; + unsigned LDSWaveSpillSize = 0; unsigned ScratchOffsetReg; - unsigned NumUserSGPRs; - unsigned NumSystemSGPRs; + unsigned NumUserSGPRs = 0; + unsigned NumSystemSGPRs = 0; - bool HasSpilledSGPRs; - bool HasSpilledVGPRs; - bool HasNonSpillStackObjects; + bool HasSpilledSGPRs = false; + bool HasSpilledVGPRs = false; + bool HasNonSpillStackObjects = false; - unsigned NumSpilledSGPRs; - unsigned NumSpilledVGPRs; + unsigned NumSpilledSGPRs = 0; + unsigned NumSpilledVGPRs = 0; // Feature bits required for inputs passed in user SGPRs. bool PrivateSegmentBuffer : 1; @@ -180,7 +188,7 @@ public: int Lane = -1; SpilledReg() = default; - SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) { } + SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) {} bool hasLane() { return Lane != -1;} bool hasReg() { return VGPR != AMDGPU::NoRegister;} @@ -194,14 +202,12 @@ public: // prolog/epilog. Optional FI; - SGPRSpillVGPRCSR(unsigned V, Optional F) : - VGPR(V), - FI(F) {} + SGPRSpillVGPRCSR(unsigned V, Optional F) : VGPR(V), FI(F) {} }; private: // SGPR->VGPR spilling support. - typedef std::pair SpillRegMask; + using SpillRegMask = std::pair; // Track VGPR + wave index for each subregister of the SGPR spilled to // frameindex key. @@ -225,8 +231,8 @@ public: bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI); void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI); - bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; }; - unsigned getTIDReg() const { return TIDReg; }; + bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; } + unsigned getTIDReg() const { return TIDReg; } void setTIDReg(unsigned Reg) { TIDReg = Reg; } // Add user SGPRs. @@ -276,7 +282,6 @@ public: ArgInfo.WorkItemIDZ = Arg; } - unsigned addPrivateSegmentWaveByteOffset() { ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(getNextSystemSGPR()); diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp index e2ac663..24b7fe0 100644 --- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp +++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp @@ -1,4 +1,4 @@ -//===-- SIPeepholeSDWA.cpp - Peephole optimization for SDWA instructions --===// +//===- SIPeepholeSDWA.cpp - Peephole optimization for SDWA instructions ---===// // // The LLVM Compiler Infrastructure // @@ -24,12 +24,31 @@ #include "AMDGPUSubtarget.h" #include "SIDefines.h" #include "SIInstrInfo.h" +#include "SIRegisterInfo.h" +#include "Utils/AMDGPUBaseInfo.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/MC/LaneBitmask.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include +#include +#include +#include #include -#include using namespace llvm; @@ -45,7 +64,7 @@ class SDWAOperand; class SIPeepholeSDWA : public MachineFunctionPass { public: - typedef SmallVector SDWAOperandsVector; + using SDWAOperandsVector = SmallVector; private: MachineRegisterInfo *MRI; @@ -91,7 +110,7 @@ public: assert(Replaced->isReg()); } - virtual ~SDWAOperand() {} + virtual ~SDWAOperand() = default; virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII) = 0; virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) = 0; @@ -99,6 +118,7 @@ public: MachineOperand *getTargetOperand() const { return Target; } MachineOperand *getReplacedOperand() const { return Replaced; } MachineInstr *getParentInst() const { return Target->getParent(); } + MachineRegisterInfo *getMRI() const { return &getParentInst()->getParent()->getParent()->getRegInfo(); } @@ -120,8 +140,8 @@ public: : SDWAOperand(TargetOp, ReplacedOp), SrcSel(SrcSel_), Abs(Abs_), Neg(Neg_), Sext(Sext_) {} - virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII) override; - virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override; + MachineInstr *potentialToConvert(const SIInstrInfo *TII) override; + bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override; SdwaSel getSrcSel() const { return SrcSel; } bool getAbs() const { return Abs; } @@ -142,14 +162,14 @@ public: SdwaSel DstSel_ = DWORD, DstUnused DstUn_ = UNUSED_PAD) : SDWAOperand(TargetOp, ReplacedOp), DstSel(DstSel_), DstUn(DstUn_) {} - virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII) override; - virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override; + MachineInstr *potentialToConvert(const SIInstrInfo *TII) override; + bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override; SdwaSel getDstSel() const { return DstSel; } DstUnused getDstUnused() const { return DstUn; } }; -} // End anonymous namespace. +} // end anonymous namespace INITIALIZE_PASS(SIPeepholeSDWA, DEBUG_TYPE, "SI Peephole SDWA", false, false) @@ -162,7 +182,6 @@ FunctionPass *llvm::createSIPeepholeSDWAPass() { } #ifndef NDEBUG - static raw_ostream& operator<<(raw_ostream &OS, const SdwaSel &Sel) { switch(Sel) { case BYTE_0: OS << "BYTE_0"; break; @@ -199,7 +218,6 @@ static raw_ostream& operator<<(raw_ostream &OS, const SDWADstOperand &Dst) { << " dst_unused:" << Dst.getDstUnused() << '\n'; return OS; } - #endif static void copyRegOperand(MachineOperand &To, const MachineOperand &From) { @@ -564,7 +582,7 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { auto SDWASrc = make_unique( Src0, Dst, SrcSel, false, false, - Opcode == AMDGPU::V_BFE_U32 ? false : true); + Opcode != AMDGPU::V_BFE_U32); DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n'); SDWAOperands[&MI] = std::move(SDWASrc); ++NumSDWAPatternsFound; -- 2.7.4