From: Eugene Zelenko
Date: Sat, 21 Jan 2017 00:53:49 +0000 (+0000)
Subject: [AMDGPU] Fix some Clang-tidy modernize and Include What You Use warnings; other minor...
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=6620376da79f7ee91f71cf93c5921fb8f406f71c;p=platform%2Fupstream%2Fllvm.git

[AMDGPU] Fix some Clang-tidy modernize and Include What You Use warnings; other minor
fixes (NFC).

llvm-svn: 292688
---

diff --git a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
index 7faeccd..addcf4e 100644
--- a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
@@ -9,27 +9,39 @@
 //==-----------------------------------------------------------------------===//
 
 #include "AMDGPU.h"
-#include "AMDGPUInstrInfo.h"
 #include "AMDGPUSubtarget.h"
 #include "R600InstrInfo.h"
+#include "R600RegisterInfo.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachinePostDominators.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/Dominators.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+#include <cstddef>
 #include <deque>
+#include <iterator>
+#include <map>
+#include <utility>
+#include <vector>
 
 using namespace llvm;
 
@@ -53,15 +65,19 @@ STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
 STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
 
 namespace llvm {
+
 void initializeAMDGPUCFGStructurizerPass(PassRegistry&);
-}
+
+} // end namespace llvm
+
+namespace {
 
 //===----------------------------------------------------------------------===//
 //
 // Miscellaneous utility for CFGStructurizer.
 //
 //===----------------------------------------------------------------------===//
-namespace {
+
 #define SHOWNEWINSTR(i) \
   DEBUG(dbgs() << "New instr: " << *i << "\n");
 
@@ -92,25 +108,19 @@ void ReverseVector(SmallVectorImpl<MachineBasicBlock *> &Src) {
   }
 }
 
-} // end anonymous namespace
-
 //===----------------------------------------------------------------------===//
 //
 // supporting data structure for CFGStructurizer
 //
 //===----------------------------------------------------------------------===//
-
-namespace {
-
 class BlockInformation {
 public:
-  bool IsRetired;
-  int SccNum;
-  BlockInformation() : IsRetired(false), SccNum(INVALIDSCCNUM) {}
-};
+  bool IsRetired = false;
+  int SccNum = INVALIDSCCNUM;
 
-} // end anonymous namespace
+  BlockInformation() = default;
+};
 
 //===----------------------------------------------------------------------===//
 //
@@ -118,7 +128,6 @@ public:
 //
 //===----------------------------------------------------------------------===//
 
-namespace {
 class AMDGPUCFGStructurizer : public MachineFunctionPass {
 public:
   typedef SmallVector<MachineBasicBlock *, 32> MBBVector;
@@ -133,8 +142,7 @@ public:
 
   static char ID;
 
-  AMDGPUCFGStructurizer() :
-    MachineFunctionPass(ID), TII(nullptr), TRI(nullptr) {
+  AMDGPUCFGStructurizer() : MachineFunctionPass(ID) {
     initializeAMDGPUCFGStructurizerPass(*PassRegistry::getPassRegistry());
   }
 
@@ -167,7 +175,7 @@ public:
     MLI = &getAnalysis<MachineLoopInfo>();
     DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
     MDT = &getAnalysis<MachineDominatorTree>();
-    DEBUG(MDT->print(dbgs(), (const llvm::Module*)nullptr););
+    DEBUG(MDT->print(dbgs(), (const Module*)nullptr););
     PDT = &getAnalysis<MachinePostDominatorTree>();
     DEBUG(PDT->print(dbgs()););
     prepare();
@@ -180,8 +188,8 @@ protected:
   MachineDominatorTree *MDT;
   MachinePostDominatorTree *PDT;
   MachineLoopInfo *MLI;
-  const R600InstrInfo *TII;
-  const R600RegisterInfo *TRI;
+  const R600InstrInfo *TII = nullptr;
+  const R600RegisterInfo *TRI = nullptr;
 
   // PRINT FUNCTIONS
   /// Print the ordered Blocks.
@@ -198,6 +206,7 @@ protected:
       }
     }
   }
+
   static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) {
     for (MachineLoop::iterator iter = LoopInfo.begin(),
          iterEnd = LoopInfo.end(); iter != iterEnd; ++iter) {
@@ -263,7 +272,6 @@ protected:
       MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk);
   static void wrapup(MachineBasicBlock *MBB);
 
-
   int patternMatch(MachineBasicBlock *MBB);
   int patternMatchGroup(MachineBasicBlock *MBB);
   int serialPatternMatch(MachineBasicBlock *MBB);
@@ -328,7 +336,6 @@ protected:
   void recordSccnum(MachineBasicBlock *MBB, int SCCNum);
   void retireBlock(MachineBasicBlock *MBB);
 
-
 private:
   MBBInfoMap BlockInfoMap;
   LoopLandInfoMap LLInfoMap;
@@ -337,6 +344,10 @@ private:
   SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> OrderedBlks;
 };
 
+char AMDGPUCFGStructurizer::ID = 0;
+
+} // end anonymous namespace
+
 int AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const {
   MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
   if (It == BlockInfoMap.end())
@@ -379,6 +390,7 @@ bool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const {
   }
   return false;
 }
+
 AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo(
     MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
     bool AllowSideEntry) const {
@@ -697,10 +709,8 @@ void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) {
   // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
   // there isn't such an interface yet. alternatively, replace all the other
   // blocks in the jump table with the entryBlk //}
-
 }
 
-
 bool AMDGPUCFGStructurizer::prepare() {
   bool Changed = false;
 
@@ -748,7 +758,6 @@ bool AMDGPUCFGStructurizer::prepare() {
 }
 
 bool AMDGPUCFGStructurizer::run() {
-
   //Assume reducible CFG...
   DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n");
 
@@ -886,8 +895,6 @@ bool AMDGPUCFGStructurizer::run() {
   return true;
 }
 
-
-
 void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) {
   int SccNum = 0;
   MachineBasicBlock *MBB;
@@ -941,7 +948,6 @@ int AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) {
   return NumMatch;
 }
 
-
 int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) {
   if (MBB->succ_size() != 1)
     return 0;
@@ -1039,7 +1045,7 @@ int AMDGPUCFGStructurizer::loopendPatternMatch() {
   for (MachineLoop *ML : depth_first(It))
     NestedLoops.push_front(ML);
 
-  if (NestedLoops.size() == 0)
+  if (NestedLoops.empty())
     return 0;
 
   // Process nested loop outside->inside (we did push_front),
@@ -1074,7 +1080,7 @@ int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) {
   MachineBasicBlock *ExitBlk = *ExitBlks.begin();
   assert(ExitBlk && "Loop has several exit block");
   MBBVector LatchBlks;
-  typedef GraphTraits<Inverse<MachineBasicBlock*> > InvMBBTraits;
+  typedef GraphTraits<Inverse<MachineBasicBlock*>> InvMBBTraits;
   InvMBBTraits::ChildIteratorType PI = InvMBBTraits::child_begin(LoopHeader),
       PE = InvMBBTraits::child_end(LoopHeader);
   for (; PI != PE; PI++) {
@@ -1217,7 +1223,7 @@ void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf(
     }
   }
 
-  dbgs() << "\n";
+  dbgs() << "\n";
 }
 
 int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
@@ -1478,7 +1484,6 @@ void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
 
   if (LandMBB && TrueMBB && FalseMBB)
     MBB->addSuccessor(LandMBB);
-
 }
 
 void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
@@ -1491,7 +1496,6 @@ void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
 
   DstBlk->replaceSuccessor(DstBlk, LandMBB);
 }
 
-
 void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
     MachineBasicBlock *LandMBB) {
   DEBUG(dbgs() << "loopbreakPattern exiting = BB" << ExitingMBB->getNumber()
@@ -1727,11 +1731,6 @@ void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) {
          && "can't retire block yet");
 }
 
-char AMDGPUCFGStructurizer::ID = 0;
-
-} // end anonymous namespace
-
-
 INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer",
                       "AMDGPU CFG Structurizer", false, false)
 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 425fd52..fed2e93 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -16,6 +16,7 @@
 #include "Utils/AMDGPUAsmUtils.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/STLExtras.h"
@@ -39,15 +40,12 @@
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/Casting.h"
-#include "llvm/Support/Debug.h"
 #include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/SMLoc.h"
 #include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/MathExtras.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
@@ -56,7 +54,6 @@
 #include <map>
 #include <memory>
 #include <string>
-#include <vector>
 
 using namespace llvm;
 using namespace llvm::AMDGPU;
 
@@ -695,9 +692,9 @@ raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
 // .amdgpu_hsa_kernel or at EOF.
 class KernelScopeInfo {
-  int SgprIndexUnusedMin;
-  int VgprIndexUnusedMin;
-  MCContext *Ctx;
+  int SgprIndexUnusedMin = -1;
+  int VgprIndexUnusedMin = -1;
+  MCContext *Ctx = nullptr;
 
   void usesSgprAt(int i) {
     if (i >= SgprIndexUnusedMin) {
@@ -708,6 +705,7 @@ class KernelScopeInfo {
       }
     }
   }
+
   void usesVgprAt(int i) {
     if (i >= VgprIndexUnusedMin) {
       VgprIndexUnusedMin = ++i;
@@ -717,14 +715,16 @@ class KernelScopeInfo {
       }
     }
   }
+
 public:
-  KernelScopeInfo() : SgprIndexUnusedMin(-1), VgprIndexUnusedMin(-1), Ctx(nullptr)
-  {}
+  KernelScopeInfo() = default;
+
   void initialize(MCContext &Context) {
     Ctx = &Context;
     usesSgprAt(SgprIndexUnusedMin = -1);
     usesVgprAt(VgprIndexUnusedMin = -1);
   }
+
   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
     switch (RegKind) {
       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
@@ -738,9 +738,9 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
   const MCInstrInfo &MII;
   MCAsmParser &Parser;
 
-  unsigned ForcedEncodingSize;
-  bool ForcedDPP;
-  bool ForcedSDWA;
+  unsigned ForcedEncodingSize = 0;
+  bool ForcedDPP = false;
+  bool ForcedSDWA = false;
   KernelScopeInfo KernelScope;
 
   /// @name Auto-generated Match Functions
@@ -779,10 +779,7 @@ public:
   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                   const MCInstrInfo &MII,
                   const MCTargetOptions &Options)
-      : MCTargetAsmParser(Options, STI), MII(MII), Parser(_Parser),
-        ForcedEncodingSize(0),
-        ForcedDPP(false),
-        ForcedSDWA(false) {
+      : MCTargetAsmParser(Options, STI), MII(MII), Parser(_Parser) {
     MCAsmParserExtension::Initialize(Parser);
 
     if (getSTI().getFeatureBits().none()) {
@@ -1043,7 +1040,6 @@ bool AMDGPUOperand::isInlinableImm(MVT type) const {
                                      AsmParser->hasInv2PiInlineImm());
   }
 
-
   // We got int literal token.
   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
     return AMDGPU::isInlinableLiteral64(Imm.Val,
@@ -1132,7 +1128,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const {
     APInt Literal(64, Val);
 
     switch (OpSize) {
-    case 8: {
+    case 8:
       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                        AsmParser->hasInv2PiInlineImm())) {
         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
@@ -1156,7 +1152,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const {
       // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
       llvm_unreachable("fp literal in 64-bit integer instruction.");
-    }
+
     case 4:
     case 2: {
       bool lost;
@@ -1180,7 +1176,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const {
   // Only sign extend inline immediates.
   // FIXME: No errors on truncation
   switch (OpSize) {
-  case 4: {
+  case 4:
     if (isInt<32>(Val) &&
         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                      AsmParser->hasInv2PiInlineImm())) {
@@ -1190,8 +1186,8 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const {
 
     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
     return;
-  }
-  case 8: {
+
+  case 8:
     if (AMDGPU::isInlinableLiteral64(Val,
                                      AsmParser->hasInv2PiInlineImm())) {
       Inst.addOperand(MCOperand::createImm(Val));
@@ -1200,8 +1196,8 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const {
 
     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
     return;
-  }
-  case 2: {
+
+  case 2:
     if (isInt<16>(Val) &&
         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                      AsmParser->hasInv2PiInlineImm())) {
@@ -1211,7 +1207,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const {
 
     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
     return;
-  }
+
   default:
     llvm_unreachable("invalid operand size");
   }
@@ -1295,7 +1291,8 @@ static unsigned getSpecialRegForName(StringRef RegName) {
     .Default(0);
 }
 
-bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) {
+bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+                                    SMLoc &EndLoc) {
   auto R = parseRegister();
   if (!R) return true;
   assert(R->isReg());
@@ -1305,20 +1302,43 @@ bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &End
   return false;
 }
 
-bool AMDGPUAsmParser::AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, RegisterKind RegKind, unsigned Reg1, unsigned RegNum)
-{
+bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
+                                            RegisterKind RegKind, unsigned Reg1,
+                                            unsigned RegNum) {
   switch (RegKind) {
   case IS_SPECIAL:
-    if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { Reg = AMDGPU::EXEC; RegWidth = 2; return true; }
-    if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { Reg = AMDGPU::FLAT_SCR; RegWidth = 2; return true; }
-    if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { Reg = AMDGPU::VCC; RegWidth = 2; return true; }
-    if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { Reg = AMDGPU::TBA; RegWidth = 2; return true; }
-    if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { Reg = AMDGPU::TMA; RegWidth = 2; return true; }
+    if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
+      Reg = AMDGPU::EXEC;
+      RegWidth = 2;
+      return true;
+    }
+    if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
+      Reg = AMDGPU::FLAT_SCR;
+      RegWidth = 2;
+      return true;
+    }
+    if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
+      Reg = AMDGPU::VCC;
+      RegWidth = 2;
+      return true;
+    }
+    if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
+      Reg = AMDGPU::TBA;
+      RegWidth = 2;
+      return true;
+    }
+    if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
+      Reg = AMDGPU::TMA;
+      RegWidth = 2;
+      return true;
+    }
     return false;
   case IS_VGPR:
   case IS_SGPR:
   case IS_TTMP:
-    if (Reg1 != Reg + RegWidth) { return false; }
+    if (Reg1 != Reg + RegWidth) {
+      return false;
+    }
     RegWidth++;
     return true;
   default:
@@ -1326,8 +1346,9 @@ bool AMDGPUAsmParser::AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, R
   }
 }
 
-bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, unsigned& RegNum, unsigned& RegWidth, unsigned *DwordRegIndex)
-{
+bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
+                                          unsigned &RegNum, unsigned &RegWidth,
+                                          unsigned *DwordRegIndex) {
   if (DwordRegIndex) { *DwordRegIndex = 0; }
   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
   if (getLexer().is(AsmToken::Identifier)) {
@@ -1528,7 +1549,8 @@ AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands) {
 }
 
 OperandMatchResultTy
-AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm) {
+AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
+                                              bool AllowImm) {
   // XXX: During parsing we can't determine if minus sign means
   // negate-modifier or negative immediate value.
   // By default we suppose it is modifier.
@@ -1539,7 +1561,8 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, bool Allo
     Negate = true;
   }
 
-  if (getLexer().getKind() == AsmToken::Identifier && Parser.getTok().getString() == "abs") {
+  if (getLexer().getKind() == AsmToken::Identifier &&
+      Parser.getTok().getString() == "abs") {
     Parser.Lex();
     Abs2 = true;
     if (getLexer().isNot(AsmToken::LParen)) {
@@ -1597,10 +1620,12 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, bool Allo
 }
 
 OperandMatchResultTy
-AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm) {
+AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
+                                               bool AllowImm) {
   bool Sext = false;
 
-  if (getLexer().getKind() == AsmToken::Identifier && Parser.getTok().getString() == "sext") {
+  if (getLexer().getKind() == AsmToken::Identifier &&
+      Parser.getTok().getString() == "sext") {
     Parser.Lex();
     Sext = true;
     if (getLexer().isNot(AsmToken::LParen)) {
@@ -1667,7 +1692,6 @@ OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands)
 }
 
 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
-
   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
 
   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
@@ -1799,7 +1823,6 @@ bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
   return false;
 }
 
-
 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
                                                uint32_t &Minor) {
   if (ParseAsAbsoluteExpression(Major))
@@ -1816,7 +1839,6 @@ bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
 }
 
 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
-
   uint32_t Major;
   uint32_t Minor;
 
@@ -2086,7 +2108,6 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
 
 OperandMatchResultTy
 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
-
   // Try to parse with a custom parser
   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index e314345..4dde35a 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -15,7 +15,6 @@
 
 #ifdef _MSC_VER
 // Provide M_PI.
 #define _USE_MATH_DEFINES
-#include <cmath>
 #endif
 
 #include "AMDGPU.h"
@@ -26,15 +25,59 @@
 #include "SIInstrInfo.h"
 #include "SIMachineFunctionInfo.h"
 #include "SIRegisterInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/DAGCombine.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetCallingConv.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
+#include <cmath>
+#include <cstdint>
+#include <iterator>
+#include <tuple>
+#include <utility>
+#include <vector>
 
 using namespace llvm;
 
@@ -43,7 +86,6 @@ static cl::opt<bool> EnableVGPRIndexMode(
   cl::desc("Use GPR indexing mode instead of movrel for vector indexing"),
   cl::init(false));
 
-
 static unsigned findFirstFreeSGPR(CCState &CCInfo) {
   unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
   for (unsigned Reg = 0; Reg < NumSGPRs; ++Reg) {
@@ -110,7 +152,6 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
   setTruncStoreAction(MVT::v16i32, MVT::v16i8, Expand);
   setTruncStoreAction(MVT::v32i32, MVT::v32i8, Expand);
 
-
   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
   setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
   setOperationAction(ISD::ConstantPool, MVT::v2i64, Expand);
@@ -441,7 +482,7 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
     return false;
 
   switch (AS) {
-  case AMDGPUAS::GLOBAL_ADDRESS: {
+  case AMDGPUAS::GLOBAL_ADDRESS:
     if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
       // Assume the we will use FLAT for all global memory accesses
       // on VI.
@@ -456,8 +497,8 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
     }
 
     return isLegalMUBUFAddressingMode(AM);
-  }
-  case AMDGPUAS::CONSTANT_ADDRESS: {
+
+  case AMDGPUAS::CONSTANT_ADDRESS:
     // If the offset isn't a multiple of 4, it probably isn't going to be
     // correctly aligned.
     // FIXME: Can we get the real alignment here?
@@ -494,13 +535,12 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
       return true;
 
     return false;
-  }
 
   case AMDGPUAS::PRIVATE_ADDRESS:
     return isLegalMUBUFAddressingMode(AM);
 
   case AMDGPUAS::LOCAL_ADDRESS:
-  case AMDGPUAS::REGION_ADDRESS: {
+  case AMDGPUAS::REGION_ADDRESS:
     // Basic, single offset DS instructions allow a 16-bit unsigned immediate
     // field.
     // XXX - If doing a 4-byte aligned 8-byte type access, we effectively have
@@ -515,7 +555,7 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
       return true;
 
     return false;
-  }
+
   case AMDGPUAS::FLAT_ADDRESS:
   case AMDGPUAS::UNKNOWN_ADDRESS_SPACE:
     // For an unknown address space, this usually means that this is for some
@@ -897,7 +937,6 @@ SDValue SITargetLowering::LowerFormalArguments(
   SmallVector<SDValue, 16> Chains;
 
   for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
-
     const ISD::InputArg &Arg = Ins[i];
     if (Skipped[i]) {
       InVals.push_back(DAG.getUNDEF(Arg.VT));
@@ -954,7 +993,6 @@ SDValue SITargetLowering::LowerFormalArguments(
       SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT);
 
       if (Arg.VT.isVector()) {
-
         // Build a vector from the registers
         Type *ParamType = FType->getParamType(Arg.getOrigArgIndex());
         unsigned NumElements = ParamType->getVectorNumElements();
@@ -1543,7 +1581,6 @@ static MachineBasicBlock *emitIndirectSrc(MachineInstr &MI,
     return &MBB;
   }
 
-
   const DebugLoc &DL = MI.getDebugLoc();
   MachineBasicBlock::iterator I(&MI);
 
@@ -1736,13 +1773,13 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
   }
 
   switch (MI.getOpcode()) {
-  case AMDGPU::SI_INIT_M0: {
+  case AMDGPU::SI_INIT_M0:
     BuildMI(*BB, MI.getIterator(), MI.getDebugLoc(),
             TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
         .add(MI.getOperand(0));
     MI.eraseFromParent();
     return BB;
-  }
+
   case AMDGPU::GET_GROUPSTATICSIZE: {
     DebugLoc DL = MI.getDebugLoc();
     BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_MOV_B32))
@@ -2001,7 +2038,6 @@ bool SITargetLowering::shouldEmitPCReloc(const GlobalValue *GV) const {
 /// last parameter, also switches branch target with BR if the need arise
 SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
                                       SelectionDAG &DAG) const {
-
   SDLoc DL(BRCOND);
 
   SDNode *Intr = BRCOND.getOperand(1).getNode();
@@ -2399,17 +2435,15 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
   case Intrinsic::amdgcn_rsq:
   case AMDGPUIntrinsic::AMDGPU_rsq: // Legacy name
     return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
-  case Intrinsic::amdgcn_rsq_legacy: {
+  case Intrinsic::amdgcn_rsq_legacy:
    if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
       return emitRemovedIntrinsicError(DAG, DL, VT);
 
     return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
-  }
-  case Intrinsic::amdgcn_rcp_legacy: {
+  case Intrinsic::amdgcn_rcp_legacy:
     if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
       return emitRemovedIntrinsicError(DAG, DL, VT);
     return DAG.getNode(AMDGPUISD::RCP_LEGACY, DL, VT, Op.getOperand(1));
-  }
   case Intrinsic::amdgcn_rsq_clamp: {
     if (Subtarget->getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
       return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
@@ -2516,9 +2550,8 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     return DAG.getMemIntrinsicNode(AMDGPUISD::LOAD_CONSTANT, DL,
                                    Op->getVTList(), Ops, VT, MMO);
   }
-  case AMDGPUIntrinsic::amdgcn_fdiv_fast: {
+  case AMDGPUIntrinsic::amdgcn_fdiv_fast:
     return lowerFDIV_FAST(Op, DAG);
-  }
   case AMDGPUIntrinsic::SI_vs_load_input:
     return DAG.getNode(AMDGPUISD::LOAD_INPUT, DL, VT,
                        Op.getOperand(1),
@@ -2912,7 +2945,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
     // loads.
     //
     LLVM_FALLTHROUGH;
-  case AMDGPUAS::GLOBAL_ADDRESS: {
+  case AMDGPUAS::GLOBAL_ADDRESS:
     if (Subtarget->getScalarizeGlobalBehavior() && isMemOpUniform(Load) &&
         isMemOpHasNoClobberedMemOperand(Load))
       return SDValue();
@@ -2920,14 +2953,13 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
     // have the same legalization requirements as global and private
     // loads.
     //
-  }
     LLVM_FALLTHROUGH;
   case AMDGPUAS::FLAT_ADDRESS:
     if (NumElements > 4)
       return SplitVectorLoad(Op, DAG);
     // v4 loads are supported for private and global memory.
     return SDValue();
-  case AMDGPUAS::PRIVATE_ADDRESS: {
+  case AMDGPUAS::PRIVATE_ADDRESS:
     // Depending on the setting of the private_element_size field in the
     // resource descriptor, we can only make private accesses up to a certain
     // size.
@@ -2946,8 +2978,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
     default:
       llvm_unreachable("unsupported private_element_size");
     }
-  }
-  case AMDGPUAS::LOCAL_ADDRESS: {
+  case AMDGPUAS::LOCAL_ADDRESS:
     if (NumElements > 2)
       return SplitVectorLoad(Op, DAG);
 
@@ -2956,7 +2987,6 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
 
     // If properly aligned, if we split we might be able to use ds_read_b64.
     return SplitVectorLoad(Op, DAG);
-  }
   default:
     return SDValue();
   }
@@ -3454,27 +3484,24 @@ SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N,
 static bool canFoldOffset(unsigned OffsetSize, unsigned AS,
                           const SISubtarget &STI) {
   switch (AS) {
-  case AMDGPUAS::GLOBAL_ADDRESS: {
+  case AMDGPUAS::GLOBAL_ADDRESS:
     // MUBUF instructions a 12-bit offset in bytes.
     return isUInt<12>(OffsetSize);
-  }
-  case AMDGPUAS::CONSTANT_ADDRESS: {
+  case AMDGPUAS::CONSTANT_ADDRESS:
     // SMRD instructions have an 8-bit offset in dwords on SI and
     // a 20-bit offset in bytes on VI.
     if (STI.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
       return isUInt<20>(OffsetSize);
     else
       return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4);
-  }
   case AMDGPUAS::LOCAL_ADDRESS:
-  case AMDGPUAS::REGION_ADDRESS: {
+  case AMDGPUAS::REGION_ADDRESS:
     // The single offset versions have a 16-bit offset in bytes.
     return isUInt<16>(OffsetSize);
-  }
   case AMDGPUAS::PRIVATE_ADDRESS:
     // Indirect register addressing does not use any offsets.
   default:
-    return 0;
+    return false;
   }
 }
 
@@ -4176,11 +4203,10 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::ATOMIC_LOAD_UMIN:
   case ISD::ATOMIC_LOAD_UMAX:
   case AMDGPUISD::ATOMIC_INC:
-  case AMDGPUISD::ATOMIC_DEC: { // TODO: Target mem intrinsics.
+  case AMDGPUISD::ATOMIC_DEC: // TODO: Target mem intrinsics.
     if (DCI.isBeforeLegalize())
       break;
 
     return performMemSDNodeCombine(cast<MemSDNode>(N), DCI);
-  }
   case ISD::AND:
     return performAndCombine(N, DCI);
   case ISD::OR:
@@ -4291,7 +4317,6 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
 
   // Update the users of the node with the new indices
   for (unsigned i = 0, Idx = AMDGPU::sub0; i < 4; ++i) {
-
     SDNode *User = Users[i];
     if (!User)
       continue;
diff --git a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
index fe14647..c6b420f 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -1,4 +1,4 @@
-//===-- SIInsertSkips.cpp - Use predicates for control flow ----------===//
+//===-- SIInsertSkips.cpp - Use predicates for control flow ---------------===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -12,33 +12,46 @@
 /// branches when it's expected that jumping over the untaken control flow will
 /// be cheaper than having every workitem no-op through it.
 //
+//===----------------------------------------------------------------------===//
 
 #include "AMDGPU.h"
 #include "AMDGPUSubtarget.h"
 #include "SIInstrInfo.h"
 #include "SIMachineFunctionInfo.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DebugLoc.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
 
 using namespace llvm;
 
 #define DEBUG_TYPE "si-insert-skips"
 
-namespace {
-
 static cl::opt<unsigned> SkipThresholdFlag(
   "amdgpu-skip-threshold",
   cl::desc("Number of instructions before jumping over divergent control flow"),
   cl::init(12), cl::Hidden);
 
+namespace {
+
 class SIInsertSkips : public MachineFunctionPass {
 private:
-  const SIRegisterInfo *TRI;
-  const SIInstrInfo *TII;
-  unsigned SkipThreshold;
+  const SIRegisterInfo *TRI = nullptr;
+  const SIInstrInfo *TII = nullptr;
+  unsigned SkipThreshold = 0;
 
   bool shouldSkip(const MachineBasicBlock &From,
                   const MachineBasicBlock &To) const;
@@ -55,8 +68,7 @@ private:
 public:
   static char ID;
 
-  SIInsertSkips() :
-    MachineFunctionPass(ID), TRI(nullptr), TII(nullptr), SkipThreshold(0) { }
+  SIInsertSkips() : MachineFunctionPass(ID) {}
 
   bool runOnMachineFunction(MachineFunction &MF) override;
 
@@ -69,7 +81,7 @@ public:
   }
 };
 
-} // End anonymous namespace
+} // end anonymous namespace
 
 char SIInsertSkips::ID = 0;
 
@@ -270,19 +282,19 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
       MachineInstr &MI = *I;
 
       switch (MI.getOpcode()) {
-      case AMDGPU::SI_MASK_BRANCH: {
+      case AMDGPU::SI_MASK_BRANCH:
         ExecBranchStack.push_back(MI.getOperand(0).getMBB());
         MadeChange |= skipMaskBranch(MI, MBB);
         break;
-      }
-      case AMDGPU::S_BRANCH: {
+
+      case AMDGPU::S_BRANCH:
         // Optimize out branches to the next block.
        // FIXME: Shouldn't this be handled by BranchFolding?
        if (MBB.isLayoutSuccessor(MI.getOperand(0).getMBB()))
           MI.eraseFromParent();
         break;
-      }
-      case AMDGPU::SI_KILL_TERMINATOR: {
+
+      case AMDGPU::SI_KILL_TERMINATOR:
         MadeChange = true;
         kill(MI);
 
@@ -298,8 +310,8 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
 
         MI.eraseFromParent();
         break;
-      }
-      case AMDGPU::SI_RETURN: {
+
+      case AMDGPU::SI_RETURN:
         // FIXME: Should move somewhere else
         assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid());
 
@@ -318,7 +330,8 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
             .addMBB(EmptyMBBAtEnd);
           I->eraseFromParent();
         }
-      }
+        break;
+
       default:
         break;
       }
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
index fceabd7..c814e55 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
@@ -21,11 +21,28 @@
 #include "SIDefines.h"
 #include "SIInstrInfo.h"
 #include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
 #include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <new>
+#include <utility>
 
 #define DEBUG_TYPE "si-insert-waits"
 
@@ -42,7 +59,6 @@ typedef union {
     unsigned LGKM;
   } Named;
   unsigned Array[3];
-
 } Counters;
 
 typedef enum {
@@ -55,11 +71,10 @@ typedef Counters RegCounters[512];
 typedef std::pair<unsigned, unsigned> RegInterval;
 
 class SIInsertWaits : public MachineFunctionPass {
-
 private:
-  const SISubtarget *ST;
-  const SIInstrInfo *TII;
-  const SIRegisterInfo *TRI;
+  const SISubtarget *ST = nullptr;
+  const SIInstrInfo *TII = nullptr;
+  const SIRegisterInfo *TRI = nullptr;
   const MachineRegisterInfo *MRI;
 
   IsaVersion IV;
@@ -86,7 +101,7 @@ private:
   RegCounters DefinedRegs;
 
   /// \brief Different export instruction types seen since last wait.
-  unsigned ExpInstrTypesSeen;
+  unsigned ExpInstrTypesSeen = 0;
 
   /// \brief Type of the last opcode.
   InstType LastOpcodeType;
@@ -100,7 +115,7 @@ private:
   bool ReturnsVoid;
 
   /// Whether the VCCZ bit is possibly corrupt
-  bool VCCZCorrupt;
+  bool VCCZCorrupt = false;
 
   /// \brief Get increment/decrement amount for this instruction.
   Counters getHwCounts(MachineInstr &MI);
 
@@ -141,13 +156,7 @@ private:
 public:
   static char ID;
 
-  SIInsertWaits() :
-    MachineFunctionPass(ID),
-    ST(nullptr),
-    TII(nullptr),
-    TRI(nullptr),
-    ExpInstrTypesSeen(0),
-    VCCZCorrupt(false) { }
+  SIInsertWaits() : MachineFunctionPass(ID) {}
 
   bool runOnMachineFunction(MachineFunction &MF) override;
 
@@ -161,7 +170,7 @@ public:
   }
 };
 
-} // End anonymous namespace
+} // end anonymous namespace
 
 INITIALIZE_PASS_BEGIN(SIInsertWaits, DEBUG_TYPE,
                       "SI Insert Waits", false, false)
@@ -294,7 +303,6 @@ RegInterval SIInsertWaits::getRegInterval(const TargetRegisterClass *RC,
 void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator I,
                                     const Counters &Increment) {
-
   // Get the hardware counter increments and sum them up
   Counters Limit = ZeroCounts;
   unsigned Sum = 0;
@@ -366,7 +374,6 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
 bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator I,
                                const Counters &Required) {
-
   // End of program? No need to wait on anything
   // A function not returning void needs to wait, because other bytecode will
   // be appended after it and we don't know what it will be.
@@ -393,7 +400,6 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
   bool NeedWait = false;
 
   for (unsigned i = 0; i < 3; ++i) {
-
     if (Required.Array[i] <= WaitedOn.Array[i])
       continue;
 
@@ -434,7 +440,6 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
 
 /// \brief helper function for handleOperands
 static void increaseCounters(Counters &Dst, const Counters &Src) {
-
   for (unsigned i = 0; i < 3; ++i)
     Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]);
 }
@@ -468,7 +473,6 @@ void SIInsertWaits::handleExistingWait(MachineBasicBlock::iterator I) {
 }
 
 Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
-
   Counters Result = ZeroCounts;
 
   // For each register affected by this instruction increase the result
@@ -484,7 +488,6 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
       const TargetRegisterClass *RC = TII->getOpRegClass(MI, i);
       RegInterval Interval = getRegInterval(RC, Op);
       for (unsigned j = Interval.first; j < Interval.second; ++j) {
-
         if (Op.isDef()) {
           increaseCounters(Result, UsedRegs[j]);
           increaseCounters(Result, DefinedRegs[j]);
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index ae5aefc..6b0d18e 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -39,15 +39,27 @@
 #include "AMDGPUSubtarget.h"
 #include "SIInstrInfo.h"
 #include "SIRegisterInfo.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/LiveVariables.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
+#include <cassert>
+#include <cstdlib>
+#include <iterator>
 
 using namespace llvm;
 
@@ -57,10 +69,10 @@ namespace {
 
 class SILoadStoreOptimizer : public MachineFunctionPass {
 private:
-  const SIInstrInfo *TII;
-  const SIRegisterInfo *TRI;
-  MachineRegisterInfo *MRI;
-  AliasAnalysis *AA;
+  const SIInstrInfo *TII = nullptr;
+  const SIRegisterInfo *TRI = nullptr;
+  MachineRegisterInfo *MRI = nullptr;
+  AliasAnalysis *AA = nullptr;
 
   static bool offsetsCanBeCombined(unsigned Offset0,
                                    unsigned Offset1,
@@ -86,9 +98,7 @@ private:
 public:
   static char ID;
 
-  SILoadStoreOptimizer()
-      : MachineFunctionPass(ID), TII(nullptr), TRI(nullptr), MRI(nullptr),
-        AA(nullptr) {}
+  SILoadStoreOptimizer() : MachineFunctionPass(ID) {}
 
   SILoadStoreOptimizer(const TargetMachine &TM_) : MachineFunctionPass(ID) {
     initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
@@ -108,7 +118,7 @@ public:
   }
 };
 
-} // End anonymous namespace.
+} // end anonymous namespace.
 
 INITIALIZE_PASS_BEGIN(SILoadStoreOptimizer, DEBUG_TYPE,
                       "SI Load / Store Optimizer", false, false)
@@ -141,11 +151,10 @@ static void addDefsToList(const MachineInstr &MI,
   }
 }
 
-static bool memAccessesCanBeReordered(
-  MachineBasicBlock::iterator A,
-  MachineBasicBlock::iterator B,
-  const SIInstrInfo *TII,
-  llvm::AliasAnalysis * AA) {
+static bool memAccessesCanBeReordered(MachineBasicBlock::iterator A,
+                                      MachineBasicBlock::iterator B,
+                                      const SIInstrInfo *TII,
+                                      AliasAnalysis * AA) {
   return (TII->areMemAccessesTriviallyDisjoint(*A, *B, AA) ||
     // RAW or WAR - cannot reorder
     // WAW - cannot reorder
@@ -179,7 +188,6 @@ canMoveInstsAcrossMemOp(MachineInstr &MemOp,
                         ArrayRef<MachineInstr *> InstsToMove,
                         const SIInstrInfo *TII,
                         AliasAnalysis *AA) {
-
   assert(MemOp.mayLoadOrStore());
 
   for (MachineInstr *InstToMove : InstsToMove) {
@@ -230,7 +238,6 @@ SILoadStoreOptimizer::findMatchingDSInst(MachineBasicBlock::iterator I,
   addDefsToList(*I, DefsToMove);
 
   for ( ; MBBI != E; ++MBBI) {
-
     if (MBBI->getOpcode() != I->getOpcode()) {
 
       // This is not a matching DS instruction, but we can keep looking as
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 3b4e233..2d92068 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -16,13 +16,16 @@
 
 #include "AMDGPUMachineFunction.h"
 #include "SIRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
 #include <array>
+#include <cassert>
 #include <map>
+#include <utility>
 
 namespace llvm {
 
-class MachineRegisterInfo;
-
 class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
 public:
   explicit AMDGPUImagePseudoSourceValue() :
@@ -174,10 +177,12 @@ private:
 
 public:
   struct SpilledReg {
-    unsigned VGPR;
-    int Lane;
+    unsigned VGPR = AMDGPU::NoRegister;
+    int Lane = -1;
+
+    SpilledReg() = default;
     SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) { }
-    SpilledReg() : VGPR(AMDGPU::NoRegister), Lane(-1) { }
+
     bool hasLane() { return Lane != -1;}
     bool hasReg() { return VGPR != AMDGPU::NoRegister;}
   };
@@ -185,6 +190,7 @@ public:
 
   // SIMachineFunctionInfo definition
   SIMachineFunctionInfo(const MachineFunction &MF);
+
   SpilledReg getSpilledReg(MachineFunction *MF, unsigned FrameIndex,
                            unsigned SubIdx);
   bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; };
@@ -495,6 +501,6 @@ public:
   }
 };
 
-} // End namespace llvm
+} // end namespace llvm
 
-#endif
+#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
diff --git a/llvm/lib/Target/AMDGPU/SIMachineScheduler.h b/llvm/lib/Target/AMDGPU/SIMachineScheduler.h
index 77c0735..2dc4b34 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineScheduler.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineScheduler.h
@@ -40,13 +40,12 @@ enum SIScheduleCandReason {
 
 struct SISchedulerCandidate {
   // The reason for this candidate.
-  SIScheduleCandReason Reason;
+  SIScheduleCandReason Reason = NoCand;
 
   // Set of reasons that apply to multiple candidates.
-  uint32_t RepeatReasonSet;
+  uint32_t RepeatReasonSet = 0;
 
-  SISchedulerCandidate()
-    : Reason(NoCand), RepeatReasonSet(0) {}
+  SISchedulerCandidate() = default;
 
   bool isRepeat(SIScheduleCandReason R) { return RepeatReasonSet & (1 << R); }
   void setRepeat(SIScheduleCandReason R) { RepeatReasonSet |= (1 << R); }
@@ -84,8 +83,8 @@ class SIScheduleBlock {
   std::set<unsigned> LiveInRegs;
   std::set<unsigned> LiveOutRegs;
 
-  bool Scheduled;
-  bool HighLatencyBlock;
+  bool Scheduled = false;
+  bool HighLatencyBlock = false;
 
   std::vector<unsigned> HasLowLatencyNonWaitedParent;
 
@@ -94,13 +93,12 @@ class SIScheduleBlock {
   std::vector<SIScheduleBlock*> Preds;  // All blocks predecessors.
   std::vector<SIScheduleBlock*> Succs;  // All blocks successors.
-  unsigned NumHighLatencySuccessors;
+  unsigned NumHighLatencySuccessors = 0;
 
 public:
   SIScheduleBlock(SIScheduleDAGMI *DAG, SIScheduleBlockCreator *BC,
                   unsigned ID):
-    DAG(DAG), BC(BC), TopRPTracker(TopPressure), Scheduled(false),
-    HighLatencyBlock(false), ID(ID), NumHighLatencySuccessors(0) {}
+    DAG(DAG), BC(BC), TopRPTracker(TopPressure), ID(ID) {}
 
   ~SIScheduleBlock() = default;
 
@@ -213,9 +211,9 @@ struct SIScheduleBlocks {
 };
 
 enum SISchedulerBlockCreatorVariant {
-    LatenciesAlone,
-    LatenciesGrouped,
-    LatenciesAlonePlusConsecutive
+  LatenciesAlone,
+  LatenciesGrouped,
+  LatenciesAlonePlusConsecutive
 };
 
 class SIScheduleBlockCreator {