From 59e128266c9de11ba334450e1c3b7101155bd55b Mon Sep 17 00:00:00 2001
From: Eugene Zelenko <eugene.zelenko@gmail.com>
Date: Tue, 8 Aug 2017 00:47:13 +0000
Subject: [PATCH] [AMDGPU] Fix some Clang-tidy modernize-use-using and Include
 What You Use warnings; other minor fixes (NFC).

llvm-svn: 310328
---
 llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp |  36 ++++---
 llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp       |  51 ++++++----
 llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp      | 121 +++++++++++++----------
 llvm/lib/Target/AMDGPU/SIInsertWaits.cpp         |  21 ++--
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp           |  72 +++++++++-----
 llvm/lib/Target/AMDGPU/SIInstrInfo.h             |  45 ++++++---
 llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp  |  11 +--
 llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp |  35 ++-----
 llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h   |  75 +++++++-------
 llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp        |  42 +++++---
 10 files changed, 294 insertions(+), 215 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
index 8cb35c5..150d8c3 100644
--- a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
@@ -1,4 +1,4 @@
-//===-- SIAnnotateControlFlow.cpp -  ------------------===//
+//===- SIAnnotateControlFlow.cpp ------------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,16 +14,32 @@
 
 #include "AMDGPU.h"
 #include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/DivergenceAnalysis.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/ValueHandle.h"
 #include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include <cassert>
+#include <utility>
 
 using namespace llvm;
 
@@ -32,8 +48,8 @@ using namespace llvm;
 namespace {
 
 // Complex types used in this pass
-typedef std::pair<BasicBlock *, Value *> StackEntry;
-typedef SmallVector<StackEntry, 16> StackVector;
+using StackEntry = std::pair<BasicBlock *, Value *>;
+using StackVector = SmallVector<StackEntry, 16>;
 
 class SIAnnotateControlFlow : public FunctionPass {
   DivergenceAnalysis *DA;
@@ -89,8 +105,7 @@ class SIAnnotateControlFlow : public FunctionPass {
 public:
   static char ID;
 
-  SIAnnotateControlFlow():
-    FunctionPass(ID) { }
+  SIAnnotateControlFlow() : FunctionPass(ID) {}
 
   bool doInitialization(Module &M) override;
 
@@ -105,7 +120,6 @@ public:
     AU.addPreserved<DominatorTreeWrapperPass>();
     FunctionPass::getAnalysisUsage(AU);
   }
-
 };
 
 } // end anonymous namespace
@@ -186,7 +200,7 @@ bool SIAnnotateControlFlow::isElse(PHINode *Phi) {
 
 // \brief Erase "Phi" if it is not used any more
 void SIAnnotateControlFlow::eraseIfUnused(PHINode *Phi) {
-  if (llvm::RecursivelyDeleteDeadPHINode(Phi)) {
+  if (RecursivelyDeleteDeadPHINode(Phi)) {
     DEBUG(dbgs() << "Erased unused condition phi\n");
   }
 }
@@ -215,7 +229,6 @@ void SIAnnotateControlFlow::insertElse(BranchInst *Term) {
 Value *SIAnnotateControlFlow::handleLoopCondition(
     Value *Cond, PHINode *Broken, llvm::Loop *L, BranchInst *Term,
     SmallVectorImpl<WeakTrackingVH> &LoopPhiConditions) {
-
   // Only search through PHI nodes which are inside the loop.  If we try this
   // with PHI nodes that are outside of the loop, we end up inserting new PHI
   // nodes outside of the loop which depend on values defined inside the loop.
@@ -223,7 +236,6 @@ Value *SIAnnotateControlFlow::handleLoopCondition(
   // 'Instruction does not dominate all users!' errors.
   PHINode *Phi = nullptr;
   if ((Phi = dyn_cast<PHINode>(Cond)) && L->contains(Phi)) {
-
     BasicBlock *Parent = Phi->getParent();
     PHINode *NewPhi = PHINode::Create(Int64, 0, "loop.phi", &Parent->front());
     Value *Ret = NewPhi;
@@ -333,7 +345,7 @@ void SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
 
   Term->setCondition(CallInst::Create(Loop, Arg, "", Term));
 
-  for (WeakTrackingVH Val : reverse(LoopPhiConditions)) {
+  for (WeakTrackingVH Val : llvm::reverse(LoopPhiConditions)) {
     if (PHINode *Cond = cast_or_null<PHINode>(Val))
       eraseIfUnused(Cond);
   }
@@ -360,7 +372,7 @@ void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
         Preds.push_back(Pred);
     }
 
-    BB = llvm::SplitBlockPredecessors(BB, Preds, "endcf.split", DT, LI, false);
+    BB = SplitBlockPredecessors(BB, Preds, "endcf.split", DT, LI, false);
   }
 
   Value *Exec = popSaved();
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 86ad8df..73a9158 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -1,4 +1,4 @@
-//===-- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies --------===//
+//===- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies ---------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -68,14 +68,33 @@
 #include "AMDGPU.h"
 #include "AMDGPUSubtarget.h"
 #include "SIInstrInfo.h"
+#include "SIRegisterInfo.h"
 #include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <list>
+#include <map>
+#include <tuple>
+#include <utility>
 
 using namespace llvm;
 
@@ -89,13 +108,12 @@ static cl::opt<bool> EnableM0Merge(
 namespace {
 
 class SIFixSGPRCopies : public MachineFunctionPass {
-
   MachineDominatorTree *MDT;
 
 public:
   static char ID;
 
-  SIFixSGPRCopies() : MachineFunctionPass(ID) { }
+  SIFixSGPRCopies() : MachineFunctionPass(ID) {}
 
   bool runOnMachineFunction(MachineFunction &MF) override;
 
@@ -109,7 +127,7 @@ public:
   }
 };
 
-} // End anonymous namespace
+} // end anonymous namespace
 
 INITIALIZE_PASS_BEGIN(SIFixSGPRCopies, DEBUG_TYPE,
                      "SI Fix SGPR copies", false, false)
@@ -117,7 +135,6 @@ INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
 INITIALIZE_PASS_END(SIFixSGPRCopies, DEBUG_TYPE,
                      "SI Fix SGPR copies", false, false)
 
-
 char SIFixSGPRCopies::ID = 0;
 
 char &llvm::SIFixSGPRCopiesID = SIFixSGPRCopies::ID;
@@ -287,7 +304,6 @@ static bool phiHasVGPROperands(const MachineInstr &PHI,
                                const MachineRegisterInfo &MRI,
                                const SIRegisterInfo *TRI,
                                const SIInstrInfo *TII) {
-
   for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
     unsigned Reg = PHI.getOperand(i).getReg();
     if (TRI->hasVGPRs(MRI.getRegClass(Reg)))
@@ -295,10 +311,10 @@ static bool phiHasVGPROperands(const MachineInstr &PHI,
   }
   return false;
 }
+
 static bool phiHasBreakDef(const MachineInstr &PHI,
                            const MachineRegisterInfo &MRI,
                            SmallSet<unsigned, 8> &Visited) {
-
   for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
     unsigned Reg = PHI.getOperand(i).getReg();
     if (Visited.count(Reg))
@@ -337,7 +353,6 @@ static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
                                     const SIInstrInfo *TII,
                                     unsigned &SMovOp,
                                     int64_t &Imm) {
-
   if (Copy->getOpcode() != AMDGPU::COPY)
     return false;
 
@@ -371,13 +386,12 @@ template <class UnaryPredicate>
 bool searchPredecessors(const MachineBasicBlock *MBB,
                         const MachineBasicBlock *CutOff,
                         UnaryPredicate Predicate) {
-
   if (MBB == CutOff)
     return false;
 
-  DenseSet<const MachineBasicBlock*> Visited;
-  SmallVector<MachineBasicBlock*, 4> Worklist(MBB->pred_begin(),
-                                              MBB->pred_end());
+  DenseSet<const MachineBasicBlock *> Visited;
+  SmallVector<MachineBasicBlock *, 4> Worklist(MBB->pred_begin(),
+                                               MBB->pred_end());
 
   while (!Worklist.empty()) {
     MachineBasicBlock *MBB = Worklist.pop_back_val();
@@ -433,7 +447,7 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
                                    const MachineRegisterInfo &MRI,
                                    MachineDominatorTree &MDT) {
   // List of inits by immediate value.
-  typedef std::map<unsigned, std::list<MachineInstr*>> InitListMap;
+  using InitListMap = std::map<unsigned, std::list<MachineInstr *>>;
   InitListMap Inits;
   // List of clobbering instructions.
   SmallVector<MachineInstr*, 8> Clobbers;
@@ -490,9 +504,10 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
                      MDT.properlyDominates(Clobber->getParent(), MBBTo));
           };
 
-          return (any_of(Clobbers, interferes)) ||
-                 (any_of(Inits, [&](InitListMap::value_type &C) {
-                    return C.first != Init.first && any_of(C.second, interferes);
+          return (llvm::any_of(Clobbers, interferes)) ||
+                 (llvm::any_of(Inits, [&](InitListMap::value_type &C) {
+                    return C.first != Init.first &&
+                           llvm::any_of(C.second, interferes);
                   }));
         };
 
@@ -558,7 +573,6 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
 
   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
                                                   BI != BE; ++BI) {
-
     MachineBasicBlock &MBB = *BI;
     for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
          I != E; ++I) {
@@ -661,7 +675,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
         }
         break;
       }
-      case AMDGPU::REG_SEQUENCE: {
+      case AMDGPU::REG_SEQUENCE:
         if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) ||
             !hasVGPROperands(MI, TRI)) {
           foldVGPRCopyIntoRegSequence(MI, TRI, TII, MRI);
@@ -672,7 +686,6 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
 
         TII->moveToVALU(MI);
         break;
-      }
       case AMDGPU::INSERT_SUBREG: {
         const TargetRegisterClass *DstRC, *Src0RC, *Src1RC;
         DstRC = MRI.getRegClass(MI.getOperand(0).getReg());
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 44185f4..1cb502d 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1,4 +1,4 @@
-//===-- SIInsertWaitcnts.cpp - Insert Wait Instructions --------------------===/
+//===- SIInsertWaitcnts.cpp - Insert Wait Instructions --------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -21,12 +21,34 @@
 #include "SIDefines.h"
 #include "SIInstrInfo.h"
 #include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
 #include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <utility>
+#include <vector>
 
 #define DEBUG_TYPE "si-insert-waitcnts"
 
@@ -42,7 +64,7 @@ namespace {
 
 enum InstCounterType { VM_CNT = 0, LGKM_CNT, EXP_CNT, NUM_INST_CNTS };
 
-typedef std::pair<signed, signed> RegInterval;
+using RegInterval = std::pair<signed, signed>;
 
 struct {
   int32_t VmcntMax;
@@ -101,6 +123,15 @@ enum RegisterMapping {
 // "s_waitcnt 0" before use.
 class BlockWaitcntBrackets {
 public:
+  BlockWaitcntBrackets() {
+    for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
+         T = (enum InstCounterType)(T + 1)) {
+      memset(VgprScores[T], 0, sizeof(VgprScores[T]));
+    }
+  }
+
+  ~BlockWaitcntBrackets() = default;
+
   static int32_t getWaitCountMax(InstCounterType T) {
     switch (T) {
     case VM_CNT:
@@ -113,14 +144,14 @@ public:
       break;
     }
     return 0;
-  };
+  }
 
   void setScoreLB(InstCounterType T, int32_t Val) {
     assert(T < NUM_INST_CNTS);
     if (T >= NUM_INST_CNTS)
       return;
     ScoreLBs[T] = Val;
-  };
+  }
 
   void setScoreUB(InstCounterType T, int32_t Val) {
     assert(T < NUM_INST_CNTS);
@@ -132,21 +163,21 @@ public:
       if (ScoreLBs[T] < UB)
         ScoreLBs[T] = UB;
     }
-  };
+  }
 
   int32_t getScoreLB(InstCounterType T) {
     assert(T < NUM_INST_CNTS);
     if (T >= NUM_INST_CNTS)
       return 0;
     return ScoreLBs[T];
-  };
+  }
 
   int32_t getScoreUB(InstCounterType T) {
     assert(T < NUM_INST_CNTS);
     if (T >= NUM_INST_CNTS)
       return 0;
     return ScoreUBs[T];
-  };
+  }
 
   // Mapping from event to counter.
   InstCounterType eventCounter(WaitEventType E) {
@@ -218,26 +249,18 @@ public:
   void setEventUB(enum WaitEventType W, int32_t Val) { EventUBs[W] = Val; }
   int32_t getMaxVGPR() const { return VgprUB; }
   int32_t getMaxSGPR() const { return SgprUB; }
+
   int32_t getEventUB(enum WaitEventType W) const {
     assert(W < NUM_WAIT_EVENTS);
     return EventUBs[W];
   }
+
   bool counterOutOfOrder(InstCounterType T);
   unsigned int updateByWait(InstCounterType T, int ScoreToWait);
   void updateByEvent(const SIInstrInfo *TII, const SIRegisterInfo *TRI,
                      const MachineRegisterInfo *MRI, WaitEventType E,
                      MachineInstr &MI);
 
-  BlockWaitcntBrackets()
-      : WaitAtBeginning(false), RevisitLoop(false), ValidLoop(false), MixedExpTypes(false),
-        LoopRegion(NULL), PostOrder(0), Waitcnt(NULL), VgprUB(0), SgprUB(0) {
-    for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
-         T = (enum InstCounterType)(T + 1)) {
-      memset(VgprScores[T], 0, sizeof(VgprScores[T]));
-    }
-  }
-  ~BlockWaitcntBrackets(){};
-
   bool hasPendingSMEM() const {
     return (EventUBs[SMEM_ACCESS] > ScoreLBs[LGKM_CNT] &&
             EventUBs[SMEM_ACCESS] <= ScoreUBs[LGKM_CNT]);
@@ -266,7 +289,7 @@ public:
   int32_t getPostOrder() const { return PostOrder; }
 
   void setWaitcnt(MachineInstr *WaitcntIn) { Waitcnt = WaitcntIn; }
-  void clearWaitcnt() { Waitcnt = NULL; }
+  void clearWaitcnt() { Waitcnt = nullptr; }
   MachineInstr *getWaitcnt() const { return Waitcnt; }
 
   bool mixedExpTypes() const { return MixedExpTypes; }
@@ -278,13 +301,13 @@ public:
   void dump() { print(dbgs()); }
 
 private:
-  bool WaitAtBeginning;
-  bool RevisitLoop;
-  bool ValidLoop;
-  bool MixedExpTypes;
-  MachineLoop *LoopRegion;
-  int32_t PostOrder;
-  MachineInstr *Waitcnt;
+  bool WaitAtBeginning = false;
+  bool RevisitLoop = false;
+  bool ValidLoop = false;
+  bool MixedExpTypes = false;
+  MachineLoop *LoopRegion = nullptr;
+  int32_t PostOrder = 0;
+  MachineInstr *Waitcnt = nullptr;
   int32_t ScoreLBs[NUM_INST_CNTS] = {0};
   int32_t ScoreUBs[NUM_INST_CNTS] = {0};
   int32_t EventUBs[NUM_WAIT_EVENTS] = {0};
@@ -292,8 +315,8 @@ private:
   int32_t LastFlat[NUM_INST_CNTS] = {0};
   // wait_cnt scores for every vgpr.
   // Keep track of the VgprUB and SgprUB to make merge at join efficient.
-  int32_t VgprUB;
-  int32_t SgprUB;
+  int32_t VgprUB = 0;
+  int32_t SgprUB = 0;
   int32_t VgprScores[NUM_INST_CNTS][NUM_ALL_VGPRS];
   // Wait cnt scores for every sgpr, only lgkmcnt is relevant.
   int32_t SgprScores[SQ_MAX_PGM_SGPRS] = {0};
@@ -306,38 +329,36 @@ private:
 // at the end of the loop footer.
 class LoopWaitcntData {
 public:
+  LoopWaitcntData() = default;
+  ~LoopWaitcntData() = default;
+
   void incIterCnt() { IterCnt++; }
   void resetIterCnt() { IterCnt = 0; }
   int32_t getIterCnt() { return IterCnt; }
 
-  LoopWaitcntData() : LfWaitcnt(NULL), IterCnt(0) {}
-  ~LoopWaitcntData(){};
-
   void setWaitcnt(MachineInstr *WaitcntIn) { LfWaitcnt = WaitcntIn; }
   MachineInstr *getWaitcnt() const { return LfWaitcnt; }
 
   void print() {
     DEBUG(dbgs() << "  iteration " << IterCnt << '\n';);
-    return;
   }
 
 private:
   // s_waitcnt added at the end of loop footer to stablize wait scores
   // at the end of the loop footer.
-  MachineInstr *LfWaitcnt;
+  MachineInstr *LfWaitcnt = nullptr;
   // Number of iterations the loop has been visited, not including the initial
   // walk over.
-  int32_t IterCnt;
+  int32_t IterCnt = 0;
 };
 
 class SIInsertWaitcnts : public MachineFunctionPass {
-
 private:
-  const SISubtarget *ST;
-  const SIInstrInfo *TII;
-  const SIRegisterInfo *TRI;
-  const MachineRegisterInfo *MRI;
-  const MachineLoopInfo *MLI;
+  const SISubtarget *ST = nullptr;
+  const SIInstrInfo *TII = nullptr;
+  const SIRegisterInfo *TRI = nullptr;
+  const MachineRegisterInfo *MRI = nullptr;
+  const MachineLoopInfo *MLI = nullptr;
   AMDGPU::IsaInfo::IsaVersion IV;
   AMDGPUAS AMDGPUASI;
 
@@ -357,9 +378,7 @@ private:
 public:
   static char ID;
 
-  SIInsertWaitcnts()
-      : MachineFunctionPass(ID), ST(nullptr), TII(nullptr), TRI(nullptr),
-        MRI(nullptr), MLI(nullptr) {}
+  SIInsertWaitcnts() : MachineFunctionPass(ID) {}
 
   bool runOnMachineFunction(MachineFunction &MF) override;
 
@@ -376,7 +395,8 @@ public:
   void addKillWaitBracket(BlockWaitcntBrackets *Bracket) {
     // The waitcnt information is copied because it changes as the block is
     // traversed.
-    KillWaitBrackets.push_back(make_unique<BlockWaitcntBrackets>(*Bracket));
+    KillWaitBrackets.push_back(
+        llvm::make_unique<BlockWaitcntBrackets>(*Bracket));
   }
 
   bool mayAccessLDSThroughFlat(const MachineInstr &MI) const;
@@ -390,7 +410,7 @@ public:
   void insertWaitcntBeforeCF(MachineBasicBlock &Block, MachineInstr *Inst);
 };
 
-} // End anonymous namespace.
+} // end anonymous namespace
 
 RegInterval BlockWaitcntBrackets::getRegInterval(const MachineInstr *MI,
                                                  const SIInstrInfo *TII,
@@ -643,7 +663,6 @@ void BlockWaitcntBrackets::print(raw_ostream &OS) {
     OS << '\n';
   }
   OS << '\n';
-  return;
 }
 
 unsigned int BlockWaitcntBrackets::updateByWait(InstCounterType T,
@@ -1098,7 +1117,8 @@ MachineInstr *SIInsertWaitcnts::generateSWaitCntInstBefore(
               BlockWaitcntBracketsMap[TBB].get();
           if (!ScoreBracket) {
             assert(BlockVisitedSet.find(TBB) == BlockVisitedSet.end());
-            BlockWaitcntBracketsMap[TBB] = make_unique<BlockWaitcntBrackets>();
+            BlockWaitcntBracketsMap[TBB] =
+                llvm::make_unique<BlockWaitcntBrackets>();
             ScoreBracket = BlockWaitcntBracketsMap[TBB].get();
           }
           ScoreBracket->setRevisitLoop(true);
@@ -1145,8 +1165,6 @@ void SIInsertWaitcnts::insertWaitcntBeforeCF(MachineBasicBlock &MBB,
   } else {
     MBB.push_back(Waitcnt);
   }
-
-  return;
 }
 
 // This is a flat memory operation. Check to see if it has memory
@@ -1764,13 +1782,13 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
 
     BlockWaitcntBrackets *ScoreBrackets = BlockWaitcntBracketsMap[&MBB].get();
     if (!ScoreBrackets) {
-      BlockWaitcntBracketsMap[&MBB] = make_unique<BlockWaitcntBrackets>();
+      BlockWaitcntBracketsMap[&MBB] = llvm::make_unique<BlockWaitcntBrackets>();
       ScoreBrackets = BlockWaitcntBracketsMap[&MBB].get();
     }
     ScoreBrackets->setPostOrder(MBB.getNumber());
     MachineLoop *ContainingLoop = MLI->getLoopFor(&MBB);
     if (ContainingLoop && LoopWaitcntDataMap[ContainingLoop] == nullptr)
-      LoopWaitcntDataMap[ContainingLoop] = make_unique<LoopWaitcntData>();
+      LoopWaitcntDataMap[ContainingLoop] = llvm::make_unique<LoopWaitcntData>();
 
     // If we are walking into the block from before the loop, then guarantee
     // at least 1 re-walk over the loop to propagate the information, even if
@@ -1831,12 +1849,10 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
 
   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
        ++BI) {
-
     MachineBasicBlock &MBB = *BI;
 
     for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;
          ++I) {
-
       if (!HaveScalarStores && TII->isScalarStore(*I))
         HaveScalarStores = true;
 
@@ -1859,7 +1875,6 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
 
       for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
            ++I) {
-
         if (I->getOpcode() == AMDGPU::S_DCACHE_WB)
           SeenDCacheWB = true;
         else if (TII->isScalarStore(*I))
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
index bc86515..b074b95 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
@@ -1,4 +1,4 @@
-//===-- SILowerControlFlow.cpp - Use predicates for control flow ----------===//
+//===- SIInsertWaits.cpp - Insert Wait Instructions -----------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -33,15 +33,14 @@
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCInstrDesc.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
 #include <cstring>
-#include <new>
 #include <utility>
 
 #define DEBUG_TYPE "si-insert-waits"
@@ -51,23 +50,23 @@ using namespace llvm;
 namespace {
 
 /// \brief One variable for each of the hardware counters
-typedef union {
+using Counters = union {
   struct {
     unsigned VM;
     unsigned EXP;
     unsigned LGKM;
   } Named;
   unsigned Array[3];
-} Counters;
+};
 
-typedef enum {
+using InstType = enum {
   OTHER,
   SMEM,
   VMEM
-} InstType;
+};
 
-typedef Counters RegCounters[512];
-typedef std::pair<unsigned, unsigned> RegInterval;
+using RegCounters = Counters[512];
+using RegInterval = std::pair<unsigned, unsigned>;
 
 class SIInsertWaits : public MachineFunctionPass {
 private:
@@ -409,7 +408,6 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
 
       // Adjust the value to the real hardware possibilities.
       Counts.Array[i] = std::min(Value, HardwareLimits.Array[i]);
-
     } else
       Counts.Array[i] = 0;
 
@@ -568,12 +566,10 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
 
   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
        BI != BE; ++BI) {
-
     MachineBasicBlock &MBB = *BI;
 
     for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
          I != E; ++I) {
-
       if (!HaveScalarStores && TII->isScalarStore(*I))
         HaveScalarStores = true;
 
@@ -671,7 +667,6 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
 
       for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
            I != E; ++I) {
-
         if (I->getOpcode() == AMDGPU::S_DCACHE_WB)
           SeenDCacheWB = true;
         else if (TII->isScalarStore(*I))
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index e9492c4..ad9deef 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1,4 +1,4 @@
-//===-- SIInstrInfo.cpp - SI Instruction Information  ---------------------===//
+//===- SIInstrInfo.cpp - SI Instruction Information -----------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -13,19 +13,51 @@
 //===----------------------------------------------------------------------===//
 
 #include "SIInstrInfo.h"
-#include "AMDGPUTargetMachine.h"
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
 #include "GCNHazardRecognizer.h"
 #include "SIDefines.h"
 #include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/LLVMContext.h"
 #include "llvm/MC/MCInstrDesc.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOpcodes.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <utility>
 
 using namespace llvm;
 
@@ -462,7 +494,6 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     }
   }
 
-
   ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RC, EltSize);
   bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
 
@@ -933,7 +964,6 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(
 
     if (!AMDGPU::isShader(MF->getFunction()->getCallingConv()) &&
         WorkGroupSize > WavefrontSize) {
-
       unsigned TIDIGXReg
         = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
       unsigned TIDIGYReg
@@ -1053,24 +1083,24 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
   DebugLoc DL = MBB.findDebugLoc(MI);
   switch (MI.getOpcode()) {
   default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);
-  case AMDGPU::S_MOV_B64_term: {
+  case AMDGPU::S_MOV_B64_term:
     // This is only a terminator to get the correct spill code placement during
     // register allocation.
     MI.setDesc(get(AMDGPU::S_MOV_B64));
     break;
-  }
-  case AMDGPU::S_XOR_B64_term: {
+
+  case AMDGPU::S_XOR_B64_term:
     // This is only a terminator to get the correct spill code placement during
     // register allocation.
     MI.setDesc(get(AMDGPU::S_XOR_B64));
     break;
-  }
-  case AMDGPU::S_ANDN2_B64_term: {
+
+  case AMDGPU::S_ANDN2_B64_term:
     // This is only a terminator to get the correct spill code placement during
     // register allocation.
     MI.setDesc(get(AMDGPU::S_ANDN2_B64));
     break;
-  }
+
   case AMDGPU::V_MOV_B64_PSEUDO: {
     unsigned Dst = MI.getOperand(0).getReg();
     unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
@@ -1173,7 +1203,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
       MIB.add(MI.getOperand(2));
 
     Bundler.append(MIB);
-    llvm::finalizeBundle(MBB, Bundler.begin());
+    finalizeBundle(MBB, Bundler.begin());
 
     MI.eraseFromParent();
     break;
@@ -1271,7 +1301,6 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
     return nullptr;
   }
 
-
   if (CommutedMI) {
     swapSourceModifiers(MI, Src0, AMDGPU::OpName::src0_modifiers,
                         Src1, AMDGPU::OpName::src1_modifiers);
@@ -1581,7 +1610,6 @@ unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB,
                                    ArrayRef<MachineOperand> Cond,
                                    const DebugLoc &DL,
                                    int *BytesAdded) const {
-
   if (!FBB && Cond.empty()) {
     BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH))
       .addMBB(TBB);
@@ -2172,10 +2200,9 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
   case AMDGPU::OPERAND_REG_IMM_INT64:
   case AMDGPU::OPERAND_REG_IMM_FP64:
   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
-  case AMDGPU::OPERAND_REG_INLINE_C_FP64: {
+  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
     return AMDGPU::isInlinableLiteral64(MO.getImm(),
                                         ST.hasInv2PiInlineImm());
-  }
   case AMDGPU::OPERAND_REG_IMM_INT16:
   case AMDGPU::OPERAND_REG_IMM_FP16:
   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
@@ -2478,7 +2505,6 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
 
   // Verify SDWA
   if (isSDWA(MI)) {
-
     if (!ST.hasSDWA()) {
       ErrInfo = "SDWA is not supported on this target";
       return false;
@@ -3131,7 +3157,6 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
                                          MachineOperand &Op,
                                          MachineRegisterInfo &MRI,
                                          const DebugLoc &DL) const {
-
   unsigned OpReg = Op.getReg();
   unsigned OpSubReg = Op.getSubReg();
 
@@ -3489,11 +3514,10 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
       Inst.eraseFromParent();
       continue;
 
-    case AMDGPU::S_BFE_I64: {
+    case AMDGPU::S_BFE_I64:
       splitScalar64BitBFE(Worklist, Inst);
       Inst.eraseFromParent();
       continue;
-    }
 
     case AMDGPU::S_LSHL_B32:
       if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
@@ -3552,12 +3576,11 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
 
     case AMDGPU::S_PACK_LL_B32_B16:
     case AMDGPU::S_PACK_LH_B32_B16:
-    case AMDGPU::S_PACK_HH_B32_B16: {
+    case AMDGPU::S_PACK_HH_B32_B16:
       movePackToVALU(Worklist, MRI, Inst);
       Inst.eraseFromParent();
       continue;
     }
-    }
 
     if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
       // We cannot move this instruction to the VALU, so we should try to
@@ -3977,8 +4000,8 @@ void SIInstrInfo::addSCCDefUsersToVALUWorklist(
   // This assumes that all the users of SCC are in the same block
   // as the SCC def.
   for (MachineInstr &MI :
-       llvm::make_range(MachineBasicBlock::iterator(SCCDefInst),
-                        SCCDefInst.getParent()->end())) {
+       make_range(MachineBasicBlock::iterator(SCCDefInst),
+                  SCCDefInst.getParent()->end())) {
     // Exit if we find another SCC def.
     if (MI.findRegisterDefOperandIdx(AMDGPU::SCC) != -1)
       return;
@@ -4166,7 +4189,6 @@ unsigned SIInstrInfo::isSGPRStackAccess(const MachineInstr &MI,
 
 unsigned SIInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                           int &FrameIndex) const {
-
   if (!MI.mayLoad())
     return AMDGPU::NoRegister;
 
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 0dbe080..b2df4e0 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1,4 +1,4 @@
-//===-- SIInstrInfo.h - SI Instruction Info Interface -----------*- C++ -*-===//
+//===- SIInstrInfo.h - SI Instruction Info Interface ------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,17 +12,33 @@
 //
 //===----------------------------------------------------------------------===//
 
-
 #ifndef LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
 #define LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
 
 #include "AMDGPUInstrInfo.h"
 #include "SIDefines.h"
 #include "SIRegisterInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/Compiler.h"
+#include <cassert>
+#include <cstdint>
 
 namespace llvm {
 
+class APInt;
+class MachineRegisterInfo;
+class RegScavenger;
+class SISubtarget;
+class TargetRegisterClass;
+
 class SIInstrInfo final : public AMDGPUInstrInfo {
 private:
   const SIRegisterInfo RI;
@@ -39,7 +55,7 @@ private:
     EXECZ = 3
   };
 
-  typedef SmallSetVector<MachineInstr *, 32> SetVectorType;
+  using SetVectorType = SmallSetVector<MachineInstr *, 32>;
 
   static unsigned getBranchOpcode(BranchPredicate Cond);
   static BranchPredicate getBranchPredicate(unsigned Opcode);
@@ -76,9 +92,8 @@ private:
                       MachineRegisterInfo &MRI,
                       MachineInstr &Inst) const;
 
-  void addUsersToMoveToVALUWorklist(
-    unsigned Reg, MachineRegisterInfo &MRI,
-    SetVectorType &Worklist) const;
+  void addUsersToMoveToVALUWorklist(unsigned Reg, MachineRegisterInfo &MRI,
+                                    SetVectorType &Worklist) const;
 
   void
   addSCCDefUsersToVALUWorklist(MachineInstr &SCCDefInst,
@@ -101,7 +116,6 @@ protected:
                                        unsigned OpIdx1) const override;
 
 public:
-
   enum TargetOperandFlags {
     MO_MASK = 0x7,
 
@@ -120,7 +134,7 @@ public:
     MO_REL32_HI = 5
   };
 
-  explicit SIInstrInfo(const SISubtarget &);
+  explicit SIInstrInfo(const SISubtarget &ST);
 
   const SIRegisterInfo &getRegisterInfo() const {
     return RI;
@@ -159,7 +173,7 @@ public:
 
   unsigned insertNE(MachineBasicBlock *MBB,
                     MachineBasicBlock::iterator I, const DebugLoc &DL,
-                    unsigned SrcReg, int Value)  const;
+                    unsigned SrcReg, int Value) const;
 
   unsigned insertEQ(MachineBasicBlock *MBB,
                     MachineBasicBlock::iterator I, const DebugLoc &DL,
@@ -228,7 +242,6 @@ public:
   bool reverseBranchCondition(
     SmallVectorImpl<MachineOperand> &Cond) const override;
 
-
   bool canInsertSelect(const MachineBasicBlock &MBB,
                        ArrayRef<MachineOperand> Cond,
                        unsigned TrueReg, unsigned FalseReg,
@@ -827,6 +840,7 @@ public:
 };
 
 namespace AMDGPU {
+
   LLVM_READONLY
   int getVOPe64(uint16_t Opcode);
 
@@ -867,7 +881,8 @@ namespace AMDGPU {
     TF_LONG_BRANCH_FORWARD = 1 << 0,
     TF_LONG_BRANCH_BACKWARD = 1 << 1
   };
-} // End namespace AMDGPU
+
+} // end namespace AMDGPU
 
 namespace SI {
 namespace KernelInputOffsets {
@@ -885,9 +900,9 @@ enum Offsets {
   LOCAL_SIZE_Z = 32
 };
 
-} // End namespace KernelInputOffsets
-} // End namespace SI
+} // end namespace KernelInputOffsets
+} // end namespace SI
 
-} // End namespace llvm
+} // end namespace llvm
 
-#endif
+#endif // LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index c6ad61a..b7ddfe4 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -1,4 +1,4 @@
-//===-- SILoadStoreOptimizer.cpp ------------------------------------------===//
+//===- SILoadStoreOptimizer.cpp -------------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -56,8 +56,9 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
+#include <algorithm>
 #include <cassert>
+#include <cstdlib>
 #include <iterator>
 #include <utility>
 
@@ -68,8 +69,7 @@ using namespace llvm;
 namespace {
 
 class SILoadStoreOptimizer : public MachineFunctionPass {
-
-  typedef struct {
+  using CombineInfo = struct {
     MachineBasicBlock::iterator I;
     MachineBasicBlock::iterator Paired;
     unsigned EltSize;
@@ -78,7 +78,7 @@ class SILoadStoreOptimizer : public MachineFunctionPass {
     unsigned BaseOff;
     bool UseST64;
     SmallVector<MachineInstr*, 8> InstsToMove;
-   } CombineInfo;
+   };
 
 private:
   const SIInstrInfo *TII = nullptr;
@@ -258,7 +258,6 @@ bool SILoadStoreOptimizer::findMatchingDSInst(CombineInfo &CI) {
 
   for ( ; MBBI != E; ++MBBI) {
     if (MBBI->getOpcode() != CI.I->getOpcode()) {
-
       // This is not a matching DS instruction, but we can keep looking as
       // long as one of these conditions are met:
       // 1. It is safe to move I down past MBBI.
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index baf2603..b372082 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -1,4 +1,4 @@
-//===-- SIMachineFunctionInfo.cpp -------- SI Machine Function Info -------===//
+//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -8,13 +8,19 @@
 //===----------------------------------------------------------------------===//
 
 #include "SIMachineFunctionInfo.h"
+#include "AMDGPUArgumentUsageInfo.h"
 #include "AMDGPUSubtarget.h"
-#include "SIInstrInfo.h"
+#include "SIRegisterInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/CallingConv.h"
 #include "llvm/IR/Function.h"
-#include "llvm/IR/LLVMContext.h"
+#include <cassert>
+#include <vector>
 
 #define MAX_LANES 64
 
@@ -22,27 +28,6 @@ using namespace llvm;
 
 SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
   : AMDGPUMachineFunction(MF),
-    TIDReg(AMDGPU::NoRegister),
-    ScratchRSrcReg(AMDGPU::PRIVATE_RSRC_REG),
-    ScratchWaveOffsetReg(AMDGPU::SCRATCH_WAVE_OFFSET_REG),
-    FrameOffsetReg(AMDGPU::FP_REG),
-    StackPtrOffsetReg(AMDGPU::SP_REG),
-    ArgInfo(),
-    PSInputAddr(0),
-    PSInputEnable(0),
-    ReturnsVoid(true),
-    FlatWorkGroupSizes(0, 0),
-    WavesPerEU(0, 0),
-    DebuggerWorkGroupIDStackObjectIndices({{0, 0, 0}}),
-    DebuggerWorkItemIDStackObjectIndices({{0, 0, 0}}),
-    LDSWaveSpillSize(0),
-    NumUserSGPRs(0),
-    NumSystemSGPRs(0),
-    HasSpilledSGPRs(false),
-    HasSpilledVGPRs(false),
-    HasNonSpillStackObjects(false),
-    NumSpilledSGPRs(0),
-    NumSpilledVGPRs(0),
     PrivateSegmentBuffer(false),
     DispatchPtr(false),
     QueuePtr(false),
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 5581fe4..4d87286 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -1,4 +1,4 @@
-//===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==//
+//==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,23 +15,30 @@
 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
 
 #include "AMDGPUMachineFunction.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "AMDGPUArgumentUsageInfo.h"
 #include "SIRegisterInfo.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <array>
 #include <cassert>
-#include <map>
 #include <utility>
+#include <vector>
 
 namespace llvm {
 
+class MachineFrameInfo;
+class MachineFunction;
+class TargetRegisterClass;
+
 class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
 public:
   explicit AMDGPUImagePseudoSourceValue() :
-    PseudoSourceValue(PseudoSourceValue::TargetCustom) { }
+    PseudoSourceValue(PseudoSourceValue::TargetCustom) {}
 
   bool isConstant(const MachineFrameInfo *) const override {
     // This should probably be true for most images, but we will start by being
@@ -45,7 +52,7 @@ public:
     return false;
   }
 
-  bool mayAlias(const MachineFrameInfo*) const override {
+  bool mayAlias(const MachineFrameInfo *) const override {
     // FIXME: If we ever change image intrinsics to accept fat pointers, then
     // this could be true for some cases.
     return false;
@@ -55,7 +62,7 @@ public:
 class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {
 public:
   explicit AMDGPUBufferPseudoSourceValue() :
-    PseudoSourceValue(PseudoSourceValue::TargetCustom) { }
+    PseudoSourceValue(PseudoSourceValue::TargetCustom) {}
 
   bool isConstant(const MachineFrameInfo *) const override {
     // This should probably be true for most images, but we will start by being
@@ -69,7 +76,7 @@ public:
     return false;
   }
 
-  bool mayAlias(const MachineFrameInfo*) const override {
+  bool mayAlias(const MachineFrameInfo *) const override {
     // FIXME: If we ever change image intrinsics to accept fat pointers, then
     // this could be true for some cases.
     return false;
@@ -82,57 +89,58 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
   // FIXME: This should be removed and getPreloadedValue moved here.
   friend class SIRegisterInfo;
 
-  unsigned TIDReg;
+  unsigned TIDReg = AMDGPU::NoRegister;
 
   // Registers that may be reserved for spilling purposes. These may be the same
   // as the input registers.
-  unsigned ScratchRSrcReg;
-  unsigned ScratchWaveOffsetReg;
+  unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
+  unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG;
 
   // This is the current function's incremented size from the kernel's scratch
   // wave offset register. For an entry function, this is exactly the same as
   // the ScratchWaveOffsetReg.
-  unsigned FrameOffsetReg;
+  unsigned FrameOffsetReg = AMDGPU::FP_REG;
 
   // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
-  unsigned StackPtrOffsetReg;
+  unsigned StackPtrOffsetReg = AMDGPU::SP_REG;
 
   AMDGPUFunctionArgInfo ArgInfo;
 
   // Graphics info.
-  unsigned PSInputAddr;
-  unsigned PSInputEnable;
+  unsigned PSInputAddr = 0;
+  unsigned PSInputEnable = 0;
 
-  bool ReturnsVoid;
+  bool ReturnsVoid = true;
 
   // A pair of default/requested minimum/maximum flat work group sizes.
   // Minimum - first, maximum - second.
-  std::pair<unsigned, unsigned> FlatWorkGroupSizes;
+  std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};
 
   // A pair of default/requested minimum/maximum number of waves per execution
   // unit. Minimum - first, maximum - second.
-  std::pair<unsigned, unsigned> WavesPerEU;
+  std::pair<unsigned, unsigned> WavesPerEU = {0, 0};
 
   // Stack object indices for work group IDs.
-  std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices;
+  std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices = {{0, 0, 0}};
+
   // Stack object indices for work item IDs.
-  std::array<int, 3> DebuggerWorkItemIDStackObjectIndices;
+  std::array<int, 3> DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}};
 
   AMDGPUBufferPseudoSourceValue BufferPSV;
   AMDGPUImagePseudoSourceValue ImagePSV;
 
 private:
-  unsigned LDSWaveSpillSize;
+  unsigned LDSWaveSpillSize = 0;
   unsigned ScratchOffsetReg;
-  unsigned NumUserSGPRs;
-  unsigned NumSystemSGPRs;
+  unsigned NumUserSGPRs = 0;
+  unsigned NumSystemSGPRs = 0;
 
-  bool HasSpilledSGPRs;
-  bool HasSpilledVGPRs;
-  bool HasNonSpillStackObjects;
+  bool HasSpilledSGPRs = false;
+  bool HasSpilledVGPRs = false;
+  bool HasNonSpillStackObjects = false;
 
-  unsigned NumSpilledSGPRs;
-  unsigned NumSpilledVGPRs;
+  unsigned NumSpilledSGPRs = 0;
+  unsigned NumSpilledVGPRs = 0;
 
   // Feature bits required for inputs passed in user SGPRs.
   bool PrivateSegmentBuffer : 1;
@@ -180,7 +188,7 @@ public:
     int Lane = -1;
 
     SpilledReg() = default;
-    SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) { }
+    SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) {}
 
     bool hasLane() { return Lane != -1;}
     bool hasReg() { return VGPR != AMDGPU::NoRegister;}
@@ -194,14 +202,12 @@ public:
     // prolog/epilog.
     Optional<int> FI;
 
-    SGPRSpillVGPRCSR(unsigned V, Optional<int> F) :
-      VGPR(V),
-      FI(F) {}
+    SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {}
   };
 
 private:
   // SGPR->VGPR spilling support.
-  typedef std::pair<unsigned, unsigned> SpillRegMask;
+  using SpillRegMask = std::pair<unsigned, unsigned>;
 
   // Track VGPR + wave index for each subregister of the SGPR spilled to
   // frameindex key.
@@ -225,8 +231,8 @@ public:
   bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
   void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);
 
-  bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; };
-  unsigned getTIDReg() const { return TIDReg; };
+  bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; }
+  unsigned getTIDReg() const { return TIDReg; }
   void setTIDReg(unsigned Reg) { TIDReg = Reg; }
 
   // Add user SGPRs.
@@ -276,7 +282,6 @@ public:
     ArgInfo.WorkItemIDZ = Arg;
   }
 
-
   unsigned addPrivateSegmentWaveByteOffset() {
     ArgInfo.PrivateSegmentWaveByteOffset
       = ArgDescriptor::createRegister(getNextSystemSGPR());
diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index e2ac663..24b7fe0 100644
--- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -1,4 +1,4 @@
-//===-- SIPeepholeSDWA.cpp - Peephole optimization for SDWA instructions --===//
+//===- SIPeepholeSDWA.cpp - Peephole optimization for SDWA instructions ---===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -24,12 +24,31 @@
 #include "AMDGPUSubtarget.h"
 #include "SIDefines.h"
 #include "SIInstrInfo.h"
+#include "SIRegisterInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <memory>
 #include <unordered_map>
-#include <unordered_set>
 
 using namespace llvm;
 
@@ -45,7 +64,7 @@ class SDWAOperand;
 
 class SIPeepholeSDWA : public MachineFunctionPass {
 public:
-  typedef SmallVector<SDWAOperand *, 4> SDWAOperandsVector;
+  using SDWAOperandsVector = SmallVector<SDWAOperand *, 4>;
 
 private:
   MachineRegisterInfo *MRI;
@@ -91,7 +110,7 @@ public:
     assert(Replaced->isReg());
   }
 
-  virtual ~SDWAOperand() {}
+  virtual ~SDWAOperand() = default;
 
   virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII) = 0;
   virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) = 0;
@@ -99,6 +118,7 @@ public:
   MachineOperand *getTargetOperand() const { return Target; }
   MachineOperand *getReplacedOperand() const { return Replaced; }
   MachineInstr *getParentInst() const { return Target->getParent(); }
+
   MachineRegisterInfo *getMRI() const {
     return &getParentInst()->getParent()->getParent()->getRegInfo();
   }
@@ -120,8 +140,8 @@ public:
       : SDWAOperand(TargetOp, ReplacedOp), SrcSel(SrcSel_), Abs(Abs_),
         Neg(Neg_), Sext(Sext_) {}
 
-  virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII) override;
-  virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;
+  MachineInstr *potentialToConvert(const SIInstrInfo *TII) override;
+  bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;
 
   SdwaSel getSrcSel() const { return SrcSel; }
   bool getAbs() const { return Abs; }
@@ -142,14 +162,14 @@ public:
                  SdwaSel DstSel_ = DWORD, DstUnused DstUn_ = UNUSED_PAD)
       : SDWAOperand(TargetOp, ReplacedOp), DstSel(DstSel_), DstUn(DstUn_) {}
 
-  virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII) override;
-  virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;
+  MachineInstr *potentialToConvert(const SIInstrInfo *TII) override;
+  bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;
 
   SdwaSel getDstSel() const { return DstSel; }
   DstUnused getDstUnused() const { return DstUn; }
 };
 
-} // End anonymous namespace.
+} // end anonymous namespace
 
 INITIALIZE_PASS(SIPeepholeSDWA, DEBUG_TYPE, "SI Peephole SDWA", false, false)
 
@@ -162,7 +182,6 @@ FunctionPass *llvm::createSIPeepholeSDWAPass() {
 }
 
 #ifndef NDEBUG
-
 static raw_ostream& operator<<(raw_ostream &OS, const SdwaSel &Sel) {
   switch(Sel) {
   case BYTE_0: OS << "BYTE_0"; break;
@@ -199,7 +218,6 @@ static raw_ostream& operator<<(raw_ostream &OS, const SDWADstOperand &Dst) {
      << " dst_unused:" << Dst.getDstUnused() << '\n';
   return OS;
 }
-
 #endif
 
 static void copyRegOperand(MachineOperand &To, const MachineOperand &From) {
@@ -564,7 +582,7 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
 
         auto SDWASrc = make_unique<SDWASrcOperand>(
             Src0, Dst, SrcSel, false, false,
-            Opcode == AMDGPU::V_BFE_U32 ? false : true);
+            Opcode != AMDGPU::V_BFE_U32);
         DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
         SDWAOperands[&MI] = std::move(SDWASrc);
         ++NumSDWAPatternsFound;
-- 
2.7.4