From d16eff816b876aac959dbd45dc78d41a06f966ad Mon Sep 17 00:00:00 2001 From: Eugene Zelenko Date: Tue, 8 Aug 2017 23:53:55 +0000 Subject: [PATCH] [AMDGPU] Fix some Clang-tidy modernize-use-using and Include What You Use warnings; other minor fixes (NFC). llvm-svn: 310429 --- llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp | 17 ++- llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h | 25 +++- .../Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp | 160 +++++++++++---------- .../AMDGPU/AMDGPUOpenCLImageTypeLoweringPass.cpp | 55 ++++--- .../Target/AMDGPU/AMDGPURewriteOutArguments.cpp | 45 ++++-- .../Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 54 ++++--- llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h | 28 ++-- llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp | 29 ++-- llvm/lib/Target/AMDGPU/AMDKernelCodeT.h | 48 +++---- llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp | 32 +++-- 10 files changed, 300 insertions(+), 193 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp index faa424e..9527c9f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp @@ -1,4 +1,4 @@ -//===- AMDGPUAliasAnalysis ---------------------------------------*- C++ -*-==// +//===- AMDGPUAliasAnalysis ------------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -12,13 +12,21 @@ #include "AMDGPUAliasAnalysis.h" #include "AMDGPU.h" +#include "llvm/ADT/Triple.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/CallingConv.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" #include "llvm/Pass.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include using namespace llvm; @@ -26,6 +34,7 @@ using namespace llvm; // Register this pass... char AMDGPUAAWrapperPass::ID = 0; + INITIALIZE_PASS(AMDGPUAAWrapperPass, "amdgpu-aa", "AMDGPU Address space based Alias Analysis", false, true) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h index 5f8ed9b..645a38a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h @@ -1,4 +1,4 @@ -//===- AMDGPUAliasAnalysis ---------------------------------------*- C++ -*-==// +//===- AMDGPUAliasAnalysis --------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -10,17 +10,24 @@ /// This is the AMGPU address space based alias analysis pass. //===----------------------------------------------------------------------===// -#ifndef LLVM_ANALYSIS_AMDGPUALIASANALYSIS_H -#define LLVM_ANALYSIS_AMDGPUALIASANALYSIS_H +#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUALIASANALYSIS_H +#define LLVM_LIB_TARGET_AMDGPU_AMDGPUALIASANALYSIS_H #include "AMDGPU.h" +#include "llvm/ADT/Triple.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" +#include +#include namespace llvm { +class DataLayout; +class MDNode; +class MemoryLocation; + /// A simple AA result that uses TBAA metadata to answer queries. 
class AMDGPUAAResult : public AAResultBase { friend AAResultBase; @@ -50,7 +57,9 @@ private: class ASAliasRulesTy { public: ASAliasRulesTy(AMDGPUAS AS_, Triple::ArchType Arch_); + AliasResult getAliasResult(unsigned AS1, unsigned AS2) const; + private: Triple::ArchType Arch; AMDGPUAS AS; @@ -61,10 +70,11 @@ private: /// Analysis pass providing a never-invalidated alias analysis result. class AMDGPUAA : public AnalysisInfoMixin { friend AnalysisInfoMixin; + static char PassID; public: - typedef AMDGPUAAResult Result; + using Result = AMDGPUAAResult; AMDGPUAAResult run(Function &F, AnalysisManager &AM) { return AMDGPUAAResult(F.getParent()->getDataLayout(), @@ -91,12 +101,15 @@ public: Triple(M.getTargetTriple()))); return false; } + bool doFinalization(Module &M) override { Result.reset(); return false; } + void getAnalysisUsage(AnalysisUsage &AU) const override; }; -} -#endif // LLVM_ANALYSIS_AMDGPUALIASANALYSIS_H +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUALIASANALYSIS_H diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp index 936333e..646303d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp @@ -14,46 +14,55 @@ #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Analysis/CFG.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegionInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/IR/DebugLoc.h" +#include "llvm/Pass.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetOpcodes.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include #include +#include + using namespace llvm; #define DEBUG_TYPE "amdgpucfgstructurizer" namespace { + class PHILinearizeDestIterator; class PHILinearize { friend class PHILinearizeDestIterator; public: - typedef std::pair PHISourceT; + using PHISourceT = std::pair; private: - typedef DenseSet PHISourcesT; - typedef struct { + using PHISourcesT = DenseSet; + using PHIInfoElementT = struct { unsigned DestReg; DebugLoc DL; PHISourcesT Sources; - } PHIInfoElementT; - typedef SmallPtrSet PHIInfoT; + }; + using PHIInfoT = SmallPtrSet; PHIInfoT PHIInfo; static unsigned phiInfoElementGetDest(PHIInfoElementT *Info); @@ -85,8 +94,8 @@ public: void dump(MachineRegisterInfo *MRI); void clear(); - typedef PHISourcesT::iterator source_iterator; - typedef PHILinearizeDestIterator dest_iterator; + using source_iterator = PHISourcesT::iterator; + using dest_iterator = PHILinearizeDestIterator; dest_iterator dests_begin(); dest_iterator dests_end(); @@ -100,6 +109,8 @@ private: PHILinearize::PHIInfoT::iterator Iter; public: + PHILinearizeDestIterator(PHILinearize::PHIInfoT::iterator I) : 
Iter(I) {} + unsigned operator*() { return PHILinearize::phiInfoElementGetDest(*Iter); } PHILinearizeDestIterator &operator++() { ++Iter; @@ -111,10 +122,10 @@ public: bool operator!=(const PHILinearizeDestIterator &I) const { return I.Iter != Iter; } - - PHILinearizeDestIterator(PHILinearize::PHIInfoT::iterator I) : Iter(I) {} }; +} // end anonymous namespace + unsigned PHILinearize::phiInfoElementGetDest(PHIInfoElementT *Info) { return Info->DestReg; } @@ -282,14 +293,12 @@ PHILinearize::source_iterator PHILinearize::sources_begin(unsigned Reg) { auto InfoElement = findPHIInfoElement(Reg); return phiInfoElementGetSources(InfoElement).begin(); } + PHILinearize::source_iterator PHILinearize::sources_end(unsigned Reg) { auto InfoElement = findPHIInfoElement(Reg); return phiInfoElementGetSources(InfoElement).end(); } -class RegionMRT; -class MBBMRT; - static unsigned getPHINumInputs(MachineInstr &PHI) { assert(PHI.isPHI()); return (PHI.getNumOperands() - 1) / 2; @@ -315,6 +324,11 @@ static unsigned getPHIDestReg(MachineInstr &PHI) { return PHI.getOperand(0).getReg(); } +namespace { + +class RegionMRT; +class MBBMRT; + class LinearizedRegion { protected: MachineBasicBlock *Entry; @@ -349,6 +363,11 @@ protected: RegionMRT *TopRegion = nullptr); public: + LinearizedRegion(); + LinearizedRegion(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); + ~LinearizedRegion() = default; + void setRegionMRT(RegionMRT *Region) { RMRT = Region; } RegionMRT *getRegionMRT() { return RMRT; } @@ -413,13 +432,6 @@ public: void initLiveOut(RegionMRT *Region, const MachineRegisterInfo *MRI, const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); - - LinearizedRegion(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI, - const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); - - LinearizedRegion(); - - ~LinearizedRegion(); }; class MRT { @@ -429,6 +441,8 @@ protected: unsigned BBSelectRegOut; public: + virtual ~MRT() = default; + unsigned getBBSelectRegIn() { return BBSelectRegIn; } unsigned getBBSelectRegOut() { return BBSelectRegOut; } @@ -467,42 +481,55 @@ public: dbgs() << " "; } } - - virtual ~MRT() {} }; class MBBMRT : public MRT { MachineBasicBlock *MBB; public: - virtual MBBMRT *getMBBMRT() { return this; } + MBBMRT(MachineBasicBlock *BB) : MBB(BB) { + setParent(nullptr); + setBBSelectRegOut(0); + setBBSelectRegIn(0); + } + + MBBMRT *getMBBMRT() override { return this; } MachineBasicBlock *getMBB() { return MBB; } - virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) { + void dump(const TargetRegisterInfo *TRI, int depth = 0) override { dumpDepth(depth); dbgs() << "MBB: " << getMBB()->getNumber(); dbgs() << " In: " << PrintReg(getBBSelectRegIn(), TRI); dbgs() << ", Out: " << PrintReg(getBBSelectRegOut(), TRI) << "\n"; } - - MBBMRT(MachineBasicBlock *BB) : MBB(BB) { - setParent(nullptr); - setBBSelectRegOut(0); - setBBSelectRegIn(0); - } }; class RegionMRT : public MRT { protected: MachineRegion *Region; - LinearizedRegion *LRegion; - MachineBasicBlock *Succ; - + LinearizedRegion *LRegion = nullptr; + MachineBasicBlock *Succ = nullptr; SetVector Children; public: - virtual RegionMRT *getRegionMRT() { return this; } + RegionMRT(MachineRegion *MachineRegion) : Region(MachineRegion) { + setParent(nullptr); + setBBSelectRegOut(0); + setBBSelectRegIn(0); + } + + ~RegionMRT() override { + if (LRegion) { + delete LRegion; + } + + for (auto CI : Children) { + delete &(*CI); + } + } + + RegionMRT *getRegionMRT() override { return this; } void 
setLinearizedRegion(LinearizedRegion *LinearizeRegion) { LRegion = LinearizeRegion; @@ -520,7 +547,7 @@ public: SetVector *getChildren() { return &Children; } - virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) { + void dump(const TargetRegisterInfo *TRI, int depth = 0) override { dumpDepth(depth); dbgs() << "Region: " << (void *)Region; dbgs() << " In: " << PrintReg(getBBSelectRegIn(), TRI); @@ -583,25 +610,10 @@ public: } } } - - RegionMRT(MachineRegion *MachineRegion) - : Region(MachineRegion), LRegion(nullptr), Succ(nullptr) { - setParent(nullptr); - setBBSelectRegOut(0); - setBBSelectRegIn(0); - } - - virtual ~RegionMRT() { - if (LRegion) { - delete LRegion; - } - - for (auto CI : Children) { - delete &(*CI); - } - } }; +} // end anonymous namespace + static unsigned createBBSelectReg(const SIInstrInfo *TII, MachineRegisterInfo *MRI) { return MRI->createVirtualRegister(TII->getPreferredSelectRegClass(32)); @@ -1063,7 +1075,7 @@ LinearizedRegion::LinearizedRegion() { Parent = nullptr; } -LinearizedRegion::~LinearizedRegion() {} +namespace { class AMDGPUMachineCFGStructurizer : public MachineFunctionPass { private: @@ -1074,6 +1086,7 @@ private: unsigned BBSelectRegister; PHILinearize PHIInfo; DenseMap FallthroughMap; + RegionMRT *RMRT; void getPHIRegionIndices(RegionMRT *Region, MachineInstr &PHI, SmallVector &RegionIndices); @@ -1197,15 +1210,15 @@ private: public: static char ID; + AMDGPUMachineCFGStructurizer() : MachineFunctionPass(ID) { + initializeAMDGPUMachineCFGStructurizerPass(*PassRegistry::getPassRegistry()); + } + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } - AMDGPUMachineCFGStructurizer() : MachineFunctionPass(ID) { - initializeAMDGPUMachineCFGStructurizerPass(*PassRegistry::getPassRegistry()); - } - void initFallthroughMap(MachineFunction &MF); void createLinearizedRegion(RegionMRT *Region, unsigned SelectOut); @@ -1214,14 +1227,14 @@ public: MachineRegisterInfo *MRI, const SIInstrInfo *TII); - RegionMRT *RMRT; void setRegionMRT(RegionMRT *RegionTree) { RMRT = RegionTree; } RegionMRT *getRegionMRT() { return RMRT; } bool runOnMachineFunction(MachineFunction &MF) override; }; -} + +} // end anonymous namespace char AMDGPUMachineCFGStructurizer::ID = 0; @@ -1258,7 +1271,6 @@ void AMDGPUMachineCFGStructurizer::transformSimpleIfRegion(RegionMRT *Region) { } static void fixMBBTerminator(MachineBasicBlock *MBB) { - if (MBB->succ_size() == 1) { auto *Succ = *(MBB->succ_begin()); for (auto &TI : MBB->terminators()) { @@ -1535,7 +1547,6 @@ void AMDGPUMachineCFGStructurizer::replacePHI( void AMDGPUMachineCFGStructurizer::replaceEntryPHI( MachineInstr &PHI, unsigned CombinedSourceReg, MachineBasicBlock *IfMBB, SmallVector &PHIRegionIndices) { - DEBUG(dbgs() << "Replace entry PHI: "); DEBUG(PHI.dump()); DEBUG(dbgs() << " with "); @@ -2491,7 +2502,6 @@ AMDGPUMachineCFGStructurizer::splitExit(LinearizedRegion *LRegion) { return NewExit; } - static MachineBasicBlock *split(MachineBasicBlock::iterator I) { // Create the fall-through block. 
MachineBasicBlock *MBB = (*I).getParent(); @@ -2845,16 +2855,6 @@ static void checkRegOnlyPHIInputs(MachineFunction &MF) { } } - -INITIALIZE_PASS_BEGIN(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer", - "AMDGPU Machine CFG Structurizer", false, false) -INITIALIZE_PASS_DEPENDENCY(MachineRegionInfoPass) -INITIALIZE_PASS_END(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer", - "AMDGPU Machine CFG Structurizer", false, false) - -char AMDGPUMachineCFGStructurizerID = AMDGPUMachineCFGStructurizer::ID; - - bool AMDGPUMachineCFGStructurizer::runOnMachineFunction(MachineFunction &MF) { const SISubtarget &ST = MF.getSubtarget(); const SIInstrInfo *TII = ST.getInstrInfo(); @@ -2880,6 +2880,14 @@ bool AMDGPUMachineCFGStructurizer::runOnMachineFunction(MachineFunction &MF) { return result; } +char AMDGPUMachineCFGStructurizerID = AMDGPUMachineCFGStructurizer::ID; + +INITIALIZE_PASS_BEGIN(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer", + "AMDGPU Machine CFG Structurizer", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineRegionInfoPass) +INITIALIZE_PASS_END(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer", + "AMDGPU Machine CFG Structurizer", false, false) + FunctionPass *llvm::createAMDGPUMachineCFGStructurizerPass() { return new AMDGPUMachineCFGStructurizer(); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUOpenCLImageTypeLoweringPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPUOpenCLImageTypeLoweringPass.cpp index 410bd52d..cd71f19 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUOpenCLImageTypeLoweringPass.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUOpenCLImageTypeLoweringPass.cpp @@ -1,4 +1,4 @@ -//===-- AMDGPUOpenCLImageTypeLoweringPass.cpp -----------------------------===// +//===- AMDGPUOpenCLImageTypeLoweringPass.cpp ------------------------------===// // // The LLVM Compiler Infrastructure // @@ -22,40 +22,57 @@ /// Resource IDs of read-only images, write-only images and samplers are /// defined to be their index among the kernel arguments of the same /// type and access qualifier. 
+// //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Analysis/Passes.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +#include +#include +#include +#include using namespace llvm; -namespace { - -StringRef GetImageSizeFunc = "llvm.OpenCL.image.get.size"; -StringRef GetImageFormatFunc = "llvm.OpenCL.image.get.format"; -StringRef GetImageResourceIDFunc = "llvm.OpenCL.image.get.resource.id"; -StringRef GetSamplerResourceIDFunc = "llvm.OpenCL.sampler.get.resource.id"; +static StringRef GetImageSizeFunc = "llvm.OpenCL.image.get.size"; +static StringRef GetImageFormatFunc = "llvm.OpenCL.image.get.format"; +static StringRef GetImageResourceIDFunc = "llvm.OpenCL.image.get.resource.id"; +static StringRef GetSamplerResourceIDFunc = + "llvm.OpenCL.sampler.get.resource.id"; -StringRef ImageSizeArgMDType = "__llvm_image_size"; -StringRef ImageFormatArgMDType = "__llvm_image_format"; +static StringRef ImageSizeArgMDType = "__llvm_image_size"; +static StringRef ImageFormatArgMDType = "__llvm_image_format"; -StringRef KernelsMDNodeName = "opencl.kernels"; -StringRef KernelArgMDNodeNames[] = { +static StringRef KernelsMDNodeName = "opencl.kernels"; +static StringRef KernelArgMDNodeNames[] = { "kernel_arg_addr_space", "kernel_arg_access_qual", "kernel_arg_type", "kernel_arg_base_type", "kernel_arg_type_qual"}; -const unsigned NumKernelArgMDNodes = 5; +static const unsigned NumKernelArgMDNodes = 5; + +namespace { -typedef SmallVector MDVector; +using MDVector = SmallVector; struct KernelArgMD { MDVector ArgVector[NumKernelArgMDNodes]; }; @@ -303,7 +320,7 @@ class AMDGPUOpenCLImageTypeLoweringPass : public ModulePass { CloneFunctionInto(NewF, F, VMap, /*ModuleLevelChanges=*/false, Returns); // Build new MDNode. 
- SmallVector KernelMDArgs; + SmallVector KernelMDArgs; KernelMDArgs.push_back(ConstantAsMetadata::get(NewF)); for (unsigned i = 0; i < NumKernelArgMDNodes; ++i) KernelMDArgs.push_back(MDNode::get(*Context, NewArgMDs.ArgVector[i])); @@ -346,7 +363,7 @@ class AMDGPUOpenCLImageTypeLoweringPass : public ModulePass { return Modified; } - public: +public: AMDGPUOpenCLImageTypeLoweringPass() : ModulePass(ID) {} bool runOnModule(Module &M) override { @@ -363,10 +380,10 @@ class AMDGPUOpenCLImageTypeLoweringPass : public ModulePass { } }; -char AMDGPUOpenCLImageTypeLoweringPass::ID = 0; - } // end anonymous namespace +char AMDGPUOpenCLImageTypeLoweringPass::ID = 0; + ModulePass *llvm::createAMDGPUOpenCLImageTypeLoweringPass() { return new AMDGPUOpenCLImageTypeLoweringPass(); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp index 8c04e2b..83e56a9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp @@ -1,4 +1,4 @@ -//===-- AMDGPURewriteOutArgumentsPass.cpp - Create struct returns ---------===// +//===- AMDGPURewriteOutArgumentsPass.cpp - Create struct returns ----------===// // // The LLVM Compiler Infrastructure // @@ -44,22 +44,39 @@ #include "AMDGPU.h" #include "Utils/AMDGPUBaseInfo.h" - #include "llvm/Analysis/MemoryDependenceAnalysis.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" -#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include +#include #define DEBUG_TYPE "amdgpu-rewrite-out-arguments" using namespace llvm; -namespace { - static cl::opt AnyAddressSpace( "amdgpu-any-address-space-out-arguments", cl::desc("Replace pointer out arguments with " @@ -78,6 +95,8 @@ STATISTIC(NumOutArgumentsReplaced, STATISTIC(NumOutArgumentFunctionsReplaced, "Number of functions with out arguments moved to struct return values"); +namespace { + class AMDGPURewriteOutArguments : public FunctionPass { private: const DataLayout *DL = nullptr; @@ -89,11 +108,11 @@ private: #ifndef NDEBUG bool isVec3ToVec4Shuffle(Type *Ty0, Type* Ty1) const; #endif + public: static char ID; - AMDGPURewriteOutArguments() : - FunctionPass(ID) {} + AMDGPURewriteOutArguments() : FunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); @@ -101,10 +120,10 @@ public: } bool doInitialization(Module &M) override; - bool runOnFunction(Function &M) override; + bool runOnFunction(Function &F) override; }; -} // End anonymous namespace +} // end anonymous namespace INITIALIZE_PASS_BEGIN(AMDGPURewriteOutArguments, DEBUG_TYPE, "AMDGPU Rewrite Out Arguments", false, false) @@ -239,7 +258,8 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) { if 
(OutArgs.empty()) return false; - typedef SmallVector, 4> ReplacementVec; + using ReplacementVec = SmallVector, 4>; + DenseMap Replacements; SmallVector Returns; @@ -373,7 +393,6 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) { if (RetVal) NewRetVal = B.CreateInsertValue(NewRetVal, RetVal, RetIdx++); - for (std::pair ReturnPoint : Replacement.second) { Argument *Arg = ReturnPoint.first; Value *Val = ReturnPoint.second; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index fde2132..0930ed1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -1,4 +1,4 @@ -//===-- AMDGPUTargetTransformInfo.cpp - AMDGPU specific TTI pass ---------===// +//===- AMDGPUTargetTransformInfo.cpp - AMDGPU specific TTI pass -----------===// // // The LLVM Compiler Infrastructure // @@ -16,15 +16,39 @@ //===----------------------------------------------------------------------===// #include "AMDGPUTargetTransformInfo.h" +#include "AMDGPUSubtarget.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/CodeGen/BasicTTIImpl.h" -#include "llvm/IR/Intrinsics.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Target/CostTable.h" -#include "llvm/Target/TargetLowering.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include +#include +#include +#include + using namespace llvm; #define DEBUG_TYPE "AMDGPUtti" @@ -54,7 +78,7 @@ static bool dependsOnLocalPhi(const Loop *L, const Value *Cond, if (!L->contains(I)) continue; if (const PHINode *PHI = dyn_cast(V)) { - if (none_of(L->getSubLoops(), [PHI](const Loop* SubLoop) { + if (llvm::none_of(L->getSubLoops(), [PHI](const Loop* SubLoop) { return SubLoop->contains(PHI); })) return true; } else if (Depth < 10 && dependsOnLocalPhi(L, V, Depth+1)) @@ -66,7 +90,7 @@ static bool dependsOnLocalPhi(const Loop *L, const Value *Cond, void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP) { UP.Threshold = 300; // Twice the default. - UP.MaxCount = UINT_MAX; + UP.MaxCount = std::numeric_limits::max(); UP.Partial = true; // TODO: Do we want runtime unrolling? @@ -81,12 +105,11 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, const DataLayout &DL = BB->getModule()->getDataLayout(); unsigned LocalGEPsSeen = 0; - if (any_of(L->getSubLoops(), [BB](const Loop* SubLoop) { + if (llvm::any_of(L->getSubLoops(), [BB](const Loop* SubLoop) { return SubLoop->contains(BB); })) continue; // Block belongs to an inner loop. 
for (const Instruction &I : *BB) { - // Unroll a loop which contains an "if" statement whose condition // defined by a PHI belonging to the loop. This may help to eliminate // if region and potentially even PHI itself, saving on both divergence @@ -153,7 +176,7 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, if (!Inst || L->isLoopInvariant(Op)) continue; - if (any_of(L->getSubLoops(), [Inst](const Loop* SubLoop) { + if (llvm::any_of(L->getSubLoops(), [Inst](const Loop* SubLoop) { return SubLoop->contains(Inst); })) continue; HasLoopDef = true; @@ -268,7 +291,6 @@ int AMDGPUTTIImpl::getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo, ArrayRef Args ) { - EVT OrigTy = TLI->getValueType(DL, Ty); if (!OrigTy.isSimple()) { return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, @@ -289,25 +311,23 @@ int AMDGPUTTIImpl::getArithmeticInstrCost( switch (ISD) { case ISD::SHL: case ISD::SRL: - case ISD::SRA: { + case ISD::SRA: if (SLT == MVT::i64) return get64BitInstrCost() * LT.first * NElts; // i32 return getFullRateInstrCost() * LT.first * NElts; - } case ISD::ADD: case ISD::SUB: case ISD::AND: case ISD::OR: - case ISD::XOR: { + case ISD::XOR: if (SLT == MVT::i64){ // and, or and xor are typically split into 2 VALU instructions. return 2 * getFullRateInstrCost() * LT.first * NElts; } return LT.first * NElts * getFullRateInstrCost(); - } case ISD::MUL: { const int QuarterRateCost = getQuarterRateInstrCost(); if (SLT == MVT::i64) { @@ -327,7 +347,6 @@ int AMDGPUTTIImpl::getArithmeticInstrCost( if (SLT == MVT::f32 || SLT == MVT::f16) return LT.first * NElts * getFullRateInstrCost(); break; - case ISD::FDIV: case ISD::FREM: // FIXME: frem should be handled separately. The fdiv in it is most of it, @@ -348,7 +367,6 @@ int AMDGPUTTIImpl::getArithmeticInstrCost( int Cost = 7 * getFullRateInstrCost() + 1 * getQuarterRateInstrCost(); return LT.first * NElts * Cost; } - break; default: break; @@ -465,11 +483,9 @@ static bool isArgPassedInSGPR(const Argument *A) { } } -/// /// \returns true if the result of the value could potentially be /// different across workitems in a wavefront. bool AMDGPUTTIImpl::isSourceOfDivergence(const Value *V) const { - if (const Argument *A = dyn_cast(V)) return !isArgPassedInSGPR(A); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index 5c0e029..a5225f6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -1,4 +1,4 @@ -//===-- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI -------*- C++ -*-===// +//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -6,35 +6,48 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// /// \file /// This file a TargetTransformInfo::Concept conforming object specific to the /// AMDGPU target machine. It uses the target's detailed information to /// provide more precise answers to certain TTI queries, while letting the /// target independent and default TTI implementations handle the rest. 
-/// +// //===----------------------------------------------------------------------===// #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H #include "AMDGPU.h" +#include "AMDGPUSubtarget.h" #include "AMDGPUTargetMachine.h" +#include "Utils/AMDGPUBaseInfo.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/BasicTTIImpl.h" +#include "llvm/IR/Function.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/Support/MathExtras.h" +#include namespace llvm { + class AMDGPUTargetLowering; +class Loop; +class ScalarEvolution; +class Type; +class Value; class AMDGPUTTIImpl final : public BasicTTIImplBase { - typedef BasicTTIImplBase BaseT; - typedef TargetTransformInfo TTI; + using BaseT = BasicTTIImplBase; + using TTI = TargetTransformInfo; + friend BaseT; const AMDGPUSubtarget *ST; const AMDGPUTargetLowering *TLI; bool IsGraphicsShader; - const FeatureBitset InlineFeatureIgnoreList = { // Codegen control options which don't matter. AMDGPU::FeatureEnableLoadStoreOpt, @@ -63,7 +76,6 @@ class AMDGPUTTIImpl final : public BasicTTIImplBase { const AMDGPUSubtarget *getST() const { return ST; } const AMDGPUTargetLowering *getTLI() const { return TLI; } - static inline int getFullRateInstrCost() { return TargetTransformInfo::TCC_Basic; } @@ -104,7 +116,7 @@ public: unsigned getHardwareNumberOfRegisters(bool Vector) const; unsigned getNumberOfRegisters(bool Vector) const; - unsigned getRegisterBitWidth(bool Vector) const ; + unsigned getRegisterBitWidth(bool Vector) const; unsigned getMinVectorRegisterBitWidth() const; unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const; @@ -154,4 +166,4 @@ public: } // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H diff --git a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp index a5fa9f7..223fdf7 100644 --- a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp @@ -1,11 +1,10 @@ -//===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===// +//===- AMDILCFGStructurizer.cpp - CFG Structurizer ------------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
// -/// \file //==-----------------------------------------------------------------------===// #include "AMDGPU.h" @@ -67,7 +66,7 @@ STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions"); namespace llvm { - void initializeAMDGPUCFGStructurizerPass(PassRegistry&); +void initializeAMDGPUCFGStructurizerPass(PassRegistry &); } // end namespace llvm @@ -121,9 +120,9 @@ public: class AMDGPUCFGStructurizer : public MachineFunctionPass { public: - typedef SmallVector MBBVector; - typedef std::map MBBInfoMap; - typedef std::map LoopLandInfoMap; + using MBBVector = SmallVector; + using MBBInfoMap = std::map; + using LoopLandInfoMap = std::map; enum PathToKind { Not_SinglePath = 0, @@ -234,6 +233,7 @@ protected: void insertCondBranchBefore(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, int NewOpcode, int RegNum, const DebugLoc &DL); + static int getBranchNzeroOpcode(int OldOpcode); static int getBranchZeroOpcode(int OldOpcode); static int getContinueNzeroOpcode(int OldOpcode); @@ -246,21 +246,25 @@ protected: static bool isUncondBranch(MachineInstr *MI); static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB); static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB); + /// The correct naming for this is getPossibleLoopendBlockBranchInstr. /// /// BB with backward-edge could have move instructions after the branch /// instruction. Such move instruction "belong to" the loop backward-edge. MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB); + static MachineInstr *getReturnInstr(MachineBasicBlock *MBB); static bool isReturnBlock(MachineBasicBlock *MBB); static void cloneSuccessorList(MachineBasicBlock *DstMBB, - MachineBasicBlock *SrcMBB) ; + MachineBasicBlock *SrcMBB); static MachineBasicBlock *clone(MachineBasicBlock *MBB); + /// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose /// because the AMDGPU instruction is not recognized as terminator fix this /// and retire this routine void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk); + static void wrapup(MachineBasicBlock *MBB); int patternMatch(MachineBasicBlock *MBB); @@ -299,6 +303,7 @@ protected: MachineBasicBlock *LandMBB); void settleLoopcontBlock(MachineBasicBlock *ContingMBB, MachineBasicBlock *ContMBB); + /// normalizeInfiniteLoopExit change /// B1: /// uncond_br LoopHeader @@ -309,6 +314,7 @@ protected: /// and return the newly added dummy exit block MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep); void removeUnconditionalBranch(MachineBasicBlock *MBB); + /// Remove duplicate branches instructions in a block. 
/// For instance /// B0: @@ -318,6 +324,7 @@ protected: /// B0: /// cond_br X B1 B2 void removeRedundantConditionalBranch(MachineBasicBlock *MBB); + void addDummyExitBlock(SmallVectorImpl &RetMBB); void removeSuccessor(MachineBasicBlock *MBB); MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB, @@ -335,10 +342,10 @@ private: SmallVector OrderedBlks; }; -char AMDGPUCFGStructurizer::ID = 0; - } // end anonymous namespace +char AMDGPUCFGStructurizer::ID = 0; + int AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const { MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB); if (It == BlockInfoMap.end()) @@ -535,7 +542,7 @@ int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) { case AMDGPU::JUMP_COND: case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32; default: llvm_unreachable("internal error"); - }; + } return -1; } @@ -1597,7 +1604,7 @@ void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB, MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB); if (!BranchMI) { DEBUG( - dbgs() << "migrateInstruction don't see branch instr\n" ; + dbgs() << "migrateInstruction don't see branch instr\n"; ); SpliceEnd = SrcMBB->end(); } else { diff --git a/llvm/lib/Target/AMDGPU/AMDKernelCodeT.h b/llvm/lib/Target/AMDGPU/AMDKernelCodeT.h index 5d243e9..ff4b8de 100644 --- a/llvm/lib/Target/AMDGPU/AMDKernelCodeT.h +++ b/llvm/lib/Target/AMDGPU/AMDKernelCodeT.h @@ -1,4 +1,4 @@ -//===-- AMDGPUKernelCodeT.h - Print AMDGPU assembly code ---------*- C++ -*-===// +//===- AMDGPUKernelCodeT.h - Print AMDGPU assembly code ---------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -6,39 +6,33 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -/// \file AMDKernelCodeT.h -//===----------------------------------------------------------------------===// - -#ifndef AMDKERNELCODET_H -#define AMDKERNELCODET_H -#include "llvm/MC/SubtargetFeature.h" +#ifndef LLVM_LIB_TARGET_AMDGPU_AMDKERNELCODET_H +#define LLVM_LIB_TARGET_AMDGPU_AMDKERNELCODET_H -#include #include -#include "llvm/Support/Debug.h" //---------------------------------------------------------------------------// // AMD Kernel Code, and its dependencies // //---------------------------------------------------------------------------// -typedef uint8_t hsa_powertwo8_t; -typedef uint32_t hsa_ext_code_kind_t; -typedef uint8_t hsa_ext_brig_profile8_t; -typedef uint8_t hsa_ext_brig_machine_model8_t; -typedef uint64_t hsa_ext_control_directive_present64_t; -typedef uint16_t hsa_ext_exception_kind16_t; -typedef uint32_t hsa_ext_code_kind32_t; +using hsa_powertwo8_t = uint8_t; +using hsa_ext_code_kind_t = uint32_t; +using hsa_ext_brig_profile8_t = uint8_t; +using hsa_ext_brig_machine_model8_t = uint8_t; +using hsa_ext_control_directive_present64_t = uint64_t; +using hsa_ext_exception_kind16_t = uint16_t; +using hsa_ext_code_kind32_t = uint32_t; -typedef struct hsa_dim3_s { +using hsa_dim3_t = struct { uint32_t x; uint32_t y; uint32_t z; -} hsa_dim3_t; +}; /// The version of the amd_*_code_t struct. Minor versions must be /// backward compatible. -typedef uint32_t amd_code_version32_t; +using amd_code_version32_t = uint32_t; enum amd_code_version_t { AMD_CODE_VERSION_MAJOR = 0, AMD_CODE_VERSION_MINOR = 1 @@ -64,7 +58,7 @@ enum amd_element_byte_size_t { /// Shader program settings for CS. Contains COMPUTE_PGM_RSRC1 and /// COMPUTE_PGM_RSRC2 registers. 
-typedef uint64_t amd_compute_pgm_resource_register64_t; +using amd_compute_pgm_resource_register64_t = uint64_t; /// Every amd_*_code_t has the following properties, which are composed of /// a number of bit fields. Every bit field has a mask (AMD_CODE_PROPERTY_*), @@ -74,9 +68,8 @@ typedef uint64_t amd_compute_pgm_resource_register64_t; /// (Note that bit fields cannot be used as their layout is /// implementation defined in the C standard and so cannot be used to /// specify an ABI) -typedef uint32_t amd_code_property32_t; +using amd_code_property32_t = uint32_t; enum amd_code_property_mask_t { - /// Enable the setup of the SGPR user data registers /// (AMD_CODE_PROPERTY_ENABLE_SGPR_*), see documentation of amd_kernel_code_t /// for initial register state. @@ -207,7 +200,7 @@ enum amd_code_property_mask_t { /// directives. See the definition of the control directives in HSA Programmer's /// Reference Manual which also defines how the values specified as finalizer /// arguments have to agree with the control directives in the HSAIL code. -typedef struct hsa_ext_control_directives_s { +using hsa_ext_control_directives_t = struct { /// This is a bit set indicating which control directives have been /// specified. If the value is 0 then there are no control directives specified /// and the rest of the fields can be ignored. The bits are accessed using the @@ -312,7 +305,7 @@ typedef struct hsa_ext_control_directives_s { /// Reserved. Must be 0. uint8_t reserved[75]; -} hsa_ext_control_directives_t; +}; /// AMD Kernel Code Object (amd_kernel_code_t). GPU CP uses the AMD Kernel /// Code Object to set up the hardware to execute the kernel dispatch. @@ -522,9 +515,8 @@ typedef struct hsa_ext_control_directives_s { /// dispatch packet kernArgPtr to a kernarg segment address before using this V#. /// Alternatively scalar loads can be used if the kernarg offset is uniform, as /// the kernarg segment is constant for the duration of the kernel execution. -/// -typedef struct amd_kernel_code_s { +using amd_kernel_code_t = struct { uint32_t amd_kernel_code_version_major; uint32_t amd_kernel_code_version_minor; uint16_t amd_machine_kind; @@ -653,6 +645,6 @@ typedef struct amd_kernel_code_s { uint8_t reserved3[12]; uint64_t runtime_loader_kernel_symbol; uint64_t control_directives[16]; -} amd_kernel_code_t; +}; -#endif // AMDKERNELCODET_H +#endif // LLVM_LIB_TARGET_AMDGPU_AMDKERNELCODET_H diff --git a/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp index 0657f67..9904b5f 100644 --- a/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp @@ -1,4 +1,4 @@ -//===----------------------- GCNMinRegStrategy.cpp - ----------------------===// +//===- GCNMinRegStrategy.cpp ----------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -6,18 +6,27 @@ // License. See LICENSE.TXT for details. 
// //===----------------------------------------------------------------------===// -// -/// \file -// -//===----------------------------------------------------------------------===// +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/ilist_node.h" +#include "llvm/ADT/simple_ilist.h" #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include using namespace llvm; #define DEBUG_TYPE "machine-scheduler" namespace { + class GCNMinRegScheduler { struct Candidate : ilist_node { const SUnit *SU; @@ -28,7 +37,7 @@ class GCNMinRegScheduler { }; SpecificBumpPtrAllocator Alloc; - typedef simple_ilist Queue; + using Queue = simple_ilist; Queue RQ; // Ready queue std::vector NumPreds; @@ -72,7 +81,8 @@ public: std::vector schedule(ArrayRef TopRoots, const ScheduleDAG &DAG); }; -} // namespace + +} // end anonymous namespace void GCNMinRegScheduler::initNumPreds(const decltype(ScheduleDAG::SUnits) &SUnits) { NumPreds.resize(SUnits.size()); @@ -104,7 +114,9 @@ int GCNMinRegScheduler::getNotReadySuccessors(const SUnit *SU) const { template unsigned GCNMinRegScheduler::findMax(unsigned Num, Calc C) { assert(!RQ.empty() && Num <= RQ.size()); - typedef decltype(C(*RQ.begin())) T; + + using T = decltype(C(*RQ.begin())) ; + T Max = std::numeric_limits::min(); unsigned NumMax = 0; for (auto I = RQ.begin(); Num; --Num) { @@ -260,9 +272,11 @@ GCNMinRegScheduler::schedule(ArrayRef TopRoots, } namespace llvm { + std::vector makeMinRegSchedule(ArrayRef TopRoots, const ScheduleDAG &DAG) { GCNMinRegScheduler S; return S.schedule(TopRoots, DAG); } -} + +} // end namespace llvm -- 2.7.4
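
For readers unfamiliar with the clang-tidy check named in the commit subject, below is a minimal, self-contained sketch of the modernize-use-using rewrite (C-style typedef to C++11 alias declaration) that this patch applies throughout. It is illustrative only and not part of the patch: the alias names echo ones appearing in the diff, but the element types and the small driver are invented for the example.

// Illustrative sketch of the modernize-use-using rewrite shown in the patch.
// Not part of the patch; PHISourceT/PHISourcesT echo names from the diff, but
// the element types and the driver below are invented for demonstration.
#include <cstdint>
#include <set>
#include <utility>

// Before: C-style typedefs, which clang-tidy's modernize-use-using flags.
typedef std::pair<unsigned, std::uint64_t> LegacyPHISourceT;
typedef std::set<LegacyPHISourceT> LegacyPHISourcesT;

// After: C++11 alias declarations, the form the patch converts the typedefs to.
using PHISourceT = std::pair<unsigned, std::uint64_t>;
using PHISourcesT = std::set<PHISourceT>;

int main() {
  PHISourcesT Sources;                  // usage is identical to the typedef form
  Sources.insert(PHISourceT(1u, 42u));
  return Sources.size() == 1 ? 0 : 1;   // returns 0 on success
}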