FunctionPass *createAMDGPUMachineCFGStructurizerPass();
FunctionPass *createAMDGPURewriteOutArgumentsPass();
+void initializeAMDGPUDAGToDAGISelPass(PassRegistry&);
+
void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&);
extern char &AMDGPUMachineCFGStructurizerID;
extern char &AMDGPUPromoteAllocaID;
Pass *createAMDGPUStructurizeCFGPass();
-FunctionPass *createAMDGPUISelDag(TargetMachine &TM,
- CodeGenOpt::Level OptLevel);
+FunctionPass *createAMDGPUISelDag(
+ TargetMachine *TM = nullptr, // NOTE(review): AMDGPUDAGToDAGISel's constructor dereferences TM, so real callers must pass a non-null machine
+ CodeGenOpt::Level OptLevel = CodeGenOpt::Default);
ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true);
ModulePass *createAMDGPUOpenCLImageTypeLoweringPass();
FunctionPass *createAMDGPUAnnotateUniformValues();
ImmutablePass *createAMDGPUAAWrapperPass();
void initializeAMDGPUAAWrapperPassPass(PassRegistry&);
+void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &);
+
Target &getTheAMDGPUTarget();
Target &getTheGCNTarget();
--- /dev/null
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUArgumentUsageInfo.h"
+#include "SIRegisterInfo.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-argument-reg-usage-info"
+
+INITIALIZE_PASS(AMDGPUArgumentUsageInfo, DEBUG_TYPE,
+ "Argument Register Usage Information Storage", false, true) // NOTE(review): presumably provides initializeAMDGPUArgumentUsageInfoPass() declared in AMDGPU.h — confirm against PassSupport.h
+
+/// Writes a one-line, newline-terminated description of this descriptor to
+/// \p OS: "<not set>", "Reg <register>" or "Stack offset <offset>".
+/// \p TRI is forwarded to PrintReg and may be null.
+void ArgDescriptor::print(raw_ostream &OS,
+                          const TargetRegisterInfo *TRI) const {
+  if (!isSet())
+    OS << "<not set>\n";
+  else if (isRegister())
+    OS << "Reg " << PrintReg(getRegister(), TRI) << '\n';
+  else
+    OS << "Stack offset " << getStackOffset() << '\n';
+}
+
+char AMDGPUArgumentUsageInfo::ID = 0; // Handed to the ImmutablePass base-class constructor.
+
+const AMDGPUFunctionArgInfo AMDGPUArgumentUsageInfo::ExternFunctionInfo{}; // Fallback returned by lookupFuncArgInfo() when a function has no map entry.
+
+bool AMDGPUArgumentUsageInfo::doInitialization(Module &M) { // No per-module setup is performed; M is unused.
+ return false;
+}
+
+bool AMDGPUArgumentUsageInfo::doFinalization(Module &M) { // M is unused.
+ ArgInfoMap.clear(); // Drop every recorded per-function entry.
+ return false;
+}
+
+/// Prints the argument descriptors recorded for every function in ArgInfoMap.
+///
+/// Each descriptor is written through ArgDescriptor's stream operator, which
+/// already terminates its text with a newline, so every labelled field ends up
+/// on its own line. \p M is unused.
+void AMDGPUArgumentUsageInfo::print(raw_ostream &OS, const Module *M) const {
+  for (const auto &FI : ArgInfoMap) {
+    const AMDGPUFunctionArgInfo &Info = FI.second;
+
+    OS << "Arguments for " << FI.first->getName() << '\n'
+       << " PrivateSegmentBuffer: " << Info.PrivateSegmentBuffer
+       << " DispatchPtr: " << Info.DispatchPtr
+       << " QueuePtr: " << Info.QueuePtr
+       << " KernargSegmentPtr: " << Info.KernargSegmentPtr
+       << " DispatchID: " << Info.DispatchID
+       << " FlatScratchInit: " << Info.FlatScratchInit
+       << " PrivateSegmentSize: " << Info.PrivateSegmentSize
+       << " GridWorkgroupCountX: " << Info.GridWorkGroupCountX
+       << " GridWorkgroupCountY: " << Info.GridWorkGroupCountY
+       << " GridWorkgroupCountZ: " << Info.GridWorkGroupCountZ
+       << " WorkGroupIDX: " << Info.WorkGroupIDX
+       << " WorkGroupIDY: " << Info.WorkGroupIDY
+       << " WorkGroupIDZ: " << Info.WorkGroupIDZ
+       << " WorkGroupInfo: " << Info.WorkGroupInfo
+       << " PrivateSegmentWaveByteOffset: "
+       << Info.PrivateSegmentWaveByteOffset
+       << " ImplicitBufferPtr: " << Info.ImplicitBufferPtr
+       // The work-item labels use the same "Name: " form as all other fields.
+       << " WorkItemIDX: " << Info.WorkItemIDX
+       << " WorkItemIDY: " << Info.WorkItemIDY
+       << " WorkItemIDZ: " << Info.WorkItemIDZ
+       << '\n';
+  }
+}
+
+/// Maps \p Value to the descriptor that holds it and the register class used
+/// for that value. The descriptor pointer is null when the descriptor is not
+/// set; the register class pointer is always non-null.
+std::pair<const ArgDescriptor *, const TargetRegisterClass *>
+AMDGPUFunctionArgInfo::getPreloadedValue(
+    AMDGPUFunctionArgInfo::PreloadedValue Value) const {
+  // Pairs a descriptor (or nullptr if it is not set) with its register class.
+  auto Lookup = [](const ArgDescriptor &Desc, const TargetRegisterClass &RC) {
+    return std::make_pair(Desc ? &Desc : nullptr, &RC);
+  };
+
+  switch (Value) {
+  // 128-bit SGPR value.
+  case PRIVATE_SEGMENT_BUFFER:
+    return Lookup(PrivateSegmentBuffer, AMDGPU::SGPR_128RegClass);
+
+  // 64-bit SGPR values.
+  case DISPATCH_PTR:
+    return Lookup(DispatchPtr, AMDGPU::SGPR_64RegClass);
+  case QUEUE_PTR:
+    return Lookup(QueuePtr, AMDGPU::SGPR_64RegClass);
+  case KERNARG_SEGMENT_PTR:
+    return Lookup(KernargSegmentPtr, AMDGPU::SGPR_64RegClass);
+  case DISPATCH_ID:
+    return Lookup(DispatchID, AMDGPU::SGPR_64RegClass);
+  case FLAT_SCRATCH_INIT:
+    return Lookup(FlatScratchInit, AMDGPU::SGPR_64RegClass);
+  case IMPLICIT_BUFFER_PTR:
+    return Lookup(ImplicitBufferPtr, AMDGPU::SGPR_64RegClass);
+
+  // 32-bit SGPR values.
+  case WORKGROUP_ID_X:
+    return Lookup(WorkGroupIDX, AMDGPU::SGPR_32RegClass);
+  case WORKGROUP_ID_Y:
+    return Lookup(WorkGroupIDY, AMDGPU::SGPR_32RegClass);
+  case WORKGROUP_ID_Z:
+    return Lookup(WorkGroupIDZ, AMDGPU::SGPR_32RegClass);
+  case PRIVATE_SEGMENT_WAVE_BYTE_OFFSET:
+    return Lookup(PrivateSegmentWaveByteOffset, AMDGPU::SGPR_32RegClass);
+
+  // 32-bit VGPR values.
+  case WORKITEM_ID_X:
+    return Lookup(WorkItemIDX, AMDGPU::VGPR_32RegClass);
+  case WORKITEM_ID_Y:
+    return Lookup(WorkItemIDY, AMDGPU::VGPR_32RegClass);
+  case WORKITEM_ID_Z:
+    return Lookup(WorkItemIDZ, AMDGPU::VGPR_32RegClass);
+  }
+  llvm_unreachable("unexpected preloaded value type");
+}
--- /dev/null
+//==- AMDGPUArgumentUsageInfo.h - Function Arg Usage Info --------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUARGUMENTUSAGEINFO_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUARGUMENTUSAGEINFO_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+
+class Function;
+class raw_ostream;
+class SISubtarget;
+class TargetMachine;
+class TargetRegisterClass;
+class TargetRegisterInfo;
+
+/// Describes where a single function input lives: in a register or at a stack
+/// offset. A default-constructed descriptor is "not set".
+struct ArgDescriptor {
+private:
+  friend struct AMDGPUFunctionArgInfo;
+
+  // Register and StackOffset share storage; IsStack says which one is valid.
+  union {
+    unsigned Register;
+    unsigned StackOffset;
+  };
+
+  bool IsStack : 1;
+  bool IsSet : 1;
+
+  // Private: only this struct and its friend AMDGPUFunctionArgInfo construct
+  // descriptors directly; other code uses createRegister() / createStack().
+  ArgDescriptor(unsigned Val = 0, bool IsStack = false, bool IsSet = false)
+    : Register(Val), IsStack(IsStack), IsSet(IsSet) {}
+
+public:
+  /// Returns a set descriptor that refers to register \p Reg.
+  static ArgDescriptor createRegister(unsigned Reg) {
+    return ArgDescriptor(Reg, false, true);
+  }
+
+  /// Returns a set descriptor that refers to stack offset \p Offset.
+  static ArgDescriptor createStack(unsigned Offset) {
+    return ArgDescriptor(Offset, true, true);
+  }
+
+  /// Returns true if this descriptor was produced by one of the create*()
+  /// factories rather than default-constructed.
+  bool isSet() const {
+    return IsSet;
+  }
+
+  /// Same as isSet(); explicit so it only applies in boolean contexts.
+  explicit operator bool() const {
+    return isSet();
+  }
+
+  /// Returns true unless this is a stack descriptor. Note that this is also
+  /// true for a descriptor that is not set.
+  bool isRegister() const {
+    return !IsStack;
+  }
+
+  /// Returns the register; only valid for non-stack descriptors.
+  unsigned getRegister() const {
+    assert(!IsStack && "descriptor holds a stack offset, not a register");
+    return Register;
+  }
+
+  /// Returns the stack offset; only valid for stack descriptors.
+  unsigned getStackOffset() const {
+    assert(IsStack && "descriptor holds a register, not a stack offset");
+    return StackOffset;
+  }
+
+  /// Writes a one-line description to \p OS; \p TRI is optional and is used
+  /// when printing the register.
+  void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr) const;
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS, const ArgDescriptor &Arg) { // Streams Arg via ArgDescriptor::print().
+ Arg.print(OS); // No TargetRegisterInfo here, so print() falls back to its default TRI = nullptr.
+ return OS;
+}
+
+struct AMDGPUFunctionArgInfo { // Holds one ArgDescriptor per special input of a function.
+ enum PreloadedValue { // Selector accepted by getPreloadedValue().
+ // SGPRS: getPreloadedValue() pairs these with SGPR register classes.
+ PRIVATE_SEGMENT_BUFFER = 0,
+ DISPATCH_PTR = 1,
+ QUEUE_PTR = 2,
+ KERNARG_SEGMENT_PTR = 3,
+ DISPATCH_ID = 4,
+ FLAT_SCRATCH_INIT = 5,
+ WORKGROUP_ID_X = 10,
+ WORKGROUP_ID_Y = 11,
+ WORKGROUP_ID_Z = 12,
+ PRIVATE_SEGMENT_WAVE_BYTE_OFFSET = 14,
+ IMPLICIT_BUFFER_PTR = 15,
+
+ // VGPRS: getPreloadedValue() pairs these with VGPR_32RegClass.
+ FIRST_VGPR_VALUE = 16,
+ WORKITEM_ID_X = FIRST_VGPR_VALUE, // Alias: shares the value 16.
+ WORKITEM_ID_Y = 17,
+ WORKITEM_ID_Z = 18
+ };
+
+ // Kernel input registers setup for the HSA ABI in allocation order.
+
+ // User SGPRs in kernels
+ // XXX - Can these require argument spills?
+ ArgDescriptor PrivateSegmentBuffer;
+ ArgDescriptor DispatchPtr;
+ ArgDescriptor QueuePtr;
+ ArgDescriptor KernargSegmentPtr;
+ ArgDescriptor DispatchID;
+ ArgDescriptor FlatScratchInit;
+ ArgDescriptor PrivateSegmentSize; // No PreloadedValue selector exists for this field.
+ ArgDescriptor GridWorkGroupCountX; // No PreloadedValue selector exists for this field.
+ ArgDescriptor GridWorkGroupCountY; // No PreloadedValue selector exists for this field.
+ ArgDescriptor GridWorkGroupCountZ; // No PreloadedValue selector exists for this field.
+
+ // System SGPRs in kernels.
+ ArgDescriptor WorkGroupIDX;
+ ArgDescriptor WorkGroupIDY;
+ ArgDescriptor WorkGroupIDZ;
+ ArgDescriptor WorkGroupInfo; // No PreloadedValue selector exists for this field.
+ ArgDescriptor PrivateSegmentWaveByteOffset;
+
+ // Input registers for non-HSA ABI
+ ArgDescriptor ImplicitBufferPtr = 0; // '= 0' converts through ArgDescriptor's private constructor (allowed by friendship) and leaves the descriptor not set.
+
+ // VGPRs inputs. These are always v0, v1 and v2 for entry functions.
+ ArgDescriptor WorkItemIDX;
+ ArgDescriptor WorkItemIDY;
+ ArgDescriptor WorkItemIDZ;
+
+ std::pair<const ArgDescriptor *, const TargetRegisterClass *>
+ getPreloadedValue(PreloadedValue Value) const; // Returns {descriptor, or nullptr if it is not set; register class for Value}.
+};
+
+/// Immutable pass that stores an AMDGPUFunctionArgInfo for each function it
+/// has been told about.
+class AMDGPUArgumentUsageInfo : public ImmutablePass {
+private:
+  // Returned by lookupFuncArgInfo() for functions that have no entry.
+  static const AMDGPUFunctionArgInfo ExternFunctionInfo;
+  DenseMap<const Function *, AMDGPUFunctionArgInfo> ArgInfoMap;
+
+public:
+  static char ID;
+
+  AMDGPUArgumentUsageInfo() : ImmutablePass(ID) { }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesAll();
+  }
+
+  bool doInitialization(Module &M) override;
+  bool doFinalization(Module &M) override;
+
+  /// Prints the descriptors of every recorded function to \p OS.
+  void print(raw_ostream &OS, const Module *M = nullptr) const override;
+
+  /// Records \p ArgInfo for \p F, replacing any previous entry.
+  void setFuncArgInfo(const Function &F, const AMDGPUFunctionArgInfo &ArgInfo) {
+    ArgInfoMap[&F] = ArgInfo;
+  }
+
+  /// Returns the info recorded for \p F. A function without an entry is
+  /// expected to be a declaration and gets the shared ExternFunctionInfo.
+  const AMDGPUFunctionArgInfo &lookupFuncArgInfo(const Function &F) const {
+    auto Entry = ArgInfoMap.find(&F);
+    if (Entry != ArgInfoMap.end())
+      return Entry->second;
+
+    assert(F.isDeclaration() &&
+           "no argument info recorded for a function with a body");
+    return ExternFunctionInfo;
+  }
+};
+
+} // end namespace llvm
+
+#endif
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterInfo.h"
AMDGPUAS AMDGPUASI;
public:
- explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(TM, OptLevel){
- AMDGPUASI = AMDGPU::getAMDGPUAS(TM);
+ explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
+ CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
+ : SelectionDAGISel(*TM, OptLevel) { // NOTE(review): TM defaults to nullptr yet is dereferenced here and on the next line; a default-constructed instance must not be used
+ AMDGPUASI = AMDGPU::getAMDGPUAS(*TM);
}
~AMDGPUDAGToDAGISel() override = default;
+ void getAnalysisUsage(AnalysisUsage &AU) const override { // Declares what this pass needs before it runs.
+ AU.addRequired<AMDGPUArgumentUsageInfo>(); // Instruction selection requires the argument usage info pass.
+ SelectionDAGISel::getAnalysisUsage(AU); // Defer to the base class for everything else.
+ }
+
bool runOnMachineFunction(MachineFunction &MF) override;
void Select(SDNode *N) override;
StringRef getPassName() const override;
} // end anonymous namespace
+INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "isel",
+ "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
+INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo) // Matches the addRequired<AMDGPUArgumentUsageInfo>() in getAnalysisUsage().
+INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "isel",
+ "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
+
/// \brief This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
-FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM,
+FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
CodeGenOpt::Level OptLevel) {
return new AMDGPUDAGToDAGISel(TM, OptLevel); // Heap-allocated; NOTE(review): ownership presumably passes to whoever adds the pass — confirm. TM must be non-null (the constructor dereferences it).
}
initializeR600PacketizerPass(*PR);
initializeR600ExpandSpecialInstrsPassPass(*PR);
initializeR600VectorRegMergerPass(*PR);
+ initializeAMDGPUDAGToDAGISelPass(*PR);
initializeSILowerI1CopiesPass(*PR);
initializeSIFixSGPRCopiesPass(*PR);
initializeSIFixVGPRCopiesPass(*PR);
initializeAMDGPUAlwaysInlinePass(*PR);
initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
initializeAMDGPUAnnotateUniformValuesPass(*PR);
+ initializeAMDGPUArgumentUsageInfoPass(*PR);
initializeAMDGPULowerIntrinsicsPass(*PR);
initializeAMDGPUPromoteAllocaPass(*PR);
initializeAMDGPUCodeGenPreparePass(*PR);
}
bool AMDGPUPassConfig::addInstSelector() {
- addPass(createAMDGPUISelDag(getAMDGPUTargetMachine(), getOptLevel()));
+ addPass(createAMDGPUISelDag(&getAMDGPUTargetMachine(), getOptLevel())); // createAMDGPUISelDag takes the TargetMachine by pointer.
return false;
}
AMDGPUAlwaysInlinePass.cpp
AMDGPUAnnotateKernelFeatures.cpp
AMDGPUAnnotateUniformValues.cpp
+ AMDGPUArgumentUsageInfo.cpp
AMDGPUAsmPrinter.cpp
AMDGPUCallLowering.cpp
AMDGPUCodeGenPrepare.cpp