From: Konstantin Zhuravlyov Date: Tue, 11 Sep 2018 18:56:51 +0000 (+0000) Subject: AMDGPU: Move isa version and EF_AMDGPU_MACH_* determination X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=941615e4c8c6eb0300665360f53f8ac768179a13;p=platform%2Fupstream%2Fllvm.git AMDGPU: Move isa version and EF_AMDGPU_MACH_* determination into TargetParser. Also switch away from target features to CPU string when determining isa version. This fixes an issue when we output wrong isa version in the object code when features of a particular CPU are altered (i.e. gfx902 w/o xnack used to result in gfx900). Differential Revision: https://reviews.llvm.org/D51890 llvm-svn: 341982 --- diff --git a/llvm/include/llvm/Support/TargetParser.h b/llvm/include/llvm/Support/TargetParser.h index f27f76d..0bd4c35 100644 --- a/llvm/include/llvm/Support/TargetParser.h +++ b/llvm/include/llvm/Support/TargetParser.h @@ -320,6 +320,13 @@ enum GPUKind : uint32_t { GK_AMDGCN_LAST = GK_GFX906, }; +/// Instruction set architecture version. +struct IsaVersion { + unsigned Major; + unsigned Minor; + unsigned Stepping; +}; + // This isn't comprehensive for now, just things that are needed from the // frontend driver. enum ArchFeatureKind : uint32_t { @@ -335,18 +342,22 @@ enum ArchFeatureKind : uint32_t { FEATURE_FAST_DENORMAL_F32 = 1 << 5 }; -GPUKind parseArchAMDGCN(StringRef CPU); -GPUKind parseArchR600(StringRef CPU); StringRef getArchNameAMDGCN(GPUKind AK); StringRef getArchNameR600(GPUKind AK); StringRef getCanonicalArchName(StringRef Arch); +GPUKind parseArchAMDGCN(StringRef CPU); +GPUKind parseArchR600(StringRef CPU); unsigned getArchAttrAMDGCN(GPUKind AK); unsigned getArchAttrR600(GPUKind AK); void fillValidArchListAMDGCN(SmallVectorImpl &Values); void fillValidArchListR600(SmallVectorImpl &Values); -} +StringRef getArchNameFromElfMach(unsigned ElfMach); +unsigned getElfMach(StringRef GPU); +IsaVersion getIsaVersion(StringRef GPU); + +} // namespace AMDGPU } // namespace llvm diff --git a/llvm/lib/Support/TargetParser.cpp b/llvm/lib/Support/TargetParser.cpp index 1f58037..f73eb29 100644 --- a/llvm/lib/Support/TargetParser.cpp +++ b/llvm/lib/Support/TargetParser.cpp @@ -17,11 +17,13 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include using namespace llvm; using namespace ARM; using namespace AArch64; +using namespace AMDGPU; namespace { @@ -947,6 +949,8 @@ bool llvm::AArch64::isX18ReservedByDefault(const Triple &TT) { TT.isOSWindows(); } +namespace { + struct GPUInfo { StringLiteral Name; StringLiteral CanonicalName; @@ -954,11 +958,9 @@ struct GPUInfo { unsigned Features; }; -using namespace AMDGPU; -static constexpr GPUInfo R600GPUs[26] = { - // Name Canonical Kind Features - // Name - // +constexpr GPUInfo R600GPUs[26] = { + // Name Canonical Kind Features + // Name {{"r600"}, {"r600"}, GK_R600, FEATURE_NONE }, {{"rv630"}, {"r600"}, GK_R600, FEATURE_NONE }, {{"rv635"}, {"r600"}, GK_R600, FEATURE_NONE }, @@ -989,9 +991,9 @@ static constexpr GPUInfo R600GPUs[26] = { // This table should be sorted by the value of GPUKind // Don't bother listing the implicitly true features -static constexpr GPUInfo AMDGCNGPUs[32] = { - // Name Canonical Kind Features - // Name +constexpr GPUInfo AMDGCNGPUs[32] = { + // Name Canonical Kind Features + // Name {{"gfx600"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32}, {{"tahiti"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32}, {{"gfx601"}, {"gfx601"}, GK_GFX601, FEATURE_NONE}, @@ -1026,8 +1028,7 @@ static constexpr GPUInfo AMDGCNGPUs[32] = { {{"gfx906"}, {"gfx906"}, GK_GFX906, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32}, }; -static const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, - ArrayRef Table) { +const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef Table) { GPUInfo Search = { {""}, {""}, AK, AMDGPU::FEATURE_NONE }; auto I = std::lower_bound(Table.begin(), Table.end(), Search, @@ -1040,6 +1041,8 @@ static const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, return I; } +} // namespace + StringRef llvm::AMDGPU::getArchNameAMDGCN(GPUKind AK) { if (const auto *Entry = getArchEntry(AK, AMDGCNGPUs)) return Entry->CanonicalName; @@ -1092,3 +1095,118 @@ void AMDGPU::fillValidArchListR600(SmallVectorImpl &Values) { for (const auto C : R600GPUs) Values.push_back(C.Name); } + +StringRef AMDGPU::getArchNameFromElfMach(unsigned ElfMach) { + AMDGPU::GPUKind AK; + + switch (ElfMach) { + case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break; + case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break; + case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break; + case ELF::EF_AMDGPU_MACH_R600_RV670: AK = GK_RV670; break; + case ELF::EF_AMDGPU_MACH_R600_RV710: AK = GK_RV710; break; + case ELF::EF_AMDGPU_MACH_R600_RV730: AK = GK_RV730; break; + case ELF::EF_AMDGPU_MACH_R600_RV770: AK = GK_RV770; break; + case ELF::EF_AMDGPU_MACH_R600_CEDAR: AK = GK_CEDAR; break; + case ELF::EF_AMDGPU_MACH_R600_CYPRESS: AK = GK_CYPRESS; break; + case ELF::EF_AMDGPU_MACH_R600_JUNIPER: AK = GK_JUNIPER; break; + case ELF::EF_AMDGPU_MACH_R600_REDWOOD: AK = GK_REDWOOD; break; + case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break; + case ELF::EF_AMDGPU_MACH_R600_BARTS: AK = GK_BARTS; break; + case ELF::EF_AMDGPU_MACH_R600_CAICOS: AK = GK_CAICOS; break; + case ELF::EF_AMDGPU_MACH_R600_CAYMAN: AK = GK_CAYMAN; break; + case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break; + case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break; + } + + StringRef GPUName = getArchNameAMDGCN(AK); + if (GPUName != "") + return GPUName; + return getArchNameR600(AK); +} + +unsigned AMDGPU::getElfMach(StringRef GPU) { + AMDGPU::GPUKind AK = parseArchAMDGCN(GPU); + if (AK == AMDGPU::GPUKind::GK_NONE) + AK = parseArchR600(GPU); + + switch (AK) { + case GK_R600: return ELF::EF_AMDGPU_MACH_R600_R600; + case GK_R630: return ELF::EF_AMDGPU_MACH_R600_R630; + case GK_RS880: return ELF::EF_AMDGPU_MACH_R600_RS880; + case GK_RV670: return ELF::EF_AMDGPU_MACH_R600_RV670; + case GK_RV710: return ELF::EF_AMDGPU_MACH_R600_RV710; + case GK_RV730: return ELF::EF_AMDGPU_MACH_R600_RV730; + case GK_RV770: return ELF::EF_AMDGPU_MACH_R600_RV770; + case GK_CEDAR: return ELF::EF_AMDGPU_MACH_R600_CEDAR; + case GK_CYPRESS: return ELF::EF_AMDGPU_MACH_R600_CYPRESS; + case GK_JUNIPER: return ELF::EF_AMDGPU_MACH_R600_JUNIPER; + case GK_REDWOOD: return ELF::EF_AMDGPU_MACH_R600_REDWOOD; + case GK_SUMO: return ELF::EF_AMDGPU_MACH_R600_SUMO; + case GK_BARTS: return ELF::EF_AMDGPU_MACH_R600_BARTS; + case GK_CAICOS: return ELF::EF_AMDGPU_MACH_R600_CAICOS; + case GK_CAYMAN: return ELF::EF_AMDGPU_MACH_R600_CAYMAN; + case GK_TURKS: return ELF::EF_AMDGPU_MACH_R600_TURKS; + case GK_GFX600: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX600; + case GK_GFX601: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX601; + case GK_GFX700: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX700; + case GK_GFX701: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX701; + case GK_GFX702: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX702; + case GK_GFX703: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX703; + case GK_GFX704: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX704; + case GK_GFX801: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX801; + case GK_GFX802: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX802; + case GK_GFX803: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX803; + case GK_GFX810: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX810; + case GK_GFX900: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX900; + case GK_GFX902: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902; + case GK_GFX904: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904; + case GK_GFX906: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906; + case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE; + } + + llvm_unreachable("unknown GPU"); +} + +AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) { + if (GPU == "generic") + return {7, 0, 0}; + + AMDGPU::GPUKind AK = parseArchAMDGCN(GPU); + if (AK == AMDGPU::GPUKind::GK_NONE) + return {0, 0, 0}; + + switch (AK) { + case GK_GFX600: return {6, 0, 0}; + case GK_GFX601: return {6, 0, 1}; + case GK_GFX700: return {7, 0, 0}; + case GK_GFX701: return {7, 0, 1}; + case GK_GFX702: return {7, 0, 2}; + case GK_GFX703: return {7, 0, 3}; + case GK_GFX704: return {7, 0, 4}; + case GK_GFX801: return {8, 0, 1}; + case GK_GFX802: return {8, 0, 2}; + case GK_GFX803: return {8, 0, 3}; + case GK_GFX810: return {8, 1, 0}; + case GK_GFX900: return {9, 0, 0}; + case GK_GFX902: return {9, 0, 2}; + case GK_GFX904: return {9, 0, 4}; + case GK_GFX906: return {9, 0, 6}; + default: return {0, 0, 0}; + } +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index b7e8423..80a29a8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -40,6 +40,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/Support/AMDGPUMetadata.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/TargetParser.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -134,9 +135,9 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) { getTargetStreamer()->EmitDirectiveHSACodeObjectVersion(2, 1); // HSA and PAL emit NT_AMDGPU_HSA_ISA for code objects v2. - IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(getSTI()->getFeatureBits()); + IsaVersion Version = getIsaVersion(getSTI()->getCPU()); getTargetStreamer()->EmitDirectiveHSACodeObjectISA( - ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU"); + Version.Major, Version.Minor, Version.Stepping, "AMD", "AMDGPU"); } void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) { @@ -240,7 +241,7 @@ void AMDGPUAsmPrinter::EmitFunctionBodyEnd() { *getSTI(), KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo), CurrentProgramInfo.NumVGPRsForWavesPerEU, CurrentProgramInfo.NumSGPRsForWavesPerEU - - IsaInfo::getNumExtraSGPRs(getSTI()->getFeatureBits(), + IsaInfo::getNumExtraSGPRs(getSTI(), CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed), CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed, @@ -561,7 +562,7 @@ static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI, int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumSGPRs( const GCNSubtarget &ST) const { - return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(ST.getFeatureBits(), + return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(&ST, UsesVCC, UsesFlatScratch); } @@ -758,7 +759,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( // 48 SGPRs - vcc, - flat_scr, -xnack int MaxSGPRGuess = - 47 - IsaInfo::getNumExtraSGPRs(ST.getFeatureBits(), true, + 47 - IsaInfo::getNumExtraSGPRs(getSTI(), true, ST.hasFlatAddressSpace()); MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess); MaxVGPR = std::max(MaxVGPR, 23); @@ -823,7 +824,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, // duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be // unified. unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs( - STM.getFeatureBits(), ProgInfo.VCCUsed, ProgInfo.FlatUsed); + getSTI(), ProgInfo.VCCUsed, ProgInfo.FlatUsed); // Check the addressable register limit before we add ExtraSGPRs. if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS && @@ -905,9 +906,9 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, } ProgInfo.SGPRBlocks = IsaInfo::getNumSGPRBlocks( - STM.getFeatureBits(), ProgInfo.NumSGPRsForWavesPerEU); + getSTI(), ProgInfo.NumSGPRsForWavesPerEU); ProgInfo.VGPRBlocks = IsaInfo::getNumVGPRBlocks( - STM.getFeatureBits(), ProgInfo.NumVGPRsForWavesPerEU); + getSTI(), ProgInfo.NumVGPRsForWavesPerEU); // Update DebuggerWavefrontPrivateSegmentOffsetSGPR and // DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue" @@ -1137,7 +1138,7 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out, const SIMachineFunctionInfo *MFI = MF.getInfo(); const GCNSubtarget &STM = MF.getSubtarget(); - AMDGPU::initDefaultAMDKernelCodeT(Out, STM.getFeatureBits()); + AMDGPU::initDefaultAMDKernelCodeT(Out, getSTI()); Out.compute_pgm_resource_registers = CurrentProgramInfo.ComputePGMRSrc1 | diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 713dab3..2a29b6f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -124,10 +124,8 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT, return *this; } -AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, - const FeatureBitset &FeatureBits) : +AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) : TargetTriple(TT), - SubtargetFeatureBits(FeatureBits), Has16BitInsts(false), HasMadMixInsts(false), FP32Denormals(false), @@ -144,9 +142,9 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, { } GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, - const GCNTargetMachine &TM) : + const GCNTargetMachine &TM) : AMDGPUGenSubtargetInfo(TT, GPU, FS), - AMDGPUSubtarget(TT, getFeatureBits()), + AMDGPUSubtarget(TT), TargetTriple(TT), Gen(SOUTHERN_ISLANDS), IsaVersion(ISAVersion0_0_0), @@ -448,7 +446,7 @@ unsigned AMDGPUSubtarget::getKernArgSegmentSize(const Function &F, R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS, const TargetMachine &TM) : R600GenSubtargetInfo(TT, GPU, FS), - AMDGPUSubtarget(TT, getFeatureBits()), + AMDGPUSubtarget(TT), InstrInfo(*this), FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0), FMA(false), diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index a25be48..d7713ae 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -63,7 +63,6 @@ private: Triple TargetTriple; protected: - const FeatureBitset &SubtargetFeatureBits; bool Has16BitInsts; bool HasMadMixInsts; bool FP32Denormals; @@ -79,7 +78,7 @@ protected: unsigned WavefrontSize; public: - AMDGPUSubtarget(const Triple &TT, const FeatureBitset &FeatureBits); + AMDGPUSubtarget(const Triple &TT); static const AMDGPUSubtarget &get(const MachineFunction &MF); static const AMDGPUSubtarget &get(const TargetMachine &TM, @@ -203,33 +202,21 @@ public: /// \returns Maximum number of work groups per compute unit supported by the /// subtarget and limited by given \p FlatWorkGroupSize. - unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const { - return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(SubtargetFeatureBits, - FlatWorkGroupSize); - } + virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0; /// \returns Minimum flat work group size supported by the subtarget. - unsigned getMinFlatWorkGroupSize() const { - return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(SubtargetFeatureBits); - } + virtual unsigned getMinFlatWorkGroupSize() const = 0; /// \returns Maximum flat work group size supported by the subtarget. - unsigned getMaxFlatWorkGroupSize() const { - return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(SubtargetFeatureBits); - } + virtual unsigned getMaxFlatWorkGroupSize() const = 0; /// \returns Maximum number of waves per execution unit supported by the /// subtarget and limited by given \p FlatWorkGroupSize. - unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const { - return AMDGPU::IsaInfo::getMaxWavesPerEU(SubtargetFeatureBits, - FlatWorkGroupSize); - } + virtual unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const = 0; /// \returns Minimum number of waves per execution unit supported by the /// subtarget. - unsigned getMinWavesPerEU() const { - return AMDGPU::IsaInfo::getMinWavesPerEU(SubtargetFeatureBits); - } + virtual unsigned getMinWavesPerEU() const = 0; unsigned getMaxWavesPerEU() const { return 10; } @@ -708,20 +695,19 @@ public: /// \returns Number of execution units per compute unit supported by the /// subtarget. unsigned getEUsPerCU() const { - return AMDGPU::IsaInfo::getEUsPerCU(MCSubtargetInfo::getFeatureBits()); + return AMDGPU::IsaInfo::getEUsPerCU(this); } /// \returns Maximum number of waves per compute unit supported by the /// subtarget without any kind of limitation. unsigned getMaxWavesPerCU() const { - return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits()); + return AMDGPU::IsaInfo::getMaxWavesPerCU(this); } /// \returns Maximum number of waves per compute unit supported by the /// subtarget and limited by given \p FlatWorkGroupSize. unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const { - return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits(), - FlatWorkGroupSize); + return AMDGPU::IsaInfo::getMaxWavesPerCU(this, FlatWorkGroupSize); } /// \returns Maximum number of waves per execution unit supported by the @@ -733,8 +719,7 @@ public: /// \returns Number of waves per work group supported by the subtarget and /// limited by given \p FlatWorkGroupSize. unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const { - return AMDGPU::IsaInfo::getWavesPerWorkGroup( - MCSubtargetInfo::getFeatureBits(), FlatWorkGroupSize); + return AMDGPU::IsaInfo::getWavesPerWorkGroup(this, FlatWorkGroupSize); } // static wrappers @@ -853,39 +838,34 @@ public: /// \returns SGPR allocation granularity supported by the subtarget. unsigned getSGPRAllocGranule() const { - return AMDGPU::IsaInfo::getSGPRAllocGranule( - MCSubtargetInfo::getFeatureBits()); + return AMDGPU::IsaInfo::getSGPRAllocGranule(this); } /// \returns SGPR encoding granularity supported by the subtarget. unsigned getSGPREncodingGranule() const { - return AMDGPU::IsaInfo::getSGPREncodingGranule( - MCSubtargetInfo::getFeatureBits()); + return AMDGPU::IsaInfo::getSGPREncodingGranule(this); } /// \returns Total number of SGPRs supported by the subtarget. unsigned getTotalNumSGPRs() const { - return AMDGPU::IsaInfo::getTotalNumSGPRs(MCSubtargetInfo::getFeatureBits()); + return AMDGPU::IsaInfo::getTotalNumSGPRs(this); } /// \returns Addressable number of SGPRs supported by the subtarget. unsigned getAddressableNumSGPRs() const { - return AMDGPU::IsaInfo::getAddressableNumSGPRs( - MCSubtargetInfo::getFeatureBits()); + return AMDGPU::IsaInfo::getAddressableNumSGPRs(this); } /// \returns Minimum number of SGPRs that meets the given number of waves per /// execution unit requirement supported by the subtarget. unsigned getMinNumSGPRs(unsigned WavesPerEU) const { - return AMDGPU::IsaInfo::getMinNumSGPRs(MCSubtargetInfo::getFeatureBits(), - WavesPerEU); + return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU); } /// \returns Maximum number of SGPRs that meets the given number of waves per /// execution unit requirement supported by the subtarget. unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const { - return AMDGPU::IsaInfo::getMaxNumSGPRs(MCSubtargetInfo::getFeatureBits(), - WavesPerEU, Addressable); + return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable); } /// \returns Reserved number of SGPRs for given function \p MF. @@ -903,39 +883,34 @@ public: /// \returns VGPR allocation granularity supported by the subtarget. unsigned getVGPRAllocGranule() const { - return AMDGPU::IsaInfo::getVGPRAllocGranule( - MCSubtargetInfo::getFeatureBits()); + return AMDGPU::IsaInfo::getVGPRAllocGranule(this); } /// \returns VGPR encoding granularity supported by the subtarget. unsigned getVGPREncodingGranule() const { - return AMDGPU::IsaInfo::getVGPREncodingGranule( - MCSubtargetInfo::getFeatureBits()); + return AMDGPU::IsaInfo::getVGPREncodingGranule(this); } /// \returns Total number of VGPRs supported by the subtarget. unsigned getTotalNumVGPRs() const { - return AMDGPU::IsaInfo::getTotalNumVGPRs(MCSubtargetInfo::getFeatureBits()); + return AMDGPU::IsaInfo::getTotalNumVGPRs(this); } /// \returns Addressable number of VGPRs supported by the subtarget. unsigned getAddressableNumVGPRs() const { - return AMDGPU::IsaInfo::getAddressableNumVGPRs( - MCSubtargetInfo::getFeatureBits()); + return AMDGPU::IsaInfo::getAddressableNumVGPRs(this); } /// \returns Minimum number of VGPRs that meets given number of waves per /// execution unit requirement supported by the subtarget. unsigned getMinNumVGPRs(unsigned WavesPerEU) const { - return AMDGPU::IsaInfo::getMinNumVGPRs(MCSubtargetInfo::getFeatureBits(), - WavesPerEU); + return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU); } /// \returns Maximum number of VGPRs that meets given number of waves per /// execution unit requirement supported by the subtarget. unsigned getMaxNumVGPRs(unsigned WavesPerEU) const { - return AMDGPU::IsaInfo::getMaxNumVGPRs(MCSubtargetInfo::getFeatureBits(), - WavesPerEU); + return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU); } /// \returns Maximum number of VGPRs that meets number of waves per execution @@ -951,6 +926,34 @@ public: void getPostRAMutations( std::vector> &Mutations) const override; + + /// \returns Maximum number of work groups per compute unit supported by the + /// subtarget and limited by given \p FlatWorkGroupSize. + unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override { + return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize); + } + + /// \returns Minimum flat work group size supported by the subtarget. + unsigned getMinFlatWorkGroupSize() const override { + return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this); + } + + /// \returns Maximum flat work group size supported by the subtarget. + unsigned getMaxFlatWorkGroupSize() const override { + return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this); + } + + /// \returns Maximum number of waves per execution unit supported by the + /// subtarget and limited by given \p FlatWorkGroupSize. + unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override { + return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize); + } + + /// \returns Minimum number of waves per execution unit supported by the + /// subtarget. + unsigned getMinWavesPerEU() const override { + return AMDGPU::IsaInfo::getMinWavesPerEU(this); + } }; class R600Subtarget final : public R600GenSubtargetInfo, @@ -1061,6 +1064,34 @@ public: bool enableSubRegLiveness() const override { return true; } + + /// \returns Maximum number of work groups per compute unit supported by the + /// subtarget and limited by given \p FlatWorkGroupSize. + unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override { + return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize); + } + + /// \returns Minimum flat work group size supported by the subtarget. + unsigned getMinFlatWorkGroupSize() const override { + return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this); + } + + /// \returns Maximum flat work group size supported by the subtarget. + unsigned getMaxFlatWorkGroupSize() const override { + return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this); + } + + /// \returns Maximum number of waves per execution unit supported by the + /// subtarget and limited by given \p FlatWorkGroupSize. + unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override { + return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize); + } + + /// \returns Minimum number of waves per execution unit supported by the + /// subtarget. + unsigned getMinWavesPerEU() const override { + return AMDGPU::IsaInfo::getMinWavesPerEU(this); + } }; } // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index fe10f7a..bd94193 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -49,6 +49,7 @@ #include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SMLoc.h" +#include "llvm/Support/TargetParser.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include @@ -917,8 +918,7 @@ public: // Currently there is none suitable machinery in the core llvm-mc for this. // MCSymbol::isRedefinable is intended for another purpose, and // AsmParser::parseDirectiveSet() cannot be specialized for specific target. - AMDGPU::IsaInfo::IsaVersion ISA = - AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()); + AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); MCContext &Ctx = getContext(); if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { MCSymbol *Sym = @@ -1826,7 +1826,7 @@ bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { // Symbols are only defined for GCN targets - if (AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()).Major < 6) + if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) return true; auto SymbolName = getGprCountSymbolName(RegKind); @@ -2637,18 +2637,18 @@ bool AMDGPUAsmParser::calculateGPRBlocks( unsigned &SGPRBlocks) { // TODO(scott.linder): These calculations are duplicated from // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. - IsaInfo::IsaVersion Version = IsaInfo::getIsaVersion(Features); + IsaVersion Version = getIsaVersion(getSTI().getCPU()); unsigned NumVGPRs = NextFreeVGPR; unsigned NumSGPRs = NextFreeSGPR; - unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(Features); + unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(&getSTI()); if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && NumSGPRs > MaxAddressableNumSGPRs) return OutOfRangeError(SGPRRange); NumSGPRs += - IsaInfo::getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed, XNACKUsed); + IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && NumSGPRs > MaxAddressableNumSGPRs) @@ -2657,8 +2657,8 @@ bool AMDGPUAsmParser::calculateGPRBlocks( if (Features.test(FeatureSGPRInitBug)) NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; - VGPRBlocks = IsaInfo::getNumVGPRBlocks(Features, NumVGPRs); - SGPRBlocks = IsaInfo::getNumSGPRBlocks(Features, NumSGPRs); + VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs); + SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); return false; } @@ -2678,8 +2678,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { StringSet<> Seen; - IsaInfo::IsaVersion IVersion = - IsaInfo::getIsaVersion(getSTI().getFeatureBits()); + IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); SMRange VGPRRange; uint64_t NextFreeVGPR = 0; @@ -2938,8 +2937,7 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { // If this directive has no arguments, then use the ISA version for the // targeted GPU. if (getLexer().is(AsmToken::EndOfStatement)) { - AMDGPU::IsaInfo::IsaVersion ISA = - AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()); + AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU"); @@ -3001,7 +2999,7 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { amd_kernel_code_t Header; - AMDGPU::initDefaultAMDKernelCodeT(Header, getFeatureBits()); + AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); while (true) { // Lex EndOfStatement. This is in a while loop, because lexing a comment @@ -3679,12 +3677,12 @@ void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { static bool encodeCnt( - const AMDGPU::IsaInfo::IsaVersion ISA, + const AMDGPU::IsaVersion ISA, int64_t &IntVal, int64_t CntVal, bool Saturate, - unsigned (*encode)(const IsaInfo::IsaVersion &Version, unsigned, unsigned), - unsigned (*decode)(const IsaInfo::IsaVersion &Version, unsigned)) + unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), + unsigned (*decode)(const IsaVersion &Version, unsigned)) { bool Failed = false; @@ -3715,8 +3713,7 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { if (getParser().parseAbsoluteExpression(CntVal)) return true; - AMDGPU::IsaInfo::IsaVersion ISA = - AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()); + AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); bool Failed = true; bool Sat = CntName.endswith("_sat"); @@ -3751,8 +3748,7 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { OperandMatchResultTy AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { - AMDGPU::IsaInfo::IsaVersion ISA = - AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()); + AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); int64_t Waitcnt = getWaitcntBitMask(ISA); SMLoc S = Parser.getTok().getLoc(); diff --git a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp index 76b1976..fab0f87 100644 --- a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -1155,8 +1155,7 @@ void AMDGPUInstPrinter::printSwizzle(const MCInst *MI, unsigned OpNo, void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { - AMDGPU::IsaInfo::IsaVersion ISA = - AMDGPU::IsaInfo::getIsaVersion(STI.getFeatureBits()); + AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI.getCPU()); unsigned SImm16 = MI->getOperand(OpNo).getImm(); unsigned Vmcnt, Expcnt, Lgkmcnt; diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 6a41e3f..249f2bc 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -27,6 +27,7 @@ #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/TargetParser.h" namespace llvm { #include "AMDGPUPTNote.h" @@ -39,84 +40,6 @@ using namespace llvm::AMDGPU; // AMDGPUTargetStreamer //===----------------------------------------------------------------------===// -static const struct { - const char *Name; - unsigned Mach; -} MachTable[] = { - // Radeon HD 2000/3000 Series (R600). - { "r600", ELF::EF_AMDGPU_MACH_R600_R600 }, - { "r630", ELF::EF_AMDGPU_MACH_R600_R630 }, - { "rs880", ELF::EF_AMDGPU_MACH_R600_RS880 }, - { "rv670", ELF::EF_AMDGPU_MACH_R600_RV670 }, - // Radeon HD 4000 Series (R700). - { "rv710", ELF::EF_AMDGPU_MACH_R600_RV710 }, - { "rv730", ELF::EF_AMDGPU_MACH_R600_RV730 }, - { "rv770", ELF::EF_AMDGPU_MACH_R600_RV770 }, - // Radeon HD 5000 Series (Evergreen). - { "cedar", ELF::EF_AMDGPU_MACH_R600_CEDAR }, - { "cypress", ELF::EF_AMDGPU_MACH_R600_CYPRESS }, - { "juniper", ELF::EF_AMDGPU_MACH_R600_JUNIPER }, - { "redwood", ELF::EF_AMDGPU_MACH_R600_REDWOOD }, - { "sumo", ELF::EF_AMDGPU_MACH_R600_SUMO }, - // Radeon HD 6000 Series (Northern Islands). - { "barts", ELF::EF_AMDGPU_MACH_R600_BARTS }, - { "caicos", ELF::EF_AMDGPU_MACH_R600_CAICOS }, - { "cayman", ELF::EF_AMDGPU_MACH_R600_CAYMAN }, - { "turks", ELF::EF_AMDGPU_MACH_R600_TURKS }, - // AMDGCN GFX6. - { "gfx600", ELF::EF_AMDGPU_MACH_AMDGCN_GFX600 }, - { "tahiti", ELF::EF_AMDGPU_MACH_AMDGCN_GFX600 }, - { "gfx601", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 }, - { "hainan", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 }, - { "oland", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 }, - { "pitcairn", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 }, - { "verde", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 }, - // AMDGCN GFX7. - { "gfx700", ELF::EF_AMDGPU_MACH_AMDGCN_GFX700 }, - { "kaveri", ELF::EF_AMDGPU_MACH_AMDGCN_GFX700 }, - { "gfx701", ELF::EF_AMDGPU_MACH_AMDGCN_GFX701 }, - { "hawaii", ELF::EF_AMDGPU_MACH_AMDGCN_GFX701 }, - { "gfx702", ELF::EF_AMDGPU_MACH_AMDGCN_GFX702 }, - { "gfx703", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 }, - { "kabini", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 }, - { "mullins", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 }, - { "gfx704", ELF::EF_AMDGPU_MACH_AMDGCN_GFX704 }, - { "bonaire", ELF::EF_AMDGPU_MACH_AMDGCN_GFX704 }, - // AMDGCN GFX8. - { "gfx801", ELF::EF_AMDGPU_MACH_AMDGCN_GFX801 }, - { "carrizo", ELF::EF_AMDGPU_MACH_AMDGCN_GFX801 }, - { "gfx802", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 }, - { "iceland", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 }, - { "tonga", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 }, - { "gfx803", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 }, - { "fiji", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 }, - { "polaris10", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 }, - { "polaris11", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 }, - { "gfx810", ELF::EF_AMDGPU_MACH_AMDGCN_GFX810 }, - { "stoney", ELF::EF_AMDGPU_MACH_AMDGCN_GFX810 }, - // AMDGCN GFX9. - { "gfx900", ELF::EF_AMDGPU_MACH_AMDGCN_GFX900 }, - { "gfx902", ELF::EF_AMDGPU_MACH_AMDGCN_GFX902 }, - { "gfx904", ELF::EF_AMDGPU_MACH_AMDGCN_GFX904 }, - { "gfx906", ELF::EF_AMDGPU_MACH_AMDGCN_GFX906 }, - // Not specified processor. - { nullptr, ELF::EF_AMDGPU_MACH_NONE } -}; - -unsigned AMDGPUTargetStreamer::getMACH(StringRef GPU) const { - auto Entry = MachTable; - for (; Entry->Name && GPU != Entry->Name; ++Entry) - ; - return Entry->Mach; -} - -const char *AMDGPUTargetStreamer::getMachName(unsigned Mach) { - auto Entry = MachTable; - for (; Entry->Name && Mach != Entry->Mach; ++Entry) - ; - return Entry->Name; -} - bool AMDGPUTargetStreamer::EmitHSAMetadata(StringRef HSAMetadataString) { HSAMD::Metadata HSAMetadata; if (HSAMD::fromString(HSAMetadataString, HSAMetadata)) @@ -205,7 +128,7 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( bool ReserveVCC, bool ReserveFlatScr, bool ReserveXNACK) { amdhsa::kernel_descriptor_t DefaultKD = getDefaultAmdhsaKernelDescriptor(); - IsaInfo::IsaVersion IVersion = IsaInfo::getIsaVersion(STI.getFeatureBits()); + IsaVersion IVersion = getIsaVersion(STI.getCPU()); OS << "\t.amdhsa_kernel " << KernelName << '\n'; @@ -342,7 +265,7 @@ AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer( unsigned EFlags = MCA.getELFHeaderEFlags(); EFlags &= ~ELF::EF_AMDGPU_MACH; - EFlags |= getMACH(STI.getCPU()); + EFlags |= getElfMach(STI.getCPU()); EFlags &= ~ELF::EF_AMDGPU_XNACK; if (AMDGPU::hasXNACK(STI)) diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h index 472da1b..ad21e5e 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -31,13 +31,7 @@ class AMDGPUTargetStreamer : public MCTargetStreamer { protected: MCContext &getContext() const { return Streamer.getContext(); } - /// \returns Equivalent EF_AMDGPU_MACH_* value for given \p GPU name. - unsigned getMACH(StringRef GPU) const; - public: - /// \returns Equivalent GPU name for an EF_AMDGPU_MACH_* value. - static const char *getMachName(unsigned Mach); - AMDGPUTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {} virtual void EmitDirectiveAMDGCNTarget(StringRef Target) = 0; diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index b6dab35..819b1b9 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -369,7 +369,7 @@ private: const SIRegisterInfo *TRI = nullptr; const MachineRegisterInfo *MRI = nullptr; const MachineLoopInfo *MLI = nullptr; - AMDGPU::IsaInfo::IsaVersion IV; + AMDGPU::IsaVersion IV; DenseSet BlockVisitedSet; DenseSet TrackedWaitcntSet; @@ -1841,7 +1841,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { TRI = &TII->getRegisterInfo(); MRI = &MF.getRegInfo(); MLI = &getAnalysis(); - IV = AMDGPU::IsaInfo::getIsaVersion(ST->getFeatureBits()); + IV = AMDGPU::getIsaVersion(ST->getCPU()); const SIMachineFunctionInfo *MFI = MF.getInfo(); ForceEmitZeroWaitcnts = ForceEmitZeroFlag; diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp index 645fbfc..6fa52f5 100644 --- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -253,7 +253,7 @@ protected: /// Instruction info. const SIInstrInfo *TII = nullptr; - IsaInfo::IsaVersion IV; + IsaVersion IV; SICacheControl(const GCNSubtarget &ST); @@ -605,7 +605,7 @@ Optional SIMemOpAccess::getAtomicCmpxchgOrRmwInfo( SICacheControl::SICacheControl(const GCNSubtarget &ST) { TII = ST.getInstrInfo(); - IV = IsaInfo::getIsaVersion(ST.getFeatureBits()); + IV = getIsaVersion(ST.getCPU()); } /* static */ diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 3f361ed..b242345 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -137,68 +137,18 @@ int getMCOpcode(uint16_t Opcode, unsigned Gen) { namespace IsaInfo { -IsaVersion getIsaVersion(const FeatureBitset &Features) { - // GCN GFX6 (Southern Islands (SI)). - if (Features.test(FeatureISAVersion6_0_0)) - return {6, 0, 0}; - if (Features.test(FeatureISAVersion6_0_1)) - return {6, 0, 1}; - - // GCN GFX7 (Sea Islands (CI)). - if (Features.test(FeatureISAVersion7_0_0)) - return {7, 0, 0}; - if (Features.test(FeatureISAVersion7_0_1)) - return {7, 0, 1}; - if (Features.test(FeatureISAVersion7_0_2)) - return {7, 0, 2}; - if (Features.test(FeatureISAVersion7_0_3)) - return {7, 0, 3}; - if (Features.test(FeatureISAVersion7_0_4)) - return {7, 0, 4}; - if (Features.test(FeatureSeaIslands)) - return {7, 0, 0}; - - // GCN GFX8 (Volcanic Islands (VI)). - if (Features.test(FeatureISAVersion8_0_1)) - return {8, 0, 1}; - if (Features.test(FeatureISAVersion8_0_2)) - return {8, 0, 2}; - if (Features.test(FeatureISAVersion8_0_3)) - return {8, 0, 3}; - if (Features.test(FeatureISAVersion8_1_0)) - return {8, 1, 0}; - if (Features.test(FeatureVolcanicIslands)) - return {8, 0, 0}; - - // GCN GFX9. - if (Features.test(FeatureISAVersion9_0_0)) - return {9, 0, 0}; - if (Features.test(FeatureISAVersion9_0_2)) - return {9, 0, 2}; - if (Features.test(FeatureISAVersion9_0_4)) - return {9, 0, 4}; - if (Features.test(FeatureISAVersion9_0_6)) - return {9, 0, 6}; - if (Features.test(FeatureGFX9)) - return {9, 0, 0}; - - if (Features.test(FeatureSouthernIslands)) - return {0, 0, 0}; - return {7, 0, 0}; -} - void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) { auto TargetTriple = STI->getTargetTriple(); - auto ISAVersion = IsaInfo::getIsaVersion(STI->getFeatureBits()); + auto Version = getIsaVersion(STI->getCPU()); Stream << TargetTriple.getArchName() << '-' << TargetTriple.getVendorName() << '-' << TargetTriple.getOSName() << '-' << TargetTriple.getEnvironmentName() << '-' << "gfx" - << ISAVersion.Major - << ISAVersion.Minor - << ISAVersion.Stepping; + << Version.Major + << Version.Minor + << Version.Stepping; if (hasXNACK(*STI)) Stream << "+xnack"; @@ -210,49 +160,49 @@ bool hasCodeObjectV3(const MCSubtargetInfo *STI) { return STI->getFeatureBits().test(FeatureCodeObjectV3); } -unsigned getWavefrontSize(const FeatureBitset &Features) { - if (Features.test(FeatureWavefrontSize16)) +unsigned getWavefrontSize(const MCSubtargetInfo *STI) { + if (STI->getFeatureBits().test(FeatureWavefrontSize16)) return 16; - if (Features.test(FeatureWavefrontSize32)) + if (STI->getFeatureBits().test(FeatureWavefrontSize32)) return 32; return 64; } -unsigned getLocalMemorySize(const FeatureBitset &Features) { - if (Features.test(FeatureLocalMemorySize32768)) +unsigned getLocalMemorySize(const MCSubtargetInfo *STI) { + if (STI->getFeatureBits().test(FeatureLocalMemorySize32768)) return 32768; - if (Features.test(FeatureLocalMemorySize65536)) + if (STI->getFeatureBits().test(FeatureLocalMemorySize65536)) return 65536; return 0; } -unsigned getEUsPerCU(const FeatureBitset &Features) { +unsigned getEUsPerCU(const MCSubtargetInfo *STI) { return 4; } -unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features, +unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize) { - if (!Features.test(FeatureGCN)) + if (!STI->getFeatureBits().test(FeatureGCN)) return 8; - unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize); + unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize); if (N == 1) return 40; N = 40 / N; return std::min(N, 16u); } -unsigned getMaxWavesPerCU(const FeatureBitset &Features) { - return getMaxWavesPerEU() * getEUsPerCU(Features); +unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI) { + return getMaxWavesPerEU() * getEUsPerCU(STI); } -unsigned getMaxWavesPerCU(const FeatureBitset &Features, +unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize) { - return getWavesPerWorkGroup(Features, FlatWorkGroupSize); + return getWavesPerWorkGroup(STI, FlatWorkGroupSize); } -unsigned getMinWavesPerEU(const FeatureBitset &Features) { +unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) { return 1; } @@ -261,89 +211,89 @@ unsigned getMaxWavesPerEU() { return 10; } -unsigned getMaxWavesPerEU(const FeatureBitset &Features, +unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize) { - return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize), - getEUsPerCU(Features)) / getEUsPerCU(Features); + return alignTo(getMaxWavesPerCU(STI, FlatWorkGroupSize), + getEUsPerCU(STI)) / getEUsPerCU(STI); } -unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) { +unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) { return 1; } -unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) { +unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) { return 2048; } -unsigned getWavesPerWorkGroup(const FeatureBitset &Features, +unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize) { - return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) / - getWavefrontSize(Features); + return alignTo(FlatWorkGroupSize, getWavefrontSize(STI)) / + getWavefrontSize(STI); } -unsigned getSGPRAllocGranule(const FeatureBitset &Features) { - IsaVersion Version = getIsaVersion(Features); +unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) { + IsaVersion Version = getIsaVersion(STI->getCPU()); if (Version.Major >= 8) return 16; return 8; } -unsigned getSGPREncodingGranule(const FeatureBitset &Features) { +unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) { return 8; } -unsigned getTotalNumSGPRs(const FeatureBitset &Features) { - IsaVersion Version = getIsaVersion(Features); +unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) { + IsaVersion Version = getIsaVersion(STI->getCPU()); if (Version.Major >= 8) return 800; return 512; } -unsigned getAddressableNumSGPRs(const FeatureBitset &Features) { - if (Features.test(FeatureSGPRInitBug)) +unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) { + if (STI->getFeatureBits().test(FeatureSGPRInitBug)) return FIXED_NUM_SGPRS_FOR_INIT_BUG; - IsaVersion Version = getIsaVersion(Features); + IsaVersion Version = getIsaVersion(STI->getCPU()); if (Version.Major >= 8) return 102; return 104; } -unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) { +unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { assert(WavesPerEU != 0); if (WavesPerEU >= getMaxWavesPerEU()) return 0; - unsigned MinNumSGPRs = getTotalNumSGPRs(Features) / (WavesPerEU + 1); - if (Features.test(FeatureTrapHandler)) + unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1); + if (STI->getFeatureBits().test(FeatureTrapHandler)) MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS); - MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(Features)) + 1; - return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features)); + MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1; + return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI)); } -unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU, +unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable) { assert(WavesPerEU != 0); - IsaVersion Version = getIsaVersion(Features); - unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features); + IsaVersion Version = getIsaVersion(STI->getCPU()); + unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI); if (Version.Major >= 8 && !Addressable) AddressableNumSGPRs = 112; - unsigned MaxNumSGPRs = getTotalNumSGPRs(Features) / WavesPerEU; - if (Features.test(FeatureTrapHandler)) + unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU; + if (STI->getFeatureBits().test(FeatureTrapHandler)) MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS); - MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(Features)); + MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI)); return std::min(MaxNumSGPRs, AddressableNumSGPRs); } -unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed, +unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed) { unsigned ExtraSGPRs = 0; if (VCCUsed) ExtraSGPRs = 2; - IsaVersion Version = getIsaVersion(Features); + IsaVersion Version = getIsaVersion(STI->getCPU()); if (Version.Major < 8) { if (FlatScrUsed) ExtraSGPRs = 4; @@ -358,74 +308,74 @@ unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed, return ExtraSGPRs; } -unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed, +unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed) { - return getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed, - Features[AMDGPU::FeatureXNACK]); + return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed, + STI->getFeatureBits().test(AMDGPU::FeatureXNACK)); } -unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs) { - NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(Features)); +unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) { + NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI)); // SGPRBlocks is actual number of SGPR blocks minus 1. - return NumSGPRs / getSGPREncodingGranule(Features) - 1; + return NumSGPRs / getSGPREncodingGranule(STI) - 1; } -unsigned getVGPRAllocGranule(const FeatureBitset &Features) { +unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI) { return 4; } -unsigned getVGPREncodingGranule(const FeatureBitset &Features) { - return getVGPRAllocGranule(Features); +unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI) { + return getVGPRAllocGranule(STI); } -unsigned getTotalNumVGPRs(const FeatureBitset &Features) { +unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) { return 256; } -unsigned getAddressableNumVGPRs(const FeatureBitset &Features) { - return getTotalNumVGPRs(Features); +unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) { + return getTotalNumVGPRs(STI); } -unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) { +unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { assert(WavesPerEU != 0); if (WavesPerEU >= getMaxWavesPerEU()) return 0; unsigned MinNumVGPRs = - alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1), - getVGPRAllocGranule(Features)) + 1; - return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features)); + alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1), + getVGPRAllocGranule(STI)) + 1; + return std::min(MinNumVGPRs, getAddressableNumVGPRs(STI)); } -unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) { +unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { assert(WavesPerEU != 0); - unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU, - getVGPRAllocGranule(Features)); - unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features); + unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU, + getVGPRAllocGranule(STI)); + unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI); return std::min(MaxNumVGPRs, AddressableNumVGPRs); } -unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumVGPRs) { - NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(Features)); +unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs) { + NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(STI)); // VGPRBlocks is actual number of VGPR blocks minus 1. - return NumVGPRs / getVGPREncodingGranule(Features) - 1; + return NumVGPRs / getVGPREncodingGranule(STI) - 1; } } // end namespace IsaInfo void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, - const FeatureBitset &Features) { - IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features); + const MCSubtargetInfo *STI) { + IsaVersion Version = getIsaVersion(STI->getCPU()); memset(&Header, 0, sizeof(Header)); Header.amd_kernel_code_version_major = 1; Header.amd_kernel_code_version_minor = 2; Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU - Header.amd_machine_version_major = ISA.Major; - Header.amd_machine_version_minor = ISA.Minor; - Header.amd_machine_version_stepping = ISA.Stepping; + Header.amd_machine_version_major = Version.Major; + Header.amd_machine_version_minor = Version.Minor; + Header.amd_machine_version_stepping = Version.Stepping; Header.kernel_code_entry_byte_offset = sizeof(Header); // wavefront_size is specified as a power of 2: 2^6 = 64 threads. Header.wavefront_size = 6; @@ -513,7 +463,7 @@ std::pair getIntegerPairAttribute(const Function &F, return Ints; } -unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) { +unsigned getVmcntBitMask(const IsaVersion &Version) { unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1; if (Version.Major < 9) return VmcntLo; @@ -522,15 +472,15 @@ unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) { return VmcntLo | VmcntHi; } -unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) { +unsigned getExpcntBitMask(const IsaVersion &Version) { return (1 << getExpcntBitWidth()) - 1; } -unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) { +unsigned getLgkmcntBitMask(const IsaVersion &Version) { return (1 << getLgkmcntBitWidth()) - 1; } -unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) { +unsigned getWaitcntBitMask(const IsaVersion &Version) { unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo()); unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth()); unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth()); @@ -542,7 +492,7 @@ unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) { return Waitcnt | VmcntHi; } -unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) { +unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) { unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo()); if (Version.Major < 9) @@ -554,22 +504,22 @@ unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) { return VmcntLo | VmcntHi; } -unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) { +unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) { return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth()); } -unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) { +unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) { return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth()); } -void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, +void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) { Vmcnt = decodeVmcnt(Version, Waitcnt); Expcnt = decodeExpcnt(Version, Waitcnt); Lgkmcnt = decodeLgkmcnt(Version, Waitcnt); } -unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, +unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt) { Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo()); @@ -580,17 +530,17 @@ unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi()); } -unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, +unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt) { return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth()); } -unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, +unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt) { return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth()); } -unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version, +unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) { unsigned Waitcnt = getWaitcntBitMask(Version); Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 97cf29c..da004a6 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -19,6 +19,7 @@ #include "llvm/Support/AMDHSAKernelDescriptor.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetParser.h" #include #include #include @@ -56,16 +57,6 @@ enum { TRAP_NUM_SGPRS = 16 }; -/// Instruction set architecture version. -struct IsaVersion { - unsigned Major; - unsigned Minor; - unsigned Stepping; -}; - -/// \returns Isa version for given subtarget \p Features. -IsaVersion getIsaVersion(const FeatureBitset &Features); - /// Streams isa version string for given subtarget \p STI into \p Stream. void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream); @@ -73,114 +64,114 @@ void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream); /// false otherwise. bool hasCodeObjectV3(const MCSubtargetInfo *STI); -/// \returns Wavefront size for given subtarget \p Features. -unsigned getWavefrontSize(const FeatureBitset &Features); +/// \returns Wavefront size for given subtarget \p STI. +unsigned getWavefrontSize(const MCSubtargetInfo *STI); -/// \returns Local memory size in bytes for given subtarget \p Features. -unsigned getLocalMemorySize(const FeatureBitset &Features); +/// \returns Local memory size in bytes for given subtarget \p STI. +unsigned getLocalMemorySize(const MCSubtargetInfo *STI); /// \returns Number of execution units per compute unit for given subtarget \p -/// Features. -unsigned getEUsPerCU(const FeatureBitset &Features); +/// STI. +unsigned getEUsPerCU(const MCSubtargetInfo *STI); /// \returns Maximum number of work groups per compute unit for given subtarget -/// \p Features and limited by given \p FlatWorkGroupSize. -unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features, +/// \p STI and limited by given \p FlatWorkGroupSize. +unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize); /// \returns Maximum number of waves per compute unit for given subtarget \p -/// Features without any kind of limitation. -unsigned getMaxWavesPerCU(const FeatureBitset &Features); +/// STI without any kind of limitation. +unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI); /// \returns Maximum number of waves per compute unit for given subtarget \p -/// Features and limited by given \p FlatWorkGroupSize. -unsigned getMaxWavesPerCU(const FeatureBitset &Features, +/// STI and limited by given \p FlatWorkGroupSize. +unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize); /// \returns Minimum number of waves per execution unit for given subtarget \p -/// Features. -unsigned getMinWavesPerEU(const FeatureBitset &Features); +/// STI. +unsigned getMinWavesPerEU(const MCSubtargetInfo *STI); /// \returns Maximum number of waves per execution unit for given subtarget \p -/// Features without any kind of limitation. +/// STI without any kind of limitation. unsigned getMaxWavesPerEU(); /// \returns Maximum number of waves per execution unit for given subtarget \p -/// Features and limited by given \p FlatWorkGroupSize. -unsigned getMaxWavesPerEU(const FeatureBitset &Features, +/// STI and limited by given \p FlatWorkGroupSize. +unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize); -/// \returns Minimum flat work group size for given subtarget \p Features. -unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features); +/// \returns Minimum flat work group size for given subtarget \p STI. +unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI); -/// \returns Maximum flat work group size for given subtarget \p Features. -unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features); +/// \returns Maximum flat work group size for given subtarget \p STI. +unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI); -/// \returns Number of waves per work group for given subtarget \p Features and +/// \returns Number of waves per work group for given subtarget \p STI and /// limited by given \p FlatWorkGroupSize. -unsigned getWavesPerWorkGroup(const FeatureBitset &Features, +unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize); -/// \returns SGPR allocation granularity for given subtarget \p Features. -unsigned getSGPRAllocGranule(const FeatureBitset &Features); +/// \returns SGPR allocation granularity for given subtarget \p STI. +unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI); -/// \returns SGPR encoding granularity for given subtarget \p Features. -unsigned getSGPREncodingGranule(const FeatureBitset &Features); +/// \returns SGPR encoding granularity for given subtarget \p STI. +unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI); -/// \returns Total number of SGPRs for given subtarget \p Features. -unsigned getTotalNumSGPRs(const FeatureBitset &Features); +/// \returns Total number of SGPRs for given subtarget \p STI. +unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI); -/// \returns Addressable number of SGPRs for given subtarget \p Features. -unsigned getAddressableNumSGPRs(const FeatureBitset &Features); +/// \returns Addressable number of SGPRs for given subtarget \p STI. +unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI); /// \returns Minimum number of SGPRs that meets the given number of waves per -/// execution unit requirement for given subtarget \p Features. -unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU); +/// execution unit requirement for given subtarget \p STI. +unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU); /// \returns Maximum number of SGPRs that meets the given number of waves per -/// execution unit requirement for given subtarget \p Features. -unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU, +/// execution unit requirement for given subtarget \p STI. +unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable); /// \returns Number of extra SGPRs implicitly required by given subtarget \p -/// Features when the given special registers are used. -unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed, +/// STI when the given special registers are used. +unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed); /// \returns Number of extra SGPRs implicitly required by given subtarget \p -/// Features when the given special registers are used. XNACK is inferred from -/// \p Features. -unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed, +/// STI when the given special registers are used. XNACK is inferred from +/// \p STI. +unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed); -/// \returns Number of SGPR blocks needed for given subtarget \p Features when +/// \returns Number of SGPR blocks needed for given subtarget \p STI when /// \p NumSGPRs are used. \p NumSGPRs should already include any special /// register counts. -unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs); +unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs); -/// \returns VGPR allocation granularity for given subtarget \p Features. -unsigned getVGPRAllocGranule(const FeatureBitset &Features); +/// \returns VGPR allocation granularity for given subtarget \p STI. +unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI); -/// \returns VGPR encoding granularity for given subtarget \p Features. -unsigned getVGPREncodingGranule(const FeatureBitset &Features); +/// \returns VGPR encoding granularity for given subtarget \p STI. +unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI); -/// \returns Total number of VGPRs for given subtarget \p Features. -unsigned getTotalNumVGPRs(const FeatureBitset &Features); +/// \returns Total number of VGPRs for given subtarget \p STI. +unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI); -/// \returns Addressable number of VGPRs for given subtarget \p Features. -unsigned getAddressableNumVGPRs(const FeatureBitset &Features); +/// \returns Addressable number of VGPRs for given subtarget \p STI. +unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI); /// \returns Minimum number of VGPRs that meets given number of waves per -/// execution unit requirement for given subtarget \p Features. -unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU); +/// execution unit requirement for given subtarget \p STI. +unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU); /// \returns Maximum number of VGPRs that meets given number of waves per -/// execution unit requirement for given subtarget \p Features. -unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU); +/// execution unit requirement for given subtarget \p STI. +unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU); -/// \returns Number of VGPR blocks needed for given subtarget \p Features when +/// \returns Number of VGPR blocks needed for given subtarget \p STI when /// \p NumVGPRs are used. -unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs); +unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs); } // end namespace IsaInfo @@ -233,7 +224,7 @@ LLVM_READONLY int getMCOpcode(uint16_t Opcode, unsigned Gen); void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, - const FeatureBitset &Features); + const MCSubtargetInfo *STI); amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(); @@ -268,25 +259,25 @@ std::pair getIntegerPairAttribute(const Function &F, bool OnlyFirstRequired = false); /// \returns Vmcnt bit mask for given isa \p Version. -unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version); +unsigned getVmcntBitMask(const IsaVersion &Version); /// \returns Expcnt bit mask for given isa \p Version. -unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version); +unsigned getExpcntBitMask(const IsaVersion &Version); /// \returns Lgkmcnt bit mask for given isa \p Version. -unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version); +unsigned getLgkmcntBitMask(const IsaVersion &Version); /// \returns Waitcnt bit mask for given isa \p Version. -unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version); +unsigned getWaitcntBitMask(const IsaVersion &Version); /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version. -unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt); +unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt); /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version. -unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt); +unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt); /// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version. -unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt); +unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt); /// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa /// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and @@ -297,19 +288,19 @@ unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt); /// \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14] (gfx9+ only) /// \p Expcnt = \p Waitcnt[6:4] /// \p Lgkmcnt = \p Waitcnt[11:8] -void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, +void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt); /// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version. -unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, +unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt); /// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version. -unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, +unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt); /// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version. -unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, +unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt); /// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa @@ -324,7 +315,7 @@ unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, /// /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given /// isa \p Version. -unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version, +unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt); unsigned getInitialPSInputAddr(const Function &F); diff --git a/llvm/test/CodeGen/AMDGPU/gfx902-without-xnack.ll b/llvm/test/CodeGen/AMDGPU/gfx902-without-xnack.ll new file mode 100644 index 0000000..445e112 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/gfx902-without-xnack.ll @@ -0,0 +1,8 @@ +; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -mattr=-xnack < %s | FileCheck %s + +; CHECK: .hsa_code_object_isa 9,0,2,"AMD","AMDGPU" +define amdgpu_kernel void @test_kernel(float addrspace(1)* %out0, double addrspace(1)* %out1) nounwind { + store float 0.0, float addrspace(1)* %out0 + ret void +} + diff --git a/llvm/test/MC/AMDGPU/hsa_isa_version_attrs.s b/llvm/test/MC/AMDGPU/hsa_isa_version_attrs.s index 0ee85c9..631e1a4 100644 --- a/llvm/test/MC/AMDGPU/hsa_isa_version_attrs.s +++ b/llvm/test/MC/AMDGPU/hsa_isa_version_attrs.s @@ -2,5 +2,5 @@ // RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -mattr=-mad-mix-insts -show-encoding %s | FileCheck --check-prefix=GFX9 %s .hsa_code_object_isa -// GFX8: .hsa_code_object_isa 8,0,0,"AMD","AMDGPU" +// GFX8: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU" // GFX9: .hsa_code_object_isa 9,0,0,"AMD","AMDGPU"