return 5;
}
-unsigned GCNSubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
- unsigned MaxWaves = getMaxWavesPerEU();
- unsigned Granule = getVGPRAllocGranule();
- if (VGPRs < Granule)
- return MaxWaves;
- unsigned RoundedRegs = ((VGPRs + Granule - 1) / Granule) * Granule;
- return std::min(std::max(getTotalNumVGPRs() / RoundedRegs, 1u), MaxWaves);
+unsigned GCNSubtarget::getOccupancyWithNumVGPRs(unsigned NumVGPRs) const { // Occupancy (waves per EU) when NumVGPRs VGPRs are used.
+ return AMDGPU::IsaInfo::getNumWavesPerEUWithNumVGPRs(this, NumVGPRs); // Shared IsaInfo helper; this subtarget is passed as its MCSubtargetInfo.
}
unsigned
return AMDGPU::IsaInfo::getAddressableNumVGPRs(this);
}
- /// \returns Minimum number of VGPRs that meets given number of waves per
- /// execution unit requirement supported by the subtarget.
+ /// \returns the minimum number of VGPRs that will prevent achieving more than
+ /// the specified number of waves \p WavesPerEU. The IsaInfo helper returns 0 when \p WavesPerEU is at least the subtarget's maximum waves per EU.
unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU); // Forwards to the IsaInfo implementation with this subtarget.
}
- /// \returns Maximum number of VGPRs that meets given number of waves per
- /// execution unit requirement supported by the subtarget.
+ /// \returns the maximum number of VGPRs that can be used and still achieve
+ /// at least the specified number of waves \p WavesPerEU.
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU); // Forwards to the IsaInfo implementation with this subtarget.
}
return 256;
}
+unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI, // Waves per EU attainable on STI when each wave uses NumVGPRs VGPRs.
+ unsigned NumVGPRs) {
+ unsigned MaxWaves = getMaxWavesPerEU(STI);
+ unsigned Granule = getVGPRAllocGranule(STI);
+ if (NumVGPRs < Granule) // Less than one allocation granule requested: report the subtarget maximum.
+ return MaxWaves;
+ unsigned RoundedRegs = alignTo(NumVGPRs, Granule); // Round the request up to a whole number of granules.
+ return std::min(std::max(getTotalNumVGPRs(STI) / RoundedRegs, 1u), MaxWaves); // Total VGPRs / per-wave allocation, clamped to [1, MaxWaves].
+}
+
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
assert(WavesPerEU != 0);
- if (WavesPerEU >= getMaxWavesPerEU(STI))
+ unsigned MaxWavesPerEU = getMaxWavesPerEU(STI);
+ if (WavesPerEU >= MaxWavesPerEU) // Requested occupancy is already the subtarget maximum (or more): result is 0.
+ return 0;
+
+ unsigned TotNumVGPRs = getTotalNumVGPRs(STI);
+ unsigned AddrsableNumVGPRs = getAddressableNumVGPRs(STI);
+ unsigned Granule = getVGPRAllocGranule(STI);
+ unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule); // Granule-aligned per-wave budget when TotNumVGPRs is split across WavesPerEU waves.
+
+ if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule)) // Same aligned budget as at maximum occupancy: result is 0 as well.
return 0;
- unsigned MinNumVGPRs =
- alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1),
- getVGPRAllocGranule(STI)) + 1;
- return std::min(MinNumVGPRs, getAddressableNumVGPRs(STI));
+
+ unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs); // Occupancy obtained when every addressable VGPR is used.
+ if (WavesPerEU < MinWavesPerEU) // Request is below that occupancy: answer for MinWavesPerEU instead.
+ return getMinNumVGPRs(STI, MinWavesPerEU);
+
+ unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule); // Aligned per-wave budget for one more wave.
+ unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext); // One register past the smaller of (MaxNumVGPRs - Granule) and the next-wave budget.
+ return std::min(MinNumVGPRs, AddrsableNumVGPRs); // Never report more than the addressable VGPR count.
}
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
+/// \returns Number of waves per execution unit reachable when \p NumVGPRs VGPRs
+/// are used on the given subtarget \p STI.
+unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
+ unsigned NumVGPRs);
+
/// \returns Number of VGPR blocks needed for given subtarget \p STI when
/// \p NumVGPRs are used.
///
; GCN-LABEL: {{^}}limited_occupancy_19:
; GFX9: ; Occupancy: 10
-; GFX1010: ; Occupancy: 18
+; GFX1010: ; Occupancy: 20
; GFX1030: ; Occupancy: 16
; GFX1100: ; Occupancy: 16
define amdgpu_kernel void @limited_occupancy_19() #2 {
--- /dev/null
+//===--------- llvm/unittests/Target/AMDGPU/AMDGPUUnitTests.cpp -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUUnitTests.h"
+#include "AMDGPUTargetMachine.h"
+#include "GCNSubtarget.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/TargetParser.h"
+#include "llvm/Support/TargetSelect.h"
+#include "gtest/gtest.h"
+
+#include "AMDGPUGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+std::once_flag flag; // Guards the one-time AMDGPU target initialization below.
+
+void InitializeAMDGPUTarget() { // Initializes the AMDGPU target info, target and MC components; safe to call repeatedly.
+ std::call_once(flag, []() { // The lambda body runs at most once, however often this function is called.
+ LLVMInitializeAMDGPUTargetInfo();
+ LLVMInitializeAMDGPUTarget();
+ LLVMInitializeAMDGPUTargetMC();
+ });
+}
+
+std::unique_ptr<const GCNTargetMachine> // Owning pointer to the new target machine, or nullptr if no target is found for TStr.
+llvm::createAMDGPUTargetMachine(std::string TStr, StringRef CPU, StringRef FS) { // TStr: target triple; CPU: processor name; FS: feature string.
+ InitializeAMDGPUTarget(); // Make sure the AMDGPU target is initialized before the registry lookup.
+
+ std::string Error; // Filled by lookupTarget; not inspected here.
+ const Target *T = TargetRegistry::lookupTarget(TStr, Error);
+ if (!T)
+ return nullptr;
+
+ TargetOptions Options; // Default-constructed target options.
+ return std::unique_ptr<GCNTargetMachine>(
+ static_cast<GCNTargetMachine *>(T->createTargetMachine( // The result is cast to GCNTargetMachine and owned by the returned unique_ptr.
+ TStr, CPU, FS, Options, std::nullopt, std::nullopt)));
+}
+
+static cl::opt<bool> PrintCpuRegLimits( // When set, testGPRLimits records the table of every CPU, not only of failing ones.
+ "print-cpu-reg-limits", cl::NotHidden, cl::init(false),
+ cl::desc("force printing per AMDGPU CPU register limits"));
+
+static bool checkMinMax(std::stringstream &OS, unsigned Occ, unsigned MinOcc, // Validates the [min, max] register range reported for occupancy Occ and appends one formatted row to OS.
+ unsigned MaxOcc,
+ std::function<unsigned(unsigned)> GetOcc, // Maps a register count to the resulting occupancy.
+ std::function<unsigned(unsigned)> GetMinGPRs, // Maps an occupancy to the reported minimum register count.
+ std::function<unsigned(unsigned)> GetMaxGPRs) { // Maps an occupancy to the reported maximum register count.
+ bool MinValid = true, MaxValid = true, RangeValid = true;
+ unsigned MinGPRs = GetMinGPRs(Occ);
+ unsigned MaxGPRs = GetMaxGPRs(Occ);
+ unsigned RealOcc; // Assigned only in the else branch below; read only while RangeValid is still true.
+
+ if (MinGPRs >= MaxGPRs) // The minimum must be strictly below the maximum.
+ RangeValid = false;
+ else {
+ RealOcc = GetOcc(MinGPRs);
+ for (unsigned NumRegs = MinGPRs + 1; NumRegs <= MaxGPRs; ++NumRegs) { // Every count in the range must give the same occupancy as MinGPRs.
+ if (RealOcc != GetOcc(NumRegs)) {
+ RangeValid = false;
+ break;
+ }
+ }
+ }
+
+ if (RangeValid && RealOcc > MinOcc && RealOcc <= MaxOcc) { // Boundary checks run only for a valid range whose occupancy lies in (MinOcc, MaxOcc].
+ if (MinGPRs > 0 && GetOcc(MinGPRs - 1) <= RealOcc) // One register fewer must give a strictly higher occupancy.
+ MinValid = false;
+
+ if (GetOcc(MaxGPRs + 1) >= RealOcc) // One register more must give a strictly lower occupancy.
+ MaxValid = false;
+ }
+
+ std::stringstream MinStr;
+ MinStr << (MinValid ? ' ' : '<') << ' ' << std::setw(3) << MinGPRs << " (O" // '<' marks a failed minimum check.
+ << GetOcc(MinGPRs) << ") " << (RangeValid ? ' ' : 'R'); // 'R' marks a failed range check.
+
+ OS << std::left << std::setw(15) << MinStr.str() << std::setw(3) << MaxGPRs
+ << " (O" << GetOcc(MaxGPRs) << ')' << (MaxValid ? "" : " >"); // " >" marks a failed maximum check.
+
+ return MinValid && MaxValid && RangeValid; // True only when all three checks passed.
+}
+
+static const std::pair<StringRef, StringRef> // {feature string passed to the target machine, suffix appended to the CPU name in reports}.
+ EmptyFS = {"", ""},
+ W32FS = {"+wavefrontsize32", "w32"},
+ W64FS = {"+wavefrontsize64", "w64"};
+
+static void testGPRLimits( // Runs `test` for each occupancy of each CPU from fillValidArchListAMDGCN and fails with the collected tables.
+ const char *RegName, bool TestW32W64, // RegName labels the Min/Max columns; TestW32W64 enables the separate w32/w64 passes.
+ std::function<bool(std::stringstream &, unsigned, GCNSubtarget &)> test) { // Callback writing one table row; returns false when a check fails.
+ SmallVector<StringRef> CPUs;
+ AMDGPU::fillValidArchListAMDGCN(CPUs);
+
+ std::map<std::string, SmallVector<std::string>> TablePerCPUs; // Keyed by table text, so CPUs with identical tables are reported together.
+ for (auto CPUName : CPUs) {
+ auto CanonCPUName =
+ AMDGPU::getArchNameAMDGCN(AMDGPU::parseArchAMDGCN(CPUName));
+
+ auto *FS = &EmptyFS; // First pass uses no extra feature string.
+ while (true) {
+ auto TM = createAMDGPUTargetMachine("amdgcn-amd-", CPUName, FS->first);
+ if (!TM) // No target machine could be created: nothing to test for this CPU.
+ break;
+
+ GCNSubtarget ST(TM->getTargetTriple(), std::string(TM->getTargetCPU()),
+ std::string(TM->getTargetFeatureString()), *TM);
+
+ if (TestW32W64 &&
+ ST.getFeatureBits().test(AMDGPU::FeatureWavefrontSize32)) // Subtarget has the wave32 feature: label this pass "w32".
+ FS = &W32FS;
+
+ std::stringstream Table;
+ bool Success = true;
+ unsigned MaxOcc = ST.getMaxWavesPerEU();
+ for (unsigned Occ = MaxOcc; Occ > 0; --Occ) { // One row per occupancy, from the maximum down to 1.
+ Table << std::right << std::setw(3) << Occ << " ";
+ Success = test(Table, Occ, ST) && Success; // test() is called first so every row is emitted even after a failure.
+ Table << '\n';
+ }
+ if (!Success || PrintCpuRegLimits) // Keep the table on failure, or always when -print-cpu-reg-limits is set.
+ TablePerCPUs[Table.str()].push_back((CanonCPUName + FS->second).str());
+
+ if (FS != &W32FS) // Only a w32 pass is followed by a second pass.
+ break;
+
+ FS = &W64FS; // Second pass: same CPU with +wavefrontsize64.
+ }
+ }
+ std::stringstream OS;
+ for (auto &P : TablePerCPUs) {
+ for (auto &CPUName : P.second)
+ OS << ' ' << CPUName;
+ OS << ":\nOcc Min" << RegName << " Max" << RegName << '\n'
+ << P.first << '\n';
+ }
+ auto ErrStr = OS.str();
+ EXPECT_TRUE(ErrStr.empty()) << ErrStr; // Any recorded table fails the expectation, including tables recorded only because of -print-cpu-reg-limits.
+}
+
+TEST(AMDGPU, TestVGPRLimitsPerOccupancy) { // Cross-checks getMinNumVGPRs/getMaxNumVGPRs against getOccupancyWithNumVGPRs for every occupancy.
+ testGPRLimits("VGPR", true, [](std::stringstream &OS, unsigned Occ, // true: run the separate w32/w64 passes.
+ GCNSubtarget &ST) {
+ unsigned MaxVGPRNum = ST.getAddressableNumVGPRs();
+ return checkMinMax(
+ OS, Occ, ST.getOccupancyWithNumVGPRs(MaxVGPRNum), ST.getMaxWavesPerEU(), // MinOcc: occupancy with all addressable VGPRs; MaxOcc: maximum waves per EU.
+ [&](unsigned NumGPRs) { return ST.getOccupancyWithNumVGPRs(NumGPRs); },
+ [&](unsigned Occ) { return ST.getMinNumVGPRs(Occ); },
+ [&](unsigned Occ) { return ST.getMaxNumVGPRs(Occ); });
+ });
+}
--- /dev/null
+//===---------- llvm/unittests/Target/AMDGPU/AMDGPUUnitTests.h ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_UNITTESTS_TARGET_AMDGPU_AMDGPUUNITTESTS_H
+#define LLVM_UNITTESTS_TARGET_AMDGPU_AMDGPUUNITTESTS_H
+
+#include <memory>
+#include <string>
+
+namespace llvm {
+
+class GCNTargetMachine;
+class StringRef;
+
+std::unique_ptr<const GCNTargetMachine> // Shared by the AMDGPU unit tests; the definition returns nullptr when no target is found for TStr.
+createAMDGPUTargetMachine(std::string TStr, StringRef CPU, StringRef FS); // TStr: target triple; CPU: processor name; FS: feature string.
+
+} // end namespace llvm
+
+#endif // LLVM_UNITTESTS_TARGET_AMDGPU_AMDGPUUNITTESTS_H
AMDGPUCodeGen
AMDGPUDesc
AMDGPUInfo
+ AMDGPUUtils
CodeGen
Core
MC
)
add_llvm_target_unittest(AMDGPUTests
+ AMDGPUUnitTests.cpp
DwarfRegMappings.cpp
ExecMayBeModifiedBeforeAnyUse.cpp
)
//
//===----------------------------------------------------------------------===//
-#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/MC/MCTargetOptions.h"
-#include "llvm/MC/TargetRegistry.h"
-#include "llvm/Support/TargetSelect.h"
-#include "llvm/Target/TargetMachine.h"
+#include "AMDGPUUnitTests.h"
#include "gtest/gtest.h"
-#include <thread>
using namespace llvm;
-std::once_flag flag;
-
-void InitializeAMDGPUTarget() {
- std::call_once(flag, []() {
- LLVMInitializeAMDGPUTargetInfo();
- LLVMInitializeAMDGPUTarget();
- LLVMInitializeAMDGPUTargetMC();
- });
-}
-
-std::unique_ptr<const GCNTargetMachine>
-createTargetMachine(std::string TStr, StringRef CPU, StringRef FS) {
- InitializeAMDGPUTarget();
-
- std::string Error;
- const Target *T = TargetRegistry::lookupTarget(TStr, Error);
- if (!T)
- return nullptr;
-
- TargetOptions Options;
- return std::unique_ptr<GCNTargetMachine>(
- static_cast<GCNTargetMachine *>(T->createTargetMachine(
- TStr, CPU, FS, Options, std::nullopt, std::nullopt)));
-}
-
-TEST(AMDGPUDwarfRegMappingTests, TestWave64DwarfRegMapping) {
+TEST(AMDGPU, TestWave64DwarfRegMapping) {
for (auto Triple :
{"amdgcn-amd-", "amdgcn-amd-amdhsa", "amdgcn-amd-amdpal"}) {
- auto TM = createTargetMachine(Triple, "gfx1010", "+wavefrontsize64");
+ auto TM = createAMDGPUTargetMachine(Triple, "gfx1010", "+wavefrontsize64");
if (TM) {
GCNSubtarget ST(TM->getTargetTriple(), std::string(TM->getTargetCPU()),
std::string(TM->getTargetFeatureString()), *TM);
}
}
-TEST(AMDGPUDwarfRegMappingTests, TestWave32DwarfRegMapping) {
+TEST(AMDGPU, TestWave32DwarfRegMapping) {
for (auto Triple :
{"amdgcn-amd-", "amdgcn-amd-amdhsa", "amdgcn-amd-amdpal"}) {
- auto TM = createTargetMachine(Triple, "gfx1010", "+wavefrontsize32");
+ auto TM = createAMDGPUTargetMachine(Triple, "gfx1010", "+wavefrontsize32");
if (TM) {
GCNSubtarget ST(TM->getTargetTriple(), std::string(TM->getTargetCPU()),
std::string(TM->getTargetFeatureString()), *TM);
//===----------------------------------------------------------------------===//
#include "AMDGPUTargetMachine.h"
-#include "GCNSubtarget.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "AMDGPUUnitTests.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/MC/MCTargetOptions.h"
-#include "llvm/MC/TargetRegistry.h"
-#include "llvm/Support/TargetSelect.h"
-#include "llvm/Target/TargetMachine.h"
#include "gtest/gtest.h"
-#include <thread>
using namespace llvm;
-// implementation is in the llvm/unittests/Target/AMDGPU/DwarfRegMappings.cpp
-std::unique_ptr<const GCNTargetMachine>
-createTargetMachine(std::string TStr, StringRef CPU, StringRef FS);
-
-TEST(AMDGPUExecMayBeModifiedBeforeAnyUse, TheTest) {
- auto TM = createTargetMachine("amdgcn-amd-", "gfx906", "");
+TEST(AMDGPU, ExecMayBeModifiedBeforeAnyUse) {
+ auto TM = createAMDGPUTargetMachine("amdgcn-amd-", "gfx906", "");
if (!TM)
return;