[CUDA] Simplify GPU variant handling. NFC.
author Artem Belevich <tra@google.com>
Mon, 6 Apr 2020 22:21:24 +0000 (15:21 -0700)
committer Artem Belevich <tra@google.com>
Wed, 8 Apr 2020 18:19:43 +0000 (11:19 -0700)
Instead of hardcoding individual GPU mappings in multiple functions, keep them
all in one table and use it to look up the mappings.

We also don't care about 'virtual' architecture much, so the API is trimmed down
to a simpler GPU->Virtual arch name lookup.

Differential Revision: https://reviews.llvm.org/D77665

clang/include/clang/Basic/Cuda.h
clang/lib/Basic/Cuda.cpp
clang/lib/Driver/ToolChains/Cuda.cpp

index c2ebf87..fb85bb3 100644 (file)
@@ -72,36 +72,20 @@ enum class CudaArch {
   GFX1012,
   LAST,
 };
-const char *CudaArchToString(CudaArch A);
 
-// The input should have the form "sm_20".
-CudaArch StringToCudaArch(llvm::StringRef S);
+static inline bool IsNVIDIAGpuArch(CudaArch A) {
+  return A >= CudaArch::SM_20 && A < CudaArch::GFX600;
+}
 
-enum class CudaVirtualArch {
-  UNKNOWN,
-  COMPUTE_20,
-  COMPUTE_30,
-  COMPUTE_32,
-  COMPUTE_35,
-  COMPUTE_37,
-  COMPUTE_50,
-  COMPUTE_52,
-  COMPUTE_53,
-  COMPUTE_60,
-  COMPUTE_61,
-  COMPUTE_62,
-  COMPUTE_70,
-  COMPUTE_72,
-  COMPUTE_75,
-  COMPUTE_AMDGCN,
-};
-const char *CudaVirtualArchToString(CudaVirtualArch A);
+static inline bool IsAMDGpuArch(CudaArch A) {
+  return A >= CudaArch::GFX600 && A < CudaArch::LAST;
+}
 
-// The input should have the form "compute_20".
-CudaVirtualArch StringToCudaVirtualArch(llvm::StringRef S);
+const char *CudaArchToString(CudaArch A);
+const char *CudaArchToVirtualArchString(CudaArch A);
 
-/// Get the compute_xx corresponding to an sm_yy.
-CudaVirtualArch VirtualArchForCudaArch(CudaArch A);
+// The input should have the form "sm_20".
+CudaArch StringToCudaArch(llvm::StringRef S);
 
 /// Get the earliest CudaVersion that supports the given CudaArch.
 CudaVersion MinVersionForCudaArch(CudaArch A);
index 74eb547..7337836 100644 (file)
@@ -45,250 +45,81 @@ CudaVersion CudaStringToVersion(const llvm::Twine &S) {
       .Default(CudaVersion::UNKNOWN);
 }
 
-const char *CudaArchToString(CudaArch A) {
-  switch (A) {
-  case CudaArch::LAST:
-    break;
-  case CudaArch::UNKNOWN:
-    return "unknown";
-  case CudaArch::SM_20:
-    return "sm_20";
-  case CudaArch::SM_21:
-    return "sm_21";
-  case CudaArch::SM_30:
-    return "sm_30";
-  case CudaArch::SM_32:
-    return "sm_32";
-  case CudaArch::SM_35:
-    return "sm_35";
-  case CudaArch::SM_37:
-    return "sm_37";
-  case CudaArch::SM_50:
-    return "sm_50";
-  case CudaArch::SM_52:
-    return "sm_52";
-  case CudaArch::SM_53:
-    return "sm_53";
-  case CudaArch::SM_60:
-    return "sm_60";
-  case CudaArch::SM_61:
-    return "sm_61";
-  case CudaArch::SM_62:
-    return "sm_62";
-  case CudaArch::SM_70:
-    return "sm_70";
-  case CudaArch::SM_72:
-    return "sm_72";
-  case CudaArch::SM_75:
-    return "sm_75";
-  case CudaArch::GFX600: // tahiti
-    return "gfx600";
-  case CudaArch::GFX601: // pitcairn, verde, oland,hainan
-    return "gfx601";
-  case CudaArch::GFX700: // kaveri
-    return "gfx700";
-  case CudaArch::GFX701: // hawaii
-    return "gfx701";
-  case CudaArch::GFX702: // 290,290x,R390,R390x
-    return "gfx702";
-  case CudaArch::GFX703: // kabini mullins
-    return "gfx703";
-  case CudaArch::GFX704: // bonaire
-    return "gfx704";
-  case CudaArch::GFX801: // carrizo
-    return "gfx801";
-  case CudaArch::GFX802: // tonga,iceland
-    return "gfx802";
-  case CudaArch::GFX803: // fiji,polaris10
-    return "gfx803";
-  case CudaArch::GFX810: // stoney
-    return "gfx810";
-  case CudaArch::GFX900: // vega, instinct
-    return "gfx900";
-  case CudaArch::GFX902: // TBA
-    return "gfx902";
-  case CudaArch::GFX904: // TBA
-    return "gfx904";
-  case CudaArch::GFX906: // TBA
-    return "gfx906";
-  case CudaArch::GFX908: // TBA
-    return "gfx908";
-  case CudaArch::GFX909: // TBA
-    return "gfx909";
-  case CudaArch::GFX1010: // TBA
-    return "gfx1010";
-  case CudaArch::GFX1011: // TBA
-    return "gfx1011";
-  case CudaArch::GFX1012: // TBA
-    return "gfx1012";
-  }
-  llvm_unreachable("invalid enum");
-}
+struct CudaArchToStringMap {
+  CudaArch arch;
+  const char *arch_name;
+  const char *virtual_arch_name;
+};
 
-CudaArch StringToCudaArch(llvm::StringRef S) {
-  return llvm::StringSwitch<CudaArch>(S)
-      .Case("sm_20", CudaArch::SM_20)
-      .Case("sm_21", CudaArch::SM_21)
-      .Case("sm_30", CudaArch::SM_30)
-      .Case("sm_32", CudaArch::SM_32)
-      .Case("sm_35", CudaArch::SM_35)
-      .Case("sm_37", CudaArch::SM_37)
-      .Case("sm_50", CudaArch::SM_50)
-      .Case("sm_52", CudaArch::SM_52)
-      .Case("sm_53", CudaArch::SM_53)
-      .Case("sm_60", CudaArch::SM_60)
-      .Case("sm_61", CudaArch::SM_61)
-      .Case("sm_62", CudaArch::SM_62)
-      .Case("sm_70", CudaArch::SM_70)
-      .Case("sm_72", CudaArch::SM_72)
-      .Case("sm_75", CudaArch::SM_75)
-      .Case("gfx600", CudaArch::GFX600)
-      .Case("gfx601", CudaArch::GFX601)
-      .Case("gfx700", CudaArch::GFX700)
-      .Case("gfx701", CudaArch::GFX701)
-      .Case("gfx702", CudaArch::GFX702)
-      .Case("gfx703", CudaArch::GFX703)
-      .Case("gfx704", CudaArch::GFX704)
-      .Case("gfx801", CudaArch::GFX801)
-      .Case("gfx802", CudaArch::GFX802)
-      .Case("gfx803", CudaArch::GFX803)
-      .Case("gfx810", CudaArch::GFX810)
-      .Case("gfx900", CudaArch::GFX900)
-      .Case("gfx902", CudaArch::GFX902)
-      .Case("gfx904", CudaArch::GFX904)
-      .Case("gfx906", CudaArch::GFX906)
-      .Case("gfx908", CudaArch::GFX908)
-      .Case("gfx909", CudaArch::GFX909)
-      .Case("gfx1010", CudaArch::GFX1010)
-      .Case("gfx1011", CudaArch::GFX1011)
-      .Case("gfx1012", CudaArch::GFX1012)
-      .Default(CudaArch::UNKNOWN);
-}
+#define SM2(sm, ca)                                                            \
+  { CudaArch::SM_##sm, "sm_" #sm, ca }
+#define SM(sm) SM2(sm, "compute_" #sm)
+#define GFX(gpu)                                                               \
+  { CudaArch::GFX##gpu, "gfx" #gpu, "compute_amdgcn" }
+CudaArchToStringMap arch_names[] = {
+    // clang-format off
+    SM2(20, "compute_20"), SM2(21, "compute_20"), // Fermi
+    SM(30), SM(32), SM(35), SM(37),  // Kepler
+    SM(50), SM(52), SM(53),          // Maxwell
+    SM(60), SM(61), SM(62),          // Pascal
+    SM(70), SM(72),                  // Volta
+    SM(75),                          // Turing
+    GFX(600), // tahiti
+    GFX(601), // pitcairn, verde, oland,hainan
+    GFX(700), // kaveri
+    GFX(701), // hawaii
+    GFX(702), // 290,290x,R390,R390x
+    GFX(703), // kabini mullins
+    GFX(704), // bonaire
+    GFX(801), // carrizo
+    GFX(802), // tonga,iceland
+    GFX(803), // fiji,polaris10
+    GFX(810), // stoney
+    GFX(900), // vega, instinct
+    GFX(902), GFX(904), GFX(906), GFX(908), GFX(909),
+    GFX(1010), GFX(1011), GFX(1012),
+    // clang-format on
+};
+#undef SM
+#undef SM2
+#undef GFX
 
-const char *CudaVirtualArchToString(CudaVirtualArch A) {
-  switch (A) {
-  case CudaVirtualArch::UNKNOWN:
+const char *CudaArchToString(CudaArch A) {
+  auto result = std::find_if(
+      std::begin(arch_names), std::end(arch_names),
+      [A](const CudaArchToStringMap &map) { return A == map.arch; });
+  if (result == std::end(arch_names))
     return "unknown";
-  case CudaVirtualArch::COMPUTE_20:
-    return "compute_20";
-  case CudaVirtualArch::COMPUTE_30:
-    return "compute_30";
-  case CudaVirtualArch::COMPUTE_32:
-    return "compute_32";
-  case CudaVirtualArch::COMPUTE_35:
-    return "compute_35";
-  case CudaVirtualArch::COMPUTE_37:
-    return "compute_37";
-  case CudaVirtualArch::COMPUTE_50:
-    return "compute_50";
-  case CudaVirtualArch::COMPUTE_52:
-    return "compute_52";
-  case CudaVirtualArch::COMPUTE_53:
-    return "compute_53";
-  case CudaVirtualArch::COMPUTE_60:
-    return "compute_60";
-  case CudaVirtualArch::COMPUTE_61:
-    return "compute_61";
-  case CudaVirtualArch::COMPUTE_62:
-    return "compute_62";
-  case CudaVirtualArch::COMPUTE_70:
-    return "compute_70";
-  case CudaVirtualArch::COMPUTE_72:
-    return "compute_72";
-  case CudaVirtualArch::COMPUTE_75:
-    return "compute_75";
-  case CudaVirtualArch::COMPUTE_AMDGCN:
-    return "compute_amdgcn";
-  }
-  llvm_unreachable("invalid enum");
+  return result->arch_name;
 }
 
-CudaVirtualArch StringToCudaVirtualArch(llvm::StringRef S) {
-  return llvm::StringSwitch<CudaVirtualArch>(S)
-      .Case("compute_20", CudaVirtualArch::COMPUTE_20)
-      .Case("compute_30", CudaVirtualArch::COMPUTE_30)
-      .Case("compute_32", CudaVirtualArch::COMPUTE_32)
-      .Case("compute_35", CudaVirtualArch::COMPUTE_35)
-      .Case("compute_37", CudaVirtualArch::COMPUTE_37)
-      .Case("compute_50", CudaVirtualArch::COMPUTE_50)
-      .Case("compute_52", CudaVirtualArch::COMPUTE_52)
-      .Case("compute_53", CudaVirtualArch::COMPUTE_53)
-      .Case("compute_60", CudaVirtualArch::COMPUTE_60)
-      .Case("compute_61", CudaVirtualArch::COMPUTE_61)
-      .Case("compute_62", CudaVirtualArch::COMPUTE_62)
-      .Case("compute_70", CudaVirtualArch::COMPUTE_70)
-      .Case("compute_72", CudaVirtualArch::COMPUTE_72)
-      .Case("compute_75", CudaVirtualArch::COMPUTE_75)
-      .Case("compute_amdgcn", CudaVirtualArch::COMPUTE_AMDGCN)
-      .Default(CudaVirtualArch::UNKNOWN);
+const char *CudaArchToVirtualArchString(CudaArch A) {
+  auto result = std::find_if(
+      std::begin(arch_names), std::end(arch_names),
+      [A](const CudaArchToStringMap &map) { return A == map.arch; });
+  if (result == std::end(arch_names))
+    return "unknown";
+  return result->virtual_arch_name;
 }
 
-CudaVirtualArch VirtualArchForCudaArch(CudaArch A) {
-  switch (A) {
-  case CudaArch::LAST:
-    break;
-  case CudaArch::UNKNOWN:
-    return CudaVirtualArch::UNKNOWN;
-  case CudaArch::SM_20:
-  case CudaArch::SM_21:
-    return CudaVirtualArch::COMPUTE_20;
-  case CudaArch::SM_30:
-    return CudaVirtualArch::COMPUTE_30;
-  case CudaArch::SM_32:
-    return CudaVirtualArch::COMPUTE_32;
-  case CudaArch::SM_35:
-    return CudaVirtualArch::COMPUTE_35;
-  case CudaArch::SM_37:
-    return CudaVirtualArch::COMPUTE_37;
-  case CudaArch::SM_50:
-    return CudaVirtualArch::COMPUTE_50;
-  case CudaArch::SM_52:
-    return CudaVirtualArch::COMPUTE_52;
-  case CudaArch::SM_53:
-    return CudaVirtualArch::COMPUTE_53;
-  case CudaArch::SM_60:
-    return CudaVirtualArch::COMPUTE_60;
-  case CudaArch::SM_61:
-    return CudaVirtualArch::COMPUTE_61;
-  case CudaArch::SM_62:
-    return CudaVirtualArch::COMPUTE_62;
-  case CudaArch::SM_70:
-    return CudaVirtualArch::COMPUTE_70;
-  case CudaArch::SM_72:
-    return CudaVirtualArch::COMPUTE_72;
-  case CudaArch::SM_75:
-    return CudaVirtualArch::COMPUTE_75;
-  case CudaArch::GFX600:
-  case CudaArch::GFX601:
-  case CudaArch::GFX700:
-  case CudaArch::GFX701:
-  case CudaArch::GFX702:
-  case CudaArch::GFX703:
-  case CudaArch::GFX704:
-  case CudaArch::GFX801:
-  case CudaArch::GFX802:
-  case CudaArch::GFX803:
-  case CudaArch::GFX810:
-  case CudaArch::GFX900:
-  case CudaArch::GFX902:
-  case CudaArch::GFX904:
-  case CudaArch::GFX906:
-  case CudaArch::GFX908:
-  case CudaArch::GFX909:
-  case CudaArch::GFX1010:
-  case CudaArch::GFX1011:
-  case CudaArch::GFX1012:
-    return CudaVirtualArch::COMPUTE_AMDGCN;
-  }
-  llvm_unreachable("invalid enum");
+CudaArch StringToCudaArch(llvm::StringRef S) {
+  auto result = std::find_if(
+      std::begin(arch_names), std::end(arch_names),
+      [S](const CudaArchToStringMap &map) { return S == map.arch_name; });
+  if (result == std::end(arch_names))
+    return CudaArch::UNKNOWN;
+  return result->arch;
 }
 
 CudaVersion MinVersionForCudaArch(CudaArch A) {
-  switch (A) {
-  case CudaArch::LAST:
-    break;
-  case CudaArch::UNKNOWN:
+  if (A == CudaArch::UNKNOWN)
     return CudaVersion::UNKNOWN;
+
+  // AMD GPUs do not depend on CUDA versions.
+  if (IsAMDGpuArch(A))
+    return CudaVersion::CUDA_70;
+
+  switch (A) {
   case CudaArch::SM_20:
   case CudaArch::SM_21:
   case CudaArch::SM_30:
@@ -309,53 +140,21 @@ CudaVersion MinVersionForCudaArch(CudaArch A) {
     return CudaVersion::CUDA_91;
   case CudaArch::SM_75:
     return CudaVersion::CUDA_100;
-  case CudaArch::GFX600:
-  case CudaArch::GFX601:
-  case CudaArch::GFX700:
-  case CudaArch::GFX701:
-  case CudaArch::GFX702:
-  case CudaArch::GFX703:
-  case CudaArch::GFX704:
-  case CudaArch::GFX801:
-  case CudaArch::GFX802:
-  case CudaArch::GFX803:
-  case CudaArch::GFX810:
-  case CudaArch::GFX900:
-  case CudaArch::GFX902:
-  case CudaArch::GFX904:
-  case CudaArch::GFX906:
-  case CudaArch::GFX908:
-  case CudaArch::GFX909:
-  case CudaArch::GFX1010:
-  case CudaArch::GFX1011:
-  case CudaArch::GFX1012:
-    return CudaVersion::CUDA_70;
+  default:
+    llvm_unreachable("invalid enum");
   }
-  llvm_unreachable("invalid enum");
 }
 
 CudaVersion MaxVersionForCudaArch(CudaArch A) {
+  // AMD GPUs do not depend on CUDA versions.
+  if (IsAMDGpuArch(A))
+    return CudaVersion::LATEST;
+
   switch (A) {
   case CudaArch::UNKNOWN:
     return CudaVersion::UNKNOWN;
   case CudaArch::SM_20:
   case CudaArch::SM_21:
-  case CudaArch::GFX600:
-  case CudaArch::GFX601:
-  case CudaArch::GFX700:
-  case CudaArch::GFX701:
-  case CudaArch::GFX702:
-  case CudaArch::GFX703:
-  case CudaArch::GFX704:
-  case CudaArch::GFX801:
-  case CudaArch::GFX802:
-  case CudaArch::GFX803:
-  case CudaArch::GFX810:
-  case CudaArch::GFX900:
-  case CudaArch::GFX902:
-  case CudaArch::GFX1010:
-  case CudaArch::GFX1011:
-  case CudaArch::GFX1012:
     return CudaVersion::CUDA_80;
   default:
     return CudaVersion::LATEST;
index 80259b5..4bd6cc6 100644 (file)
@@ -161,13 +161,13 @@ CudaInstallationDetector::CudaInstallationDetector(
       // CUDA-9+ uses single libdevice file for all GPU variants.
       std::string FilePath = LibDevicePath + "/libdevice.10.bc";
       if (FS.exists(FilePath)) {
-        for (const char *GpuArchName :
-             {"sm_30", "sm_32", "sm_35", "sm_37", "sm_50", "sm_52", "sm_53",
-              "sm_60", "sm_61", "sm_62", "sm_70", "sm_72", "sm_75"}) {
-          const CudaArch GpuArch = StringToCudaArch(GpuArchName);
-          if (Version >= MinVersionForCudaArch(GpuArch) &&
-              Version <= MaxVersionForCudaArch(GpuArch))
-            LibDeviceMap[GpuArchName] = FilePath;
+        for (int Arch = (int)CudaArch::SM_30, E = (int)CudaArch::LAST; Arch < E;
+             ++Arch) {
+          CudaArch GpuArch = static_cast<CudaArch>(Arch);
+          if (!IsNVIDIAGpuArch(GpuArch))
+            continue;
+          std::string GpuArchName(CudaArchToString(GpuArch));
+          LibDeviceMap[GpuArchName] = FilePath;
         }
       }
     } else {
@@ -471,10 +471,9 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
       continue;
     // We need to pass an Arch of the form "sm_XX" for cubin files and
     // "compute_XX" for ptx.
-    const char *Arch =
-        (II.getType() == types::TY_PP_Asm)
-            ? CudaVirtualArchToString(VirtualArchForCudaArch(gpu_arch))
-            : gpu_arch_str;
+    const char *Arch = (II.getType() == types::TY_PP_Asm)
+                           ? CudaArchToVirtualArchString(gpu_arch)
+                           : gpu_arch_str;
     CmdArgs.push_back(Args.MakeArgString(llvm::Twine("--image=profile=") +
                                          Arch + ",file=" + II.getFilename()));
   }