[CUDA][HIP] Fix bound arch for offload action for fat binary

author Yaxun (Sam) Liu <yaxun.liu@amd.com>

Wed, 30 Sep 2020 00:16:32 +0000 (20:16 -0400)

committer Yaxun (Sam) Liu <yaxun.liu@amd.com>

Fri, 2 Oct 2020 23:05:51 +0000 (19:05 -0400)
author Yaxun (Sam) Liu <yaxun.liu@amd.com>
Wed, 30 Sep 2020 00:16:32 +0000 (20:16 -0400)
committer Yaxun (Sam) Liu <yaxun.liu@amd.com>
Fri, 2 Oct 2020 23:05:51 +0000 (19:05 -0400)
diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h

index 19301e8..93394f3 100644 (file)
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -37,6 +37,7 @@ const char *CudaVersionToString(CudaVersion V);
  CudaVersion CudaStringToVersion(const llvm::Twine &S);
  
  enum class CudaArch {
+  UNUSED,
    UNKNOWN,
    SM_20,
    SM_21,
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp

index 2abbe3e..7de42c1 100644 (file)
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -64,6 +64,7 @@ struct CudaArchToStringMap {
    { CudaArch::GFX##gpu, "gfx" #gpu, "compute_amdgcn" }
  CudaArchToStringMap arch_names[] = {
      // clang-format off
+    {CudaArch::UNUSED, "", ""},
      SM2(20, "compute_20"), SM2(21, "compute_20"), // Fermi
      SM(30), SM(32), SM(35), SM(37),  // Kepler
      SM(50), SM(52), SM(53),          // Maxwell
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp

index ef61b8f..aae8947 100644 (file)
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -204,6 +204,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
        case CudaArch::GFX1031:
        case CudaArch::LAST:
          break;
+      case CudaArch::UNUSED:
        case CudaArch::UNKNOWN:
          assert(false && "No GPU arch when compiling CUDA device code.");
          return "";
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

index d9ef6c2..4332563 100644 (file)
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -5034,6 +5034,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(
        case CudaArch::GFX1012:
        case CudaArch::GFX1030:
        case CudaArch::GFX1031:
+      case CudaArch::UNUSED:
        case CudaArch::UNKNOWN:
          break;
        case CudaArch::LAST:
@@ -5095,6 +5096,7 @@ static std::pair<unsigned, unsigned> getSMsBlocksPerSM(CodeGenModule &CGM) {
    case CudaArch::GFX1012:
    case CudaArch::GFX1030:
    case CudaArch::GFX1031:
+  case CudaArch::UNUSED:
    case CudaArch::UNKNOWN:
      break;
    case CudaArch::LAST:
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp

index 9cc4b82..96798b3 100644 (file)
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -2517,7 +2517,7 @@ class OffloadingActionBuilder final {
  
        // If we have a fat binary, add it to the list.
        if (CudaFatBinary) {
-        AddTopLevel(CudaFatBinary, CudaArch::UNKNOWN);
+        AddTopLevel(CudaFatBinary, CudaArch::UNUSED);
          CudaDeviceActions.clear();
          CudaFatBinary = nullptr;
          return;
diff --git a/clang/test/Driver/hip-phases.hip b/clang/test/Driver/hip-phases.hip

index 7c2dc13..241448d 100644 (file)
--- a/clang/test/Driver/hip-phases.hip
+++ b/clang/test/Driver/hip-phases.hip
@@ -219,6 +219,12 @@
  // DBIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
  // DBIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
  // DBIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
+// DBIN-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
+// DBIN-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]])
+// DBIN-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (device-[[T]], [[ARCH]])
+// DBIN-DAG: [[P6:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P5]]}, image
+// DBIN-DAG: [[P7:[0-9]+]]: linker, {[[P6]]}, hip-fatbin, (device-hip, )
+// DBIN-DAG: [[P8:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:)" {[[P7]]}, hip-fatbin
  // DBIN-NOT: host
  //
  // Test single gpu architecture up to the assemble phase in device-only
@@ -230,6 +236,8 @@
  // DASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
  // DASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
  // DASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
+// DASM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
+// DASM-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P3]]}, assembler
  // DASM-NOT: host
  
  //
@@ -242,9 +250,19 @@
  // DBIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
  // DBIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
  // DBIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
-// DBIN2-DAG: [[P6:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]])
-// DBIN2-DAG: [[P7:[0-9]+]]: preprocessor, {[[P6]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
-// DBIN2-DAG: [[P8:[0-9]+]]: compiler, {[[P7]]}, ir, (device-[[T]], [[ARCH2]])
+// DBIN2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
+// DBIN2-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]])
+// DBIN2-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (device-[[T]], [[ARCH]])
+// DBIN2-DAG: [[P6:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P5]]}, image
+// DBIN2-DAG: [[P7:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]])
+// DBIN2-DAG: [[P8:[0-9]+]]: preprocessor, {[[P7]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
+// DBIN2-DAG: [[P9:[0-9]+]]: compiler, {[[P8]]}, ir, (device-[[T]], [[ARCH2]])
+// DBIN2-DAG: [[P10:[0-9]+]]: backend, {[[P9]]}, assembler, (device-[[T]], [[ARCH2]])
+// DBIN2-DAG: [[P11:[0-9]+]]: assembler, {[[P10]]}, object, (device-[[T]], [[ARCH2]])
+// DBIN2-DAG: [[P12:[0-9]+]]: linker, {[[P11]]}, image, (device-[[T]], [[ARCH2]])
+// DBIN2-DAG: [[P13:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P12]]}, image
+// DBIN2-DAG: [[P14:[0-9]+]]: linker, {[[P6]], [[P13]]}, hip-fatbin, (device-hip, )
+// DBIN2-DAG: [[P15:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:)" {[[P14]]}, hip-fatbin
  // DBIN2-NOT: host
  //
  // Test two gpu architectures up to the assemble phase in device-only
@@ -257,9 +275,13 @@
  // DASM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
  // DASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
  // DASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
+// DASM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
+// DASM2-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P3]]}, assembler
  // DASM2-DAG: [[P5:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]])
  // DASM2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
  // DASM2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-[[T]], [[ARCH2]])
+// DASM2-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (device-[[T]], [[ARCH2]])
+// DASM2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P8]]}, assembler
  // DASM2-NOT: host
  
  //
diff --git a/clang/test/Driver/hip-toolchain-device-only.hip b/clang/test/Driver/hip-toolchain-device-only.hip

new file mode 100644 (file)

index 0000000..19afeca
--- /dev/null
+++ b/clang/test/Driver/hip-toolchain-device-only.hip
@@ -0,0 +1,29 @@
+// REQUIRES: clang-driver, amdgpu-registered-target
+
+// RUN: %clang -### -target x86_64-linux-gnu \
+// RUN:   --offload-arch=gfx803 --offload-arch=gfx900 \
+// RUN:   --cuda-device-only -nogpuinc -nogpulib -c \
+// RUN:   %s 2>&1 | FileCheck -check-prefixes=CHECK,LINK %s
+
+// CHECK-NOT: error:
+
+// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
+// CHECK-SAME: "-fcuda-is-device"
+// CHECK-SAME: "-target-cpu" "gfx803"
+// CHECK-SAME: {{.*}} "-o" [[OBJ_DEV_A_803:".*o"]] "-x" "hip"
+
+// CHECK: [[LLD: ".*lld.*"]] "-flavor" "gnu" "--no-undefined" "-shared"
+// CHECK-SAME: "-o" "[[IMG_DEV_A_803:.*out]]" [[OBJ_DEV_A_803]]
+
+// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
+// CHECK-SAME: "-emit-obj"
+// CHECK-SAME: "-fcuda-is-device"
+// CHECK-SAME: "-target-cpu" "gfx900"
+// CHECK-SAME: {{.*}} "-o" [[OBJ_DEV_A_900:".*o"]] "-x" "hip"
+
+// CHECK: [[LLD]] "-flavor" "gnu" "--no-undefined" "-shared"
+// CHECK-SAME: "-o" "[[IMG_DEV_A_900:.*out]]" [[OBJ_DEV_A_900]]
+
+// CHECK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o"
+// CHECK-SAME: "-targets={{.*}},hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900"
+// CHECK-SAME: "-inputs={{.*}},[[IMG_DEV_A_803]],[[IMG_DEV_A_900]]" "-outputs=[[BUNDLE_A:.*hipfb]]"
author	Yaxun (Sam) Liu <yaxun.liu@amd.com>
	Wed, 30 Sep 2020 00:16:32 +0000 (20:16 -0400)
committer	Yaxun (Sam) Liu <yaxun.liu@amd.com>
	Fri, 2 Oct 2020 23:05:51 +0000 (19:05 -0400)
clang/include/clang/Basic/Cuda.h		patch \| blob \| history
clang/lib/Basic/Cuda.cpp		patch \| blob \| history
clang/lib/Basic/Targets/NVPTX.cpp		patch \| blob \| history
clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp		patch \| blob \| history
clang/lib/Driver/Driver.cpp		patch \| blob \| history
clang/test/Driver/hip-phases.hip		patch \| blob \| history
clang/test/Driver/hip-toolchain-device-only.hip	[new file with mode: 0644]	patch \| blob