[OpenMP] Expand short versions of OpenMP offloading triples

author Joseph Huber <jhuber6@vols.utk.edu>

Wed, 19 Jan 2022 19:52:08 +0000 (14:52 -0500)

committer Joseph Huber <jhuber6@vols.utk.edu>

Thu, 20 Jan 2022 01:26:37 +0000 (20:26 -0500)
author Joseph Huber <jhuber6@vols.utk.edu>
Wed, 19 Jan 2022 19:52:08 +0000 (14:52 -0500)
committer Joseph Huber <jhuber6@vols.utk.edu>
Thu, 20 Jan 2022 01:26:37 +0000 (20:26 -0500)
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp

index 82d67a8..c7314e1 100644 (file)
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -774,6 +774,18 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
            llvm::Triple TT(Val);
            std::string NormalizedName = TT.normalize();
  
+          // We want to expand the shortened versions of the triples passed in to
+          // the values used for the bitcode libraries for convenience.
+          if (TT.getVendor() == llvm::Triple::UnknownVendor ||
+              TT.getOS() == llvm::Triple::UnknownOS) {
+            if (TT.getArch() == llvm::Triple::nvptx)
+              TT = llvm::Triple("nvptx-nvidia-cuda");
+            else if (TT.getArch() == llvm::Triple::nvptx64)
+              TT = llvm::Triple("nvptx64-nvidia-cuda");
+            else if (TT.getArch() == llvm::Triple::amdgcn)
+              TT = llvm::Triple("amdgcn-amd-amdhsa");
+          }
+
            // Make sure we don't have a duplicate triple.
            auto Duplicate = FoundNormalizedTriples.find(NormalizedName);
            if (Duplicate != FoundNormalizedTriples.end()) {
diff --git a/clang/test/Driver/fat_archive_nvptx.cpp b/clang/test/Driver/fat_archive_nvptx.cpp

index 4c93752..a46c44f 100644 (file)
--- a/clang/test/Driver/fat_archive_nvptx.cpp
+++ b/clang/test/Driver/fat_archive_nvptx.cpp
@@ -6,9 +6,9 @@
  
  // Given a FatArchive, clang-offload-bundler should be called to create a
  // device specific archive, which should be passed to clang-nvlink-wrapper.
-// RUN: %clang -O2 -### -fopenmp -fopenmp-targets=nvptx64 %s -L%S/Inputs/openmp_static_device_link -lFatArchive 2>&1 | FileCheck %s
-// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "nvptx64"{{.*}}"-target-cpu" "[[GPU:sm_[0-9]+]]"{{.*}}"-o" "[[HOSTBC:.*.s]]" "-x" "c++"{{.*}}.cpp
-// CHECK: clang-offload-bundler" "-unbundle" "-type=a" "-inputs={{.*}}/Inputs/openmp_static_device_link/libFatArchive.a" "-targets=openmp-nvptx64-[[GPU]]" "-outputs=[[DEVICESPECIFICARCHIVE:.*.a]]" "-allow-missing-bundles"
+// RUN: %clang -O2 -### -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda %s -L%S/Inputs/openmp_static_device_link -lFatArchive 2>&1 | FileCheck %s
+// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "[[GPU:sm_[0-9]+]]"{{.*}}"-o" "[[HOSTBC:.*.s]]" "-x" "c++"{{.*}}.cpp
+// CHECK: clang-offload-bundler" "-unbundle" "-type=a" "-inputs={{.*}}/Inputs/openmp_static_device_link/libFatArchive.a" "-targets=openmp-nvptx64-nvidia-cuda-[[GPU]]" "-outputs=[[DEVICESPECIFICARCHIVE:.*.a]]" "-allow-missing-bundles"
  // CHECK: clang-nvlink-wrapper{{.*}}"-o" "{{.*}}.out" "-arch" "[[GPU]]" "{{.*}}[[DEVICESPECIFICARCHIVE]]"
  // expected-no-diagnostics
  
@@ -72,8 +72,8 @@ void func_present(float* in, float* out, unsigned n){
      clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx908 -c func_1.c -o func_1_gfx908.o
      clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 -c func_2.c -o func_2_gfx906.o
      clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx908 -c func_2.c -o func_2_gfx908.o
-    clang -O2 -fopenmp -fopenmp-targets=nvptx64 -c func_1.c -o func_1_nvptx.o
-    clang -O2 -fopenmp -fopenmp-targets=nvptx64 -c func_2.c -o func_2_nvptx.o
+    clang -O2 -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -c func_1.c -o func_1_nvptx.o
+    clang -O2 -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -c func_2.c -o func_2_nvptx.o
  
  2. Create a fat archive by combining all the object file(s)
      llvm-ar cr libFatArchive.a func_1_gfx906.o func_1_gfx908.o func_2_gfx906.o func_2_gfx908.o func_1_nvptx.o func_2_nvptx.o
diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt

index a3c6536..8185727 100644 (file)
--- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt
+++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt
@@ -227,7 +227,7 @@ endfunction()
  
  # Generate a Bitcode library for all the compute capabilities the user requested
  foreach(sm ${nvptx_sm_list})
-  compileDeviceRTLLibrary(sm_${sm} nvptx -target nvptx64 -Xclang -target-feature -Xclang +ptx61 "-D__CUDA_ARCH__=${sm}0")
+  compileDeviceRTLLibrary(sm_${sm} nvptx -target nvptx64-nvidia-cuda -Xclang -target-feature -Xclang +ptx61 "-D__CUDA_ARCH__=${sm}0")
  endforeach()
  
  foreach(mcpu ${amdgpu_mcpus})
author	Joseph Huber <jhuber6@vols.utk.edu>
	Wed, 19 Jan 2022 19:52:08 +0000 (14:52 -0500)
committer	Joseph Huber <jhuber6@vols.utk.edu>
	Thu, 20 Jan 2022 01:26:37 +0000 (20:26 -0500)
clang/lib/Driver/Driver.cpp		patch \| blob \| history
clang/test/Driver/fat_archive_nvptx.cpp		patch \| blob \| history
openmp/libomptarget/DeviceRTL/CMakeLists.txt		patch \| blob \| history