[OpenMP] Allow compiling multiple target architectures with OpenMP

author Joseph Huber <jhuber6@vols.utk.edu>

Sat, 30 Apr 2022 22:46:58 +0000 (18:46 -0400)

committer Joseph Huber <jhuber6@vols.utk.edu>

Fri, 6 May 2022 20:57:16 +0000 (16:57 -0400)
author Joseph Huber <jhuber6@vols.utk.edu>
Sat, 30 Apr 2022 22:46:58 +0000 (18:46 -0400)
committer Joseph Huber <jhuber6@vols.utk.edu>
Fri, 6 May 2022 20:57:16 +0000 (16:57 -0400)
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp

index 5bba896..b7dd59c 100644 (file)
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -4214,17 +4214,20 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
  /// Returns the canonical name for the offloading architecture when using HIP or
  /// CUDA.
  static StringRef getCanonicalArchString(Compilation &C,
-                                        llvm::opt::DerivedArgList &Args,
+                                        const llvm::opt::DerivedArgList &Args,
                                          StringRef ArchStr,
-                                        Action::OffloadKind Kind) {
-  if (Kind == Action::OFK_Cuda) {
+                                        Action::OffloadKind Kind,
+                                        const ToolChain *TC) {
+  if (Kind == Action::OFK_Cuda ||
+      (Kind == Action::OFK_OpenMP && TC->getTriple().isNVPTX())) {
      CudaArch Arch = StringToCudaArch(ArchStr);
      if (Arch == CudaArch::UNKNOWN || !IsNVIDIAGpuArch(Arch)) {
        C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr;
        return StringRef();
      }
      return Args.MakeArgStringRef(CudaArchToString(Arch));
-  } else if (Kind == Action::OFK_HIP) {
+  } else if (Kind == Action::OFK_HIP ||
+             (Kind == Action::OFK_OpenMP && TC->getTriple().isAMDGPU())) {
      llvm::StringMap<bool> Features;
      // getHIPOffloadTargetTriple() is known to return valid value as it has
      // been called successfully in the CreateOffloadingDeviceToolChains().
@@ -4239,7 +4242,8 @@ static StringRef getCanonicalArchString(Compilation &C,
      return Args.MakeArgStringRef(
          getCanonicalTargetID(Arch.getValue(), Features));
    }
-  return StringRef();
+  // If the input isn't CUDA or HIP just return the architecture.
+  return ArchStr;
  }
  
  /// Checks if the set offloading architectures does not conflict. Returns the
@@ -4259,12 +4263,8 @@ getConflictOffloadArchCombination(const llvm::DenseSet<StringRef> &Archs,
  /// This function returns a set of bound architectures, if there are no bound
  /// architctures we return a set containing only the empty string.
  static llvm::DenseSet<StringRef>
-getOffloadArchs(Compilation &C, llvm::opt::DerivedArgList &Args,
-                Action::OffloadKind Kind) {
-
-  // If this is OpenMP offloading we don't use a bound architecture.
-  if (Kind == Action::OFK_OpenMP)
-    return llvm::DenseSet<StringRef>{StringRef()};
+getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
+                Action::OffloadKind Kind, const ToolChain *TC) {
  
    // --offload and --offload-arch options are mutually exclusive.
    if (Args.hasArgNoClaim(options::OPT_offload_EQ) &&
@@ -4280,12 +4280,12 @@ getOffloadArchs(Compilation &C, llvm::opt::DerivedArgList &Args,
    llvm::DenseSet<StringRef> Archs;
    for (auto &Arg : Args) {
      if (Arg->getOption().matches(options::OPT_offload_arch_EQ)) {
-      Archs.insert(getCanonicalArchString(C, Args, Arg->getValue(), Kind));
+      Archs.insert(getCanonicalArchString(C, Args, Arg->getValue(), Kind, TC));
      } else if (Arg->getOption().matches(options::OPT_no_offload_arch_EQ)) {
        if (Arg->getValue() == StringRef("all"))
          Archs.clear();
        else
-        Archs.erase(getCanonicalArchString(C, Args, Arg->getValue(), Kind));
+        Archs.erase(getCanonicalArchString(C, Args, Arg->getValue(), Kind, TC));
      }
    }
  
@@ -4301,6 +4301,11 @@ getOffloadArchs(Compilation &C, llvm::opt::DerivedArgList &Args,
        Archs.insert(CudaArchToString(CudaArch::CudaDefault));
      else if (Kind == Action::OFK_HIP)
        Archs.insert(CudaArchToString(CudaArch::HIPDefault));
+    else if (Kind == Action::OFK_OpenMP)
+      Archs.insert(StringRef());
+  } else {
+    Args.ClaimAllArgs(options::OPT_offload_arch_EQ);
+    Args.ClaimAllArgs(options::OPT_no_offload_arch_EQ);
    }
  
    return Archs;
@@ -4346,7 +4351,8 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
      // Get the product of all bound architectures and toolchains.
      SmallVector<std::pair<const ToolChain *, StringRef>> TCAndArchs;
      for (const ToolChain *TC : ToolChains)
-      for (StringRef Arch : getOffloadArchs(C, Args, Kind))
+      for (StringRef Arch : getOffloadArchs(
+               C, C.getArgsForToolChain(TC, "generic", Kind), Kind, TC))
          TCAndArchs.push_back(std::make_pair(TC, Arch));
  
      for (unsigned I = 0, E = TCAndArchs.size(); I != E; ++I)
@@ -4375,9 +4381,9 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
            HostAction->setCannotBeCollapsedWithNextDependentAction();
            OffloadAction::HostDependence HDep(
                *HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
-              /*BoundArch=*/nullptr, Kind);
+              TCAndArch->second.data(), Kind);
            OffloadAction::DeviceDependences DDep;
-          DDep.add(*A, *TCAndArch->first, /*BoundArch=*/nullptr, Kind);
+          DDep.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind);
            A = C.MakeAction<OffloadAction>(HDep, DDep);
          } else if (isa<AssembleJobAction>(A) && Kind == Action::OFK_Cuda) {
            // The Cuda toolchain uses fatbinary as the linker phase to bundle the
diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp

index 998bb0b..fa9b8b6 100644 (file)
--- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -307,9 +307,10 @@ llvm::opt::DerivedArgList *AMDGPUOpenMPToolChain::TranslateArgs(
        if (!llvm::is_contained(*DAL, A))
          DAL->append(A);
  
-    std::string Arch = DAL->getLastArgValue(options::OPT_march_EQ).str();
-    if (Arch.empty()) {
-      checkSystemForAMDGPU(Args, *this, Arch);
+    if (!DAL->hasArg(options::OPT_march_EQ)) {
+      std::string Arch = BoundArch.str();
+      if (BoundArch.empty())
+        checkSystemForAMDGPU(Args, *this, Arch);
        DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), Arch);
      }
  
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp

index 6103c42..666f568 100644 (file)
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -847,10 +847,10 @@ CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
        if (!llvm::is_contained(*DAL, A))
          DAL->append(A);
  
-    StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
-    if (Arch.empty())
+    if (!DAL->hasArg(options::OPT_march_EQ))
        DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
-                        CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
+                        !BoundArch.empty() ? BoundArch
+                                           : CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
  
      return DAL;
    }
diff --git a/clang/test/Driver/amdgpu-openmp-toolchain-new.c b/clang/test/Driver/amdgpu-openmp-toolchain-new.c

index a321365..bfb6810 100644 (file)
--- a/clang/test/Driver/amdgpu-openmp-toolchain-new.c
+++ b/clang/test/Driver/amdgpu-openmp-toolchain-new.c
@@ -3,6 +3,9 @@
  // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \
  // RUN:          -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 --libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib %s 2>&1 \
  // RUN:   | FileCheck %s
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \
+// RUN:          --offload-arch=gfx906 --libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib %s 2>&1 \
+// RUN:   | FileCheck %s
  
  // verify the tools invocations
  // CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-llvm-bc"{{.*}}"-x" "c"
@@ -34,6 +37,7 @@
  // CHECK-NOGPULIB-NOT: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx803" "-fcuda-is-device" "-mlink-builtin-bitcode"{{.*}}libomptarget-amdgpu-gfx803.bc"{{.*}}
  
  // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-BINDINGS
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa --offload-arch=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-BINDINGS
  // CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[HOST_BC:.*]]"
  // CHECK-BINDINGS: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC:.*]]"
  // CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[DEVICE_BC]]"], output: "[[HOST_OBJ:.*]]"
diff --git a/clang/test/Driver/openmp-offload-gpu-new.c b/clang/test/Driver/openmp-offload-gpu-new.c

index 7281ba7..782f008 100644 (file)
--- a/clang/test/Driver/openmp-offload-gpu-new.c
+++ b/clang/test/Driver/openmp-offload-gpu-new.c
@@ -10,6 +10,10 @@
  // RUN:          -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 \
  // RUN:          --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc %s 2>&1 \
  // RUN:   | FileCheck %s
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda \
+// RUN:          --offload-arch=sm_52 \
+// RUN:          --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc %s 2>&1 \
+// RUN:   | FileCheck %s
  
  // verify the tools invocations
  // CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-llvm-bc"{{.*}}"-x" "c"
@@ -40,6 +44,27 @@
  // CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[DEVICE_OBJ]]"], output: "[[HOST_OBJ:.*]]"
  // CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
  
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda --offload-arch=sm_52 --offload-arch=sm_70 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARCH-BINDINGS
+// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[HOST_BC:.*]]"
+// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC_SM_52:.*]]"
+// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC_SM_52]]"], output: "[[DEVICE_OBJ_SM_52:.*]]"
+// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC_SM_70:.*]]"
+// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC_SM_70]]"], output: "[[DEVICE_OBJ_SM_70:.*]]"
+// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[DEVICE_OBJ_SM_52]]", "[[DEVICE_OBJ_SM_70]]"], output: "[[HOST_OBJ:.*]]"
+// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
+
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp \
+// RUN:     -fopenmp-targets=nvptx64-nvidia-cuda,amdgcn-amd-amdhsa -Xopenmp-target=nvptx64-nvidia-cuda --offload-arch=sm_70 \
+// RUN:     -fopenmp-targets=nvptx64-nvidia-cuda,amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa --offload-arch=gfx908  \
+// RUN:     -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-NVIDIA-AMDGPU
+
+// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[HOST_BC:.+]]"
+// CHECK-NVIDIA-AMDGPU: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[NVIDIA_PTX:.+]]"
+// CHECK-NVIDIA-AMDGPU: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[NVIDIA_PTX]]"], output: "[[NVIDIA_CUBIN:.+]]"
+// CHECK-NVIDIA-AMDGPU: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[AMD_BC:.+]]"
+// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[NVIDIA_CUBIN]]", "[[AMD_BC]]"], output: "[[HOST_OBJ:.+]]"
+// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
+
  // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR
  // CHECK-EMIT-LLVM-IR: "-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"{{.*}}"-emit-llvm"
author	Joseph Huber <jhuber6@vols.utk.edu>
	Sat, 30 Apr 2022 22:46:58 +0000 (18:46 -0400)
committer	Joseph Huber <jhuber6@vols.utk.edu>
	Fri, 6 May 2022 20:57:16 +0000 (16:57 -0400)
clang/lib/Driver/Driver.cpp		patch \| blob \| history
clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp		patch \| blob \| history
clang/lib/Driver/ToolChains/Cuda.cpp		patch \| blob \| history
clang/test/Driver/amdgpu-openmp-toolchain-new.c		patch \| blob \| history
clang/test/Driver/openmp-offload-gpu-new.c		patch \| blob \| history