/// Returns the canonical name for the offloading architecture when using HIP or
/// CUDA.
static StringRef getCanonicalArchString(Compilation &C,
- llvm::opt::DerivedArgList &Args,
+ const llvm::opt::DerivedArgList &Args,
StringRef ArchStr,
- Action::OffloadKind Kind) {
- if (Kind == Action::OFK_Cuda) {
+ Action::OffloadKind Kind,
+ const ToolChain *TC) {
+ if (Kind == Action::OFK_Cuda ||
+ (Kind == Action::OFK_OpenMP && TC->getTriple().isNVPTX())) {
CudaArch Arch = StringToCudaArch(ArchStr);
if (Arch == CudaArch::UNKNOWN || !IsNVIDIAGpuArch(Arch)) {
C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr;
return StringRef();
}
return Args.MakeArgStringRef(CudaArchToString(Arch));
- } else if (Kind == Action::OFK_HIP) {
+ } else if (Kind == Action::OFK_HIP ||
+ (Kind == Action::OFK_OpenMP && TC->getTriple().isAMDGPU())) {
llvm::StringMap<bool> Features;
// getHIPOffloadTargetTriple() is known to return valid value as it has
// been called successfully in the CreateOffloadingDeviceToolChains().
return Args.MakeArgStringRef(
getCanonicalTargetID(Arch.getValue(), Features));
}
- return StringRef();
+ // If the input isn't CUDA or HIP just return the architecture.
+ return ArchStr;
}
/// Checks if the set offloading architectures does not conflict. Returns the
/// This function returns a set of bound architectures; if there are no bound
/// architectures we return a set containing only the empty string.
static llvm::DenseSet<StringRef>
-getOffloadArchs(Compilation &C, llvm::opt::DerivedArgList &Args,
- Action::OffloadKind Kind) {
-
- // If this is OpenMP offloading we don't use a bound architecture.
- if (Kind == Action::OFK_OpenMP)
- return llvm::DenseSet<StringRef>{StringRef()};
+getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
+ Action::OffloadKind Kind, const ToolChain *TC) {
// --offload and --offload-arch options are mutually exclusive.
if (Args.hasArgNoClaim(options::OPT_offload_EQ) &&
llvm::DenseSet<StringRef> Archs;
for (auto &Arg : Args) {
if (Arg->getOption().matches(options::OPT_offload_arch_EQ)) {
- Archs.insert(getCanonicalArchString(C, Args, Arg->getValue(), Kind));
+ Archs.insert(getCanonicalArchString(C, Args, Arg->getValue(), Kind, TC));
} else if (Arg->getOption().matches(options::OPT_no_offload_arch_EQ)) {
if (Arg->getValue() == StringRef("all"))
Archs.clear();
else
- Archs.erase(getCanonicalArchString(C, Args, Arg->getValue(), Kind));
+ Archs.erase(getCanonicalArchString(C, Args, Arg->getValue(), Kind, TC));
}
}
Archs.insert(CudaArchToString(CudaArch::CudaDefault));
else if (Kind == Action::OFK_HIP)
Archs.insert(CudaArchToString(CudaArch::HIPDefault));
+ else if (Kind == Action::OFK_OpenMP)
+ Archs.insert(StringRef());
+ } else {
+ Args.ClaimAllArgs(options::OPT_offload_arch_EQ);
+ Args.ClaimAllArgs(options::OPT_no_offload_arch_EQ);
}
return Archs;
// Get the product of all bound architectures and toolchains.
SmallVector<std::pair<const ToolChain *, StringRef>> TCAndArchs;
for (const ToolChain *TC : ToolChains)
- for (StringRef Arch : getOffloadArchs(C, Args, Kind))
+ for (StringRef Arch : getOffloadArchs(
+ C, C.getArgsForToolChain(TC, "generic", Kind), Kind, TC))
TCAndArchs.push_back(std::make_pair(TC, Arch));
for (unsigned I = 0, E = TCAndArchs.size(); I != E; ++I)
HostAction->setCannotBeCollapsedWithNextDependentAction();
OffloadAction::HostDependence HDep(
*HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
- /*BoundArch=*/nullptr, Kind);
+ TCAndArch->second.data(), Kind);
OffloadAction::DeviceDependences DDep;
- DDep.add(*A, *TCAndArch->first, /*BoundArch=*/nullptr, Kind);
+ DDep.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind);
A = C.MakeAction<OffloadAction>(HDep, DDep);
} else if (isa<AssembleJobAction>(A) && Kind == Action::OFK_Cuda) {
// The Cuda toolchain uses fatbinary as the linker phase to bundle the
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \
// RUN: -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 --libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib %s 2>&1 \
// RUN: | FileCheck %s
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \
+// RUN: --offload-arch=gfx906 --libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib %s 2>&1 \
+// RUN: | FileCheck %s
// verify the tools invocations
// CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-llvm-bc"{{.*}}"-x" "c"
// CHECK-NOGPULIB-NOT: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx803" "-fcuda-is-device" "-mlink-builtin-bitcode"{{.*}}libomptarget-amdgpu-gfx803.bc"{{.*}}
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-BINDINGS
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa --offload-arch=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-BINDINGS
// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[HOST_BC:.*]]"
// CHECK-BINDINGS: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC:.*]]"
// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[DEVICE_BC]]"], output: "[[HOST_OBJ:.*]]"
// RUN: -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 \
// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc %s 2>&1 \
// RUN: | FileCheck %s
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda \
+// RUN: --offload-arch=sm_52 \
+// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc %s 2>&1 \
+// RUN: | FileCheck %s
// verify the tools invocations
// CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-llvm-bc"{{.*}}"-x" "c"
// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[DEVICE_OBJ]]"], output: "[[HOST_OBJ:.*]]"
// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda --offload-arch=sm_52 --offload-arch=sm_70 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARCH-BINDINGS
+// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[HOST_BC:.*]]"
+// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC_SM_52:.*]]"
+// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC_SM_52]]"], output: "[[DEVICE_OBJ_SM_52:.*]]"
+// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC_SM_70:.*]]"
+// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC_SM_70]]"], output: "[[DEVICE_OBJ_SM_70:.*]]"
+// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[DEVICE_OBJ_SM_52]]", "[[DEVICE_OBJ_SM_70]]"], output: "[[HOST_OBJ:.*]]"
+// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
+
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp \
+// RUN: -fopenmp-targets=nvptx64-nvidia-cuda,amdgcn-amd-amdhsa -Xopenmp-target=nvptx64-nvidia-cuda --offload-arch=sm_70 \
+// RUN: -fopenmp-targets=nvptx64-nvidia-cuda,amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa --offload-arch=gfx908 \
+// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-NVIDIA-AMDGPU
+
+// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[HOST_BC:.+]]"
+// CHECK-NVIDIA-AMDGPU: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[NVIDIA_PTX:.+]]"
+// CHECK-NVIDIA-AMDGPU: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[NVIDIA_PTX]]"], output: "[[NVIDIA_CUBIN:.+]]"
+// CHECK-NVIDIA-AMDGPU: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[AMD_BC:.+]]"
+// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[NVIDIA_CUBIN]]", "[[AMD_BC]]"], output: "[[HOST_OBJ:.+]]"
+// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
+
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR
// CHECK-EMIT-LLVM-IR: "-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"{{.*}}"-emit-llvm"