// OpenMP
//
// We need to generate an OpenMP toolchain if the user specified targets with
- // the -fopenmp-targets option.
- if (Arg *OpenMPTargets =
- C.getInputArgs().getLastArg(options::OPT_fopenmp_targets_EQ)) {
- if (OpenMPTargets->getNumValues()) {
- // We expect that -fopenmp-targets is always used in conjunction with the
- // option -fopenmp specifying a valid runtime with offloading support,
- // i.e. libomp or libiomp.
- bool HasValidOpenMPRuntime = C.getInputArgs().hasFlag(
- options::OPT_fopenmp, options::OPT_fopenmp_EQ,
- options::OPT_fno_openmp, false);
- if (HasValidOpenMPRuntime) {
- OpenMPRuntimeKind OpenMPKind = getOpenMPRuntime(C.getInputArgs());
- HasValidOpenMPRuntime =
- OpenMPKind == OMPRT_OMP || OpenMPKind == OMPRT_IOMP5;
+ // the -fopenmp-targets option or used --offload-arch with OpenMP enabled.
+ bool IsOpenMPOffloading =
+ C.getInputArgs().hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ,
+ options::OPT_fno_openmp, false) &&
+ (C.getInputArgs().hasArg(options::OPT_fopenmp_targets_EQ) ||
+ C.getInputArgs().hasArg(options::OPT_offload_arch_EQ));
+ if (IsOpenMPOffloading) {
+ // We expect that -fopenmp-targets or --offload-arch is always used in
+ // conjunction with the option -fopenmp specifying a valid runtime with
+ // offloading support, i.e. libomp or libiomp.
+ OpenMPRuntimeKind RuntimeKind = getOpenMPRuntime(C.getInputArgs());
+ if (RuntimeKind != OMPRT_OMP && RuntimeKind != OMPRT_IOMP5) {
+ Diag(clang::diag::err_drv_expecting_fopenmp_with_fopenmp_targets);
+ return;
+ }
+
+ llvm::StringMap<llvm::DenseSet<StringRef>> DerivedArchs;
+ llvm::StringMap<StringRef> FoundNormalizedTriples;
+ llvm::SmallVector<StringRef, 4> OpenMPTriples;
+
+ // If the user specified -fopenmp-targets= we create a toolchain for each
+ // valid triple. Otherwise, if only --offload-arch= was specified we instead
+ // attempt to derive the appropriate toolchains from the arguments.
+ if (Arg *OpenMPTargets =
+ C.getInputArgs().getLastArg(options::OPT_fopenmp_targets_EQ)) {
+ if (OpenMPTargets && !OpenMPTargets->getNumValues()) {
+ Diag(clang::diag::warn_drv_empty_joined_argument)
+ << OpenMPTargets->getAsString(C.getInputArgs());
+ return;
+ }
+ llvm::copy(OpenMPTargets->getValues(), std::back_inserter(OpenMPTriples));
+ } else if (C.getInputArgs().hasArg(options::OPT_offload_arch_EQ) &&
+ !IsHIP && !IsCuda) {
+ const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
+ auto AMDTriple = getHIPOffloadTargetTriple(*this, C.getInputArgs());
+ auto NVPTXTriple = getNVIDIAOffloadTargetTriple(*this, C.getInputArgs(),
+ HostTC->getTriple());
+
+ // Attempt to deduce the offloading triple from the set of architectures.
+ // We can only correctly deduce NVPTX / AMDGPU triples currently.
+ llvm::DenseSet<StringRef> Archs =
+ getOffloadArchs(C, C.getArgs(), Action::OFK_OpenMP, nullptr);
+ for (StringRef Arch : Archs) {
+ if (NVPTXTriple && IsNVIDIAGpuArch(StringToCudaArch(
+ getProcessorFromTargetID(*NVPTXTriple, Arch)))) {
+ DerivedArchs[NVPTXTriple->getTriple()].insert(Arch);
+ } else if (AMDTriple &&
+ IsAMDGpuArch(StringToCudaArch(
+ getProcessorFromTargetID(*AMDTriple, Arch)))) {
+ DerivedArchs[AMDTriple->getTriple()].insert(Arch);
+ } else {
+ Diag(clang::diag::err_drv_failed_to_deduce_target_from_arch) << Arch;
+ return;
+ }
}
- if (HasValidOpenMPRuntime) {
- llvm::StringMap<const char *> FoundNormalizedTriples;
- for (const char *Val : OpenMPTargets->getValues()) {
- llvm::Triple TT(ToolChain::getOpenMPTriple(Val));
- std::string NormalizedName = TT.normalize();
-
- // Make sure we don't have a duplicate triple.
- auto Duplicate = FoundNormalizedTriples.find(NormalizedName);
- if (Duplicate != FoundNormalizedTriples.end()) {
- Diag(clang::diag::warn_drv_omp_offload_target_duplicate)
- << Val << Duplicate->second;
- continue;
- }
+ for (const auto &TripleAndArchs : DerivedArchs)
+ OpenMPTriples.push_back(TripleAndArchs.first());
+ }
- // Store the current triple so that we can check for duplicates in the
- // following iterations.
- FoundNormalizedTriples[NormalizedName] = Val;
-
- // If the specified target is invalid, emit a diagnostic.
- if (TT.getArch() == llvm::Triple::UnknownArch)
- Diag(clang::diag::err_drv_invalid_omp_target) << Val;
- else {
- const ToolChain *TC;
- // Device toolchains have to be selected differently. They pair host
- // and device in their implementation.
- if (TT.isNVPTX() || TT.isAMDGCN()) {
- const ToolChain *HostTC =
- C.getSingleOffloadToolChain<Action::OFK_Host>();
- assert(HostTC && "Host toolchain should be always defined.");
- auto &DeviceTC =
- ToolChains[TT.str() + "/" + HostTC->getTriple().normalize()];
- if (!DeviceTC) {
- if (TT.isNVPTX())
- DeviceTC = std::make_unique<toolchains::CudaToolChain>(
- *this, TT, *HostTC, C.getInputArgs(), Action::OFK_OpenMP);
- else if (TT.isAMDGCN())
- DeviceTC =
- std::make_unique<toolchains::AMDGPUOpenMPToolChain>(
- *this, TT, *HostTC, C.getInputArgs());
- else
- assert(DeviceTC && "Device toolchain not defined.");
- }
-
- TC = DeviceTC.get();
- } else
- TC = &getToolChain(C.getInputArgs(), TT);
- C.addOffloadDeviceToolChain(TC, Action::OFK_OpenMP);
+ for (StringRef Val : OpenMPTriples) {
+ llvm::Triple TT(ToolChain::getOpenMPTriple(Val));
+ std::string NormalizedName = TT.normalize();
+
+ // Make sure we don't have a duplicate triple.
+ auto Duplicate = FoundNormalizedTriples.find(NormalizedName);
+ if (Duplicate != FoundNormalizedTriples.end()) {
+ Diag(clang::diag::warn_drv_omp_offload_target_duplicate)
+ << Val << Duplicate->second;
+ continue;
+ }
+
+ // Store the current triple so that we can check for duplicates in the
+ // following iterations.
+ FoundNormalizedTriples[NormalizedName] = Val;
+
+ // If the specified target is invalid, emit a diagnostic.
+ if (TT.getArch() == llvm::Triple::UnknownArch)
+ Diag(clang::diag::err_drv_invalid_omp_target) << Val;
+ else {
+ const ToolChain *TC;
+ // Device toolchains have to be selected differently. They pair host
+ // and device in their implementation.
+ if (TT.isNVPTX() || TT.isAMDGCN()) {
+ const ToolChain *HostTC =
+ C.getSingleOffloadToolChain<Action::OFK_Host>();
+ assert(HostTC && "Host toolchain should be always defined.");
+ auto &DeviceTC =
+ ToolChains[TT.str() + "/" + HostTC->getTriple().normalize()];
+ if (!DeviceTC) {
+ if (TT.isNVPTX())
+ DeviceTC = std::make_unique<toolchains::CudaToolChain>(
+ *this, TT, *HostTC, C.getInputArgs(), Action::OFK_OpenMP);
+ else if (TT.isAMDGCN())
+ DeviceTC = std::make_unique<toolchains::AMDGPUOpenMPToolChain>(
+ *this, TT, *HostTC, C.getInputArgs());
+ else
+ assert(DeviceTC && "Device toolchain not defined.");
}
- }
- } else
- Diag(clang::diag::err_drv_expecting_fopenmp_with_fopenmp_targets);
- } else
- Diag(clang::diag::warn_drv_empty_joined_argument)
- << OpenMPTargets->getAsString(C.getInputArgs());
+
+ TC = DeviceTC.get();
+ } else
+ TC = &getToolChain(C.getInputArgs(), TT);
+ C.addOffloadDeviceToolChain(TC, Action::OFK_OpenMP);
+ if (DerivedArchs.find(TT.getTriple()) != DerivedArchs.end())
+ KnownArchs[TC] = DerivedArchs[TT.getTriple()];
+ }
+ }
+ } else if (C.getInputArgs().hasArg(options::OPT_fopenmp_targets_EQ)) {
+ Diag(clang::diag::err_drv_expecting_fopenmp_with_fopenmp_targets);
+ return;
}
//
Args.ClaimAllArgs(options::OPT_offload_host_device);
}
-/// Returns the canonical name for the offloading architecture when using HIP or
-/// CUDA.
+/// Returns the canonical name for the offloading architecture when using a HIP
+/// or CUDA architecture.
static StringRef getCanonicalArchString(Compilation &C,
const llvm::opt::DerivedArgList &Args,
StringRef ArchStr,
- Action::OffloadKind Kind,
- const ToolChain *TC) {
- if (Kind == Action::OFK_Cuda ||
- (Kind == Action::OFK_OpenMP && TC->getTriple().isNVPTX())) {
- CudaArch Arch = StringToCudaArch(ArchStr);
- if (Arch == CudaArch::UNKNOWN || !IsNVIDIAGpuArch(Arch)) {
- C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr;
- return StringRef();
- }
+ const llvm::Triple &Triple) {
+ // Look up the CUDA / HIP architecture string. Only report an error if the
+ // triple is NVPTX / AMDGPU and the architecture is not valid for it.
+ CudaArch Arch = StringToCudaArch(getProcessorFromTargetID(Triple, ArchStr));
+ if (Triple.isNVPTX() &&
+ (Arch == CudaArch::UNKNOWN || !IsNVIDIAGpuArch(Arch))) {
+ C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch)
+ << "CUDA" << ArchStr;
+ return StringRef();
+ } else if (Triple.isAMDGPU() &&
+ (Arch == CudaArch::UNKNOWN || !IsAMDGpuArch(Arch))) {
+ C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch)
+ << "HIP" << ArchStr;
+ return StringRef();
+ }
+
+ if (IsNVIDIAGpuArch(Arch))
return Args.MakeArgStringRef(CudaArchToString(Arch));
- } else if (Kind == Action::OFK_HIP ||
- (Kind == Action::OFK_OpenMP && TC->getTriple().isAMDGPU())) {
+
+ if (IsAMDGpuArch(Arch)) {
llvm::StringMap<bool> Features;
- // getHIPOffloadTargetTriple() is known to return valid value as it has
- // been called successfully in the CreateOffloadingDeviceToolChains().
- auto Arch = parseTargetID(
- *getHIPOffloadTargetTriple(C.getDriver(), C.getInputArgs()), ArchStr,
- &Features);
+ auto HIPTriple = getHIPOffloadTargetTriple(C.getDriver(), C.getInputArgs());
+ if (!HIPTriple)
+ return StringRef();
+ auto Arch = parseTargetID(*HIPTriple, ArchStr, &Features);
if (!Arch) {
C.getDriver().Diag(clang::diag::err_drv_bad_target_id) << ArchStr;
C.setContainsError();
return Args.MakeArgStringRef(
getCanonicalTargetID(Arch.getValue(), Features));
}
+
// If the input isn't CUDA or HIP just return the architecture.
return ArchStr;
}
return getConflictTargetIDCombination(ArchSet);
}
-/// Returns the set of bound architectures active for this compilation kind.
-/// This function returns a set of bound architectures, if there are no bound
-/// architctures we return a set containing only the empty string.
-static llvm::DenseSet<StringRef>
-getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
- Action::OffloadKind Kind, const ToolChain *TC) {
+llvm::DenseSet<StringRef>
+Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
+ Action::OffloadKind Kind, const ToolChain *TC) const {
+ if (!TC)
+ TC = &C.getDefaultToolChain();
// --offload and --offload-arch options are mutually exclusive.
if (Args.hasArgNoClaim(options::OPT_offload_EQ) &&
: "--no-offload-arch");
}
+ if (KnownArchs.find(TC) != KnownArchs.end())
+ return KnownArchs.lookup(TC);
+
llvm::DenseSet<StringRef> Archs;
for (auto &Arg : Args) {
if (Arg->getOption().matches(options::OPT_offload_arch_EQ)) {
- Archs.insert(getCanonicalArchString(C, Args, Arg->getValue(), Kind, TC));
+ Archs.insert(
+ getCanonicalArchString(C, Args, Arg->getValue(), TC->getTriple()));
} else if (Arg->getOption().matches(options::OPT_no_offload_arch_EQ)) {
if (Arg->getValue() == StringRef("all"))
Archs.clear();
else
- Archs.erase(getCanonicalArchString(C, Args, Arg->getValue(), Kind, TC));
+ Archs.erase(
+ getCanonicalArchString(C, Args, Arg->getValue(), TC->getTriple()));
}
}
--- /dev/null
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp \
+// RUN: --offload-arch=sm_52 --offload-arch=gfx803 \
+// RUN: --libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib/libomptarget-amdgpu-gfx803.bc \
+// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc %s 2>&1 \
+// RUN: | FileCheck %s
+
+// Verify the tool invocations.
+// CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-llvm-bc"{{.*}}"-x" "c"
+// CHECK: "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu"{{.*}}"-target-cpu" "gfx803"
+// CHECK: "-cc1" "-triple" "nvptx64-nvidia-cuda" "-aux-triple" "x86_64-unknown-linux-gnu"{{.*}}"-target-cpu" "sm_52"
+// CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-obj"
+// CHECK: clang-linker-wrapper{{.*}}"--"{{.*}} "-o" "a.out"
+
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp \
+// RUN: --offload-arch=sm_70 --offload-arch=gfx908:sramecc+:xnack- \
+// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-NVIDIA-AMDGPU
+
+// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[HOST_BC:.+]]"
+// CHECK-NVIDIA-AMDGPU: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[AMD_BC:.+]]"
+// CHECK-NVIDIA-AMDGPU: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[NVIDIA_PTX:.+]]"
+// CHECK-NVIDIA-AMDGPU: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[NVIDIA_PTX]]"], output: "[[NVIDIA_CUBIN:.+]]"
+// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[AMD_BC]]", "[[NVIDIA_CUBIN]]"], output: "[[HOST_OBJ:.+]]"
+// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
+
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp \
+// RUN: --offload-arch=sm_52 --offload-arch=sm_70 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARCH-BINDINGS
+
+// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[HOST_BC:.*]]"
+// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC_SM_52:.*]]"
+// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC_SM_52]]"], output: "[[DEVICE_OBJ_SM_52:.*]]"
+// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC_SM_70:.*]]"
+// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC_SM_70]]"], output: "[[DEVICE_OBJ_SM_70:.*]]"
+// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[DEVICE_OBJ_SM_52]]", "[[DEVICE_OBJ_SM_70]]"], output: "[[HOST_OBJ:.*]]"
+// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
+
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp \
+// RUN: --offload-arch=sm_70 --offload-arch=gfx908 --offload-arch=native \
+// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-FAILED
+
+// CHECK-FAILED: error: failed to deduce triple for target architecture 'native'; specify the triple using '-fopenmp-targets' and '-Xopenmp-target' instead.
+
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp \
+// RUN: --offload-arch=sm_70 --offload-arch=gfx908 -fno-openmp \
+// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-DISABLED
+
+// CHECK-DISABLED-NOT: "nvptx64-nvidia-cuda" - "clang",