From a17ab7aa3be0b2b2adf992e3754a58f17802491f Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Thu, 5 Jan 2023 21:01:26 -0600 Subject: [PATCH] [OpenMP] Add support for '--offload-arch=native' to OpenMP offloading This patch adds support for '--offload-arch=native' to OpenMP offloading. This will automatically generate the toolchains required to fulfil whatever GPUs the user has installed. Getting this to work requires a bit of a hack. The problem is that we need the ToolChain to launch its searching program. But we do not yet have that ToolChain built. I had to temporarily make the ToolChain and also add some logic to ignore regular warnings & errors. Depends on D141078 Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D141105 --- clang/include/clang/Driver/Driver.h | 5 ++- clang/lib/Driver/Driver.cpp | 73 ++++++++++++++++++++++++++-------- clang/test/Driver/openmp-system-arch.c | 56 ++++++++++++++++++++++++++ 3 files changed, 115 insertions(+), 19 deletions(-) create mode 100644 clang/test/Driver/openmp-system-arch.c diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index 73cd036..4bbb113 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -481,10 +481,11 @@ public: /// Returns the set of bound architectures active for this offload kind. /// If there are no bound architctures we return a set containing only the - /// empty string. + /// empty string. The \p SuppressError option is used to suppress errors. llvm::DenseSet getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, - Action::OffloadKind Kind, const ToolChain *TC) const; + Action::OffloadKind Kind, const ToolChain *TC, + bool SuppressError = false) const; /// Check that the file referenced by Value exists. If it doesn't, /// issue a diagnostic and return false. 
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 3e7616b..1c71e71 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -859,9 +859,30 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, HostTC->getTriple()); // Attempt to deduce the offloading triple from the set of architectures. - // We can only correctly deduce NVPTX / AMDGPU triples currently. - llvm::DenseSet Archs = - getOffloadArchs(C, C.getArgs(), Action::OFK_OpenMP, nullptr); + // We can only correctly deduce NVPTX / AMDGPU triples currently. We need + // to temporarily create these toolchains so that we can access tools for + // inferring architectures. + llvm::DenseSet Archs; + if (NVPTXTriple) { + auto TempTC = std::make_unique( + *this, *NVPTXTriple, *HostTC, C.getInputArgs()); + for (StringRef Arch : getOffloadArchs( + C, C.getArgs(), Action::OFK_OpenMP, &*TempTC, true)) + Archs.insert(Arch); + } + if (AMDTriple) { + auto TempTC = std::make_unique( + *this, *AMDTriple, *HostTC, C.getInputArgs()); + for (StringRef Arch : getOffloadArchs( + C, C.getArgs(), Action::OFK_OpenMP, &*TempTC, true)) + Archs.insert(Arch); + } + if (!AMDTriple && !NVPTXTriple) { + for (StringRef Arch : + getOffloadArchs(C, C.getArgs(), Action::OFK_OpenMP, nullptr, true)) + Archs.insert(Arch); + } + for (StringRef Arch : Archs) { if (NVPTXTriple && IsNVIDIAGpuArch(StringToCudaArch( getProcessorFromTargetID(*NVPTXTriple, Arch)))) { @@ -876,6 +897,13 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, } } + // If the set is empty then we failed to find a native architecture. 
+ if (Archs.empty()) { + Diag(clang::diag::err_drv_failed_to_deduce_target_from_arch) + << "native"; + return; + } + for (const auto &TripleAndArchs : DerivedArchs) OpenMPTriples.push_back(TripleAndArchs.first()); } @@ -4193,16 +4221,17 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, static StringRef getCanonicalArchString(Compilation &C, const llvm::opt::DerivedArgList &Args, StringRef ArchStr, - const llvm::Triple &Triple) { + const llvm::Triple &Triple, + bool SuppressError = false) { // Lookup the CUDA / HIP architecture string. Only report an error if we were // expecting the triple to be only NVPTX / AMDGPU. CudaArch Arch = StringToCudaArch(getProcessorFromTargetID(Triple, ArchStr)); - if (Triple.isNVPTX() && + if (!SuppressError && Triple.isNVPTX() && (Arch == CudaArch::UNKNOWN || !IsNVIDIAGpuArch(Arch))) { C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) << "CUDA" << ArchStr; return StringRef(); - } else if (Triple.isAMDGPU() && + } else if (!SuppressError && Triple.isAMDGPU() && (Arch == CudaArch::UNKNOWN || !IsAMDGpuArch(Arch))) { C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) << "HIP" << ArchStr; @@ -4245,7 +4274,8 @@ getConflictOffloadArchCombination(const llvm::DenseSet &Archs, llvm::DenseSet Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, - Action::OffloadKind Kind, const ToolChain *TC) const { + Action::OffloadKind Kind, const ToolChain *TC, + bool SuppressError) const { if (!TC) TC = &C.getDefaultToolChain(); @@ -4279,21 +4309,26 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, // invalid architecture is given we simply exit. 
if (Arg->getOption().matches(options::OPT_offload_arch_EQ)) { for (StringRef Arch : llvm::split(Arg->getValue(), ",")) { - if (Arch == "native") { + if (Arch == "native" || Arch.empty()) { auto GPUsOrErr = TC->getSystemGPUArchs(Args); if (!GPUsOrErr) { - TC->getDriver().Diag(diag::err_drv_undetermined_gpu_arch) - << llvm::Triple::getArchTypeName(TC->getArch()) - << llvm::toString(GPUsOrErr.takeError()) << "--offload-arch"; + if (SuppressError) + llvm::consumeError(GPUsOrErr.takeError()); + else + TC->getDriver().Diag(diag::err_drv_undetermined_gpu_arch) + << llvm::Triple::getArchTypeName(TC->getArch()) + << llvm::toString(GPUsOrErr.takeError()) << "--offload-arch"; continue; } - for (auto ArchStr : *GPUsOrErr) + for (auto ArchStr : *GPUsOrErr) { Archs.insert( - getCanonicalArchString(C, Args, ArchStr, TC->getTriple())); + getCanonicalArchString(C, Args, Args.MakeArgString(ArchStr), + TC->getTriple(), SuppressError)); + } } else { - StringRef ArchStr = - getCanonicalArchString(C, Args, Arch, TC->getTriple()); + StringRef ArchStr = getCanonicalArchString( + C, Args, Arch, TC->getTriple(), SuppressError); if (ArchStr.empty()) return Archs; Archs.insert(ArchStr); @@ -4304,8 +4339,8 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, if (Arch == "all") { Archs.clear(); } else { - StringRef ArchStr = - getCanonicalArchString(C, Args, Arch, TC->getTriple()); + StringRef ArchStr = getCanonicalArchString( + C, Args, Arch, TC->getTriple(), SuppressError); if (ArchStr.empty()) return Archs; Archs.erase(ArchStr); @@ -4320,6 +4355,10 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, C.setContainsError(); } + // Skip filling defaults if we're just querying what is available.
+ if (SuppressError) + return Archs; + if (Archs.empty()) { if (Kind == Action::OFK_Cuda) Archs.insert(CudaArchToString(CudaArch::CudaDefault)); diff --git a/clang/test/Driver/openmp-system-arch.c b/clang/test/Driver/openmp-system-arch.c new file mode 100644 index 0000000..13ce814 --- /dev/null +++ b/clang/test/Driver/openmp-system-arch.c @@ -0,0 +1,56 @@ +// RUN: mkdir -p %t +// RUN: cp %S/Inputs/amdgpu-arch/amdgpu_arch_fail %t/ +// RUN: cp %S/Inputs/amdgpu-arch/amdgpu_arch_gfx906 %t/ +// RUN: cp %S/Inputs/nvptx-arch/nvptx_arch_fail %t/ +// RUN: cp %S/Inputs/nvptx-arch/nvptx_arch_sm_70 %t/ +// RUN: echo '#!/bin/sh' > %t/amdgpu_arch_empty +// RUN: chmod +x %t/amdgpu_arch_fail +// RUN: chmod +x %t/amdgpu_arch_gfx906 +// RUN: chmod +x %t/amdgpu_arch_empty +// RUN: echo '#!/bin/sh' > %t/nvptx_arch_empty +// RUN: chmod +x %t/nvptx_arch_fail +// RUN: chmod +x %t/nvptx_arch_sm_70 +// RUN: chmod +x %t/nvptx_arch_empty + +// case when nvptx-arch and amdgpu-arch return nothing or fail +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp --offload-arch=native \ +// RUN: --nvptx-arch-tool=%t/nvptx_arch_fail --amdgpu-arch-tool=%t/amdgpu_arch_fail %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp --offload-arch=native \ +// RUN: --nvptx-arch-tool=%t/nvptx_arch_empty --amdgpu-arch-tool=%t/amdgpu_arch_empty %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp --offload-arch= \ +// RUN: --nvptx-arch-tool=%t/nvptx_arch_fail --amdgpu-arch-tool=%t/amdgpu_arch_fail %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp --offload-arch= \ +// RUN: --nvptx-arch-tool=%t/nvptx_arch_empty --amdgpu-arch-tool=%t/amdgpu_arch_empty %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR +// NO-OUTPUT-ERROR:
error: failed to deduce triple for target architecture 'native'; specify the triple using '-fopenmp-targets' and '-Xopenmp-target' instead. + +// case when amdgpu-arch succeeds. +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp --offload-arch=native \ +// RUN: --nvptx-arch-tool=%t/nvptx_arch_fail --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=ARCH-GFX906 +// ARCH-GFX906: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906" + +// case when nvptx-arch succeeds. +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp --offload-arch=native \ +// RUN: --nvptx-arch-tool=%t/nvptx_arch_sm_70 --amdgpu-arch-tool=%t/amdgpu_arch_fail %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=ARCH-SM_70 +// ARCH-SM_70: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70" + +// case when both nvptx-arch and amdgpu-arch succeed. +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp --offload-arch=native \ +// RUN: --nvptx-arch-tool=%t/nvptx_arch_sm_70 --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=ARCH-SM_70-GFX906 +// ARCH-SM_70-GFX906: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906" +// ARCH-SM_70-GFX906: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70" + +// case when both nvptx-arch and amdgpu-arch succeed with other archs. 
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp --offload-arch=native,sm_75,gfx1030 \ +// RUN: --nvptx-arch-tool=%t/nvptx_arch_sm_70 --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=ARCH-MULTIPLE +// ARCH-MULTIPLE: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx1030" +// ARCH-MULTIPLE: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906" +// ARCH-MULTIPLE: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70" +// ARCH-MULTIPLE: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_75" -- 2.7.4