"'--hip-path' must be specified when offloading to "
"SPIR-V%select{| unless %1 is given}0.">;
-def err_drv_undetermined_amdgpu_arch : Error<
- "cannot determine AMDGPU architecture: %0; consider passing it via "
- "'--march'">;
+// %0 is the target architecture name, %1 the reason detection failed, and %2
+// the command-line option that can be used to specify the architecture.
+def err_drv_undetermined_gpu_arch : Error<
+ "cannot determine %0 architecture: %1; consider passing it via "
+ "'%2'">;
def err_drv_cuda_version_unsupported : Error<
"GPU arch %0 is supported by CUDA versions between %1 and %2 (inclusive), "
"but installation at %3 is %4; use '--cuda-path' to specify a different CUDA "
HelpText<"HIP runtime installation path, used for finding HIP version and adding HIP include path.">;
def amdgpu_arch_tool_EQ : Joined<["--"], "amdgpu-arch-tool=">, Group<i_Group>,
HelpText<"Tool used for detecting AMD GPU arch in the system.">;
+def nvptx_arch_tool_EQ : Joined<["--"], "nvptx-arch-tool=">, Group<i_Group>,
+ HelpText<"Tool used for detecting NVIDIA GPU arch in the system.">;
def rocm_device_lib_path_EQ : Joined<["--"], "rocm-device-lib-path=">, Group<Link_Group>,
HelpText<"ROCm device library path. Alternative to rocm-path.">;
def : Joined<["--"], "hip-device-lib-path=">, Alias<rocm_device_lib_path_EQ>;
ToolChain(const Driver &D, const llvm::Triple &T,
const llvm::opt::ArgList &Args);
+ /// Executes the given \p Executable without arguments and returns what it
+ /// wrote to stdout. Returns an error if the program does not run to a
+ /// successful exit or if its captured output cannot be read.
+ llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
+ executeToolChainProgram(StringRef Executable) const;
+
void setTripleEnvironment(llvm::Triple::EnvironmentType Env);
virtual Tool *buildAssembler() const;
bool addFastMathRuntimeIfAvailable(
const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const;
+ /// getSystemGPUArchs - Use a tool to detect the user's available GPUs.
+ /// Returns the detected GPU architectures. The base implementation detects
+ /// nothing and returns an empty list; offloading toolchains override this.
+ virtual Expected<SmallVector<std::string>>
+ getSystemGPUArchs(const llvm::opt::ArgList &Args) const;
+
/// addProfileRTLibs - When -fprofile-instr-profile is specified, try to pass
/// a suitable profile runtime library to the linker.
virtual void addProfileRTLibs(const llvm::opt::ArgList &Args,
if (A->getOption().matches(options::OPT_no_offload_arch_EQ) &&
ArchStr == "all") {
GpuArchs.clear();
- } else if (ArchStr == "native" &&
- ToolChains.front()->getTriple().isAMDGPU()) {
- auto *TC = static_cast<const toolchains::HIPAMDToolChain *>(
- ToolChains.front());
- SmallVector<std::string, 1> GPUs;
- auto Err = TC->detectSystemGPUs(Args, GPUs);
- if (!Err) {
- for (auto GPU : GPUs)
- GpuArchs.insert(Args.MakeArgString(GPU));
- } else
- llvm::consumeError(std::move(Err));
+ } else if (ArchStr == "native") {
+ const ToolChain &TC = *ToolChains.front();
+ auto GPUsOrErr = ToolChains.front()->getSystemGPUArchs(Args);
+ if (!GPUsOrErr) {
+ TC.getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
+ << llvm::Triple::getArchTypeName(TC.getArch())
+ << llvm::toString(GPUsOrErr.takeError()) << "--offload-arch";
+ continue;
+ }
+
+ for (auto GPU : *GPUsOrErr) {
+ GpuArchs.insert(Args.MakeArgString(GPU));
+ }
} else {
ArchStr = getCanonicalOffloadArch(ArchStr);
if (ArchStr.empty()) {
#include "llvm/Option/Option.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/VersionTuple.h"
addIfExists(getFilePaths(), getArchSpecificLibPath());
}
+/// Runs \p Executable with no arguments and returns everything it wrote to
+/// stdout.
+///
+/// The output is captured through a temporary file that is removed when this
+/// function returns. An error is returned if the temporary file cannot be
+/// created, if the program cannot be executed, crashes or exits with a
+/// non-zero status, or if the captured output cannot be read back.
+llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
+ToolChain::executeToolChainProgram(StringRef Executable) const {
+  llvm::SmallString<64> OutputFile;
+  // Without a valid output file there is nothing to redirect stdout into, so
+  // report the failure instead of running the tool with an empty path.
+  if (std::error_code EC = llvm::sys::fs::createTemporaryFile(
+          "toolchain-program", "txt", OutputFile))
+    return llvm::createStringError(EC,
+                                   "Failed to create temporary file for " +
+                                       Executable + ": " + EC.message());
+  llvm::FileRemover OutputRemover(OutputFile.c_str());
+
+  // Only the middle entry (stdout) is redirected into the temporary file.
+  std::optional<llvm::StringRef> Redirects[] = {
+      {""},
+      OutputFile.str(),
+      {""},
+  };
+
+  std::string ErrorMessage;
+  if (int Result = llvm::sys::ExecuteAndWait(Executable, {}, {}, Redirects,
+                                             /* SecondsToWait */ 0,
+                                             /*MemoryLimit*/ 0,
+                                             &ErrorMessage)) {
+    // A plain non-zero exit leaves ErrorMessage empty, so spell out what
+    // happened: positive values are the exit code, -1 means the program could
+    // not be executed and any other negative value is treated as a crash.
+    if (Result > 0)
+      ErrorMessage = "Exited with error code " + std::to_string(Result);
+    else if (Result == -1)
+      ErrorMessage = "Execute failed: " + ErrorMessage;
+    else
+      ErrorMessage = "Crashed: " + ErrorMessage;
+
+    return llvm::createStringError(std::error_code(),
+                                   Executable + ": " + ErrorMessage);
+  }
+
+  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> OutputBuf =
+      llvm::MemoryBuffer::getFile(OutputFile.c_str());
+  if (!OutputBuf)
+    return llvm::createStringError(OutputBuf.getError(),
+                                   "Failed to read stdout of " + Executable +
+                                       ": " + OutputBuf.getError().message());
+  return std::move(*OutputBuf);
+}
+
+
void ToolChain::setTripleEnvironment(llvm::Triple::EnvironmentType Env) {
Triple.setEnvironment(Env);
if (EffectiveTriple != llvm::Triple())
return false;
}
+/// Base-class fallback for GPU detection: no tool is run and an empty list of
+/// architectures is reported. \p Args is not consulted here.
+Expected<SmallVector<std::string>>
+ToolChain::getSystemGPUArchs(const llvm::opt::ArgList &Args) const {
+  SmallVector<std::string> NoArchs;
+  return NoArchs;
+}
+
SanitizerMask ToolChain::getSupportedSanitizers() const {
// Return sanitizers which don't require runtime support and are not
// platform dependent.
#include "clang/Driver/Options.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Support/Error.h"
-#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/Path.h"
#include <optional>
#include <system_error>
-#define AMDGPU_ARCH_PROGRAM_NAME "amdgpu-arch"
-
using namespace clang::driver;
using namespace clang::driver::tools;
using namespace clang::driver::toolchains;
}
}
-llvm::Error
-AMDGPUToolChain::detectSystemGPUs(const ArgList &Args,
- SmallVector<std::string, 1> &GPUArchs) const {
+Expected<SmallVector<std::string>>
+AMDGPUToolChain::getSystemGPUArchs(const ArgList &Args) const {
+ // Detect AMD GPUs available on the system.
std::string Program;
if (Arg *A = Args.getLastArg(options::OPT_amdgpu_arch_tool_EQ))
Program = A->getValue();
else
- Program = GetProgramPath(AMDGPU_ARCH_PROGRAM_NAME);
- llvm::SmallString<64> OutputFile;
- llvm::sys::fs::createTemporaryFile("print-system-gpus", "" /* No Suffix */,
- OutputFile);
- llvm::FileRemover OutputRemover(OutputFile.c_str());
- std::optional<llvm::StringRef> Redirects[] = {
- {""},
- OutputFile.str(),
- {""},
- };
-
- std::string ErrorMessage;
- if (int Result = llvm::sys::ExecuteAndWait(
- Program, {}, {}, Redirects, /* SecondsToWait */ 0,
- /*MemoryLimit*/ 0, &ErrorMessage)) {
- if (Result > 0) {
- ErrorMessage = "Exited with error code " + std::to_string(Result);
- } else if (Result == -1) {
- ErrorMessage = "Execute failed: " + ErrorMessage;
- } else {
- ErrorMessage = "Crashed: " + ErrorMessage;
- }
+ Program = GetProgramPath("amdgpu-arch");
- return llvm::createStringError(std::error_code(),
- Program + ": " + ErrorMessage);
- }
-
- llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> OutputBuf =
- llvm::MemoryBuffer::getFile(OutputFile.c_str());
- if (!OutputBuf) {
- return llvm::createStringError(OutputBuf.getError(),
- "Failed to read stdout of " + Program +
- ": " + OutputBuf.getError().message());
- }
+ auto StdoutOrErr = executeToolChainProgram(Program);
+ if (!StdoutOrErr)
+ return StdoutOrErr.takeError();
- for (llvm::line_iterator LineIt(**OutputBuf); !LineIt.is_at_end(); ++LineIt) {
- GPUArchs.push_back(LineIt->str());
- }
- return llvm::Error::success();
-}
-
-llvm::Error AMDGPUToolChain::getSystemGPUArch(const ArgList &Args,
- std::string &GPUArch) const {
- // detect the AMDGPU installed in system
SmallVector<std::string, 1> GPUArchs;
- auto Err = detectSystemGPUs(Args, GPUArchs);
- if (Err) {
- return Err;
- }
- if (GPUArchs.empty()) {
+ for (StringRef Arch : llvm::split((*StdoutOrErr)->getBuffer(), "\n"))
+ if (!Arch.empty())
+ GPUArchs.push_back(Arch.str());
+
+ if (GPUArchs.empty())
return llvm::createStringError(std::error_code(),
"No AMD GPU detected in the system");
- }
- GPUArch = GPUArchs[0];
- if (GPUArchs.size() > 1) {
- if (!llvm::all_equal(GPUArchs))
- return llvm::createStringError(
- std::error_code(), "Multiple AMD GPUs found with different archs");
- }
- return llvm::Error::success();
+
+ return GPUArchs;
}
void ROCMToolChain::addClangTargetOptions(
/// Should skip argument.
bool shouldSkipArgument(const llvm::opt::Arg *Arg) const;
- /// Uses amdgpu_arch tool to get arch of the system GPU. Will return error
+ /// Uses amdgpu-arch tool to get arch of the system GPU. Will return error
/// if unable to find one.
- llvm::Error getSystemGPUArch(const llvm::opt::ArgList &Args,
- std::string &GPUArch) const;
-
- llvm::Error detectSystemGPUs(const llvm::opt::ArgList &Args,
- SmallVector<std::string, 1> &GPUArchs) const;
+  Expected<SmallVector<std::string>>
+  getSystemGPUArchs(const llvm::opt::ArgList &Args) const override;
protected:
/// Check and diagnose invalid target ID specified by -mcpu.
+/// Determines the single AMDGPU architecture of the host system and stores it
+/// in \p GPUArch. Emits err_drv_undetermined_gpu_arch and returns false when
+/// detection fails or the detected GPUs do not all share one architecture.
static bool checkSystemForAMDGPU(const ArgList &Args, const AMDGPUToolChain &TC,
std::string &GPUArch) {
- if (auto Err = TC.getSystemGPUArch(Args, GPUArch)) {
+ // Reports the given error as a driver diagnostic that suggests '-march';
+ // always yields false so callers can return its result directly.
+ auto CheckError = [&](llvm::Error Err) -> bool {
std::string ErrMsg =
llvm::formatv("{0}", llvm::fmt_consume(std::move(Err)));
- TC.getDriver().Diag(diag::err_drv_undetermined_amdgpu_arch) << ErrMsg;
+ TC.getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
+ << llvm::Triple::getArchTypeName(TC.getArch()) << ErrMsg << "-march";
return false;
- }
+ };
+
+ auto ArchsOrErr = TC.getSystemGPUArchs(Args);
+ if (!ArchsOrErr)
+ return CheckError(ArchsOrErr.takeError());
+
+ // Several GPUs are acceptable only when all of them have the same arch.
+ if (ArchsOrErr->size() > 1)
+ if (!llvm::all_equal(*ArchsOrErr))
+ return CheckError(llvm::createStringError(
+ std::error_code(), "Multiple AMD GPUs found with different archs"));
+ // NOTE(review): front() relies on getSystemGPUArchs never succeeding with an
+ // empty list -- confirm this holds for every override reachable here.
+ GPUArch = ArchsOrErr->front();
return true;
}
} // namespace
return DAL;
}
+/// Queries the NVIDIA GPUs present on the host by running the nvptx-arch tool
+/// (or the program given via the nvptx-arch-tool option) and returns one
+/// architecture per non-empty line of its output. Fails if the tool does not
+/// run successfully or prints no architecture.
+Expected<SmallVector<std::string>>
+CudaToolChain::getSystemGPUArchs(const ArgList &Args) const {
+  // An explicitly requested tool takes precedence over the default lookup.
+  const Arg *ToolArg = Args.getLastArg(options::OPT_nvptx_arch_tool_EQ);
+  const std::string Program = ToolArg ? std::string(ToolArg->getValue())
+                                      : GetProgramPath("nvptx-arch");
+
+  auto StdoutOrErr = executeToolChainProgram(Program);
+  if (!StdoutOrErr)
+    return StdoutOrErr.takeError();
+
+  // Every non-empty output line names one detected architecture.
+  const StringRef Output = (*StdoutOrErr)->getBuffer();
+  SmallVector<std::string, 1> GPUArchs;
+  for (StringRef Line : llvm::split(Output, "\n")) {
+    if (Line.empty())
+      continue;
+    GPUArchs.push_back(Line.str());
+  }
+
+  if (GPUArchs.empty())
+    return llvm::createStringError(std::error_code(),
+                                   "No NVIDIA GPU detected in the system");
+
+  return GPUArchs;
+}
+
+
Tool *CudaToolChain::buildAssembler() const {
return new tools::NVPTX::Assembler(*this);
}
const ToolChain &HostTC;
CudaInstallationDetector CudaInstallation;
+  /// Uses the nvptx-arch tool to get the architectures of the system GPUs.
+  /// Returns an error if the tool fails or reports no GPU.
+  Expected<SmallVector<std::string>>
+  getSystemGPUArchs(const llvm::opt::ArgList &Args) const override;
+
protected:
Tool *buildAssembler() const override; // ptxas
Tool *buildLinker() const override; // fatbinary (ok, not really a linker)
--- /dev/null
+#!/bin/sh
+exit 1
--- /dev/null
+#!/bin/sh
+echo sm_70
+exit 0
--- /dev/null
+// REQUIRES: system-linux
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+// REQUIRES: shell
+
+// RUN: mkdir -p %t
+// RUN: cp %S/Inputs/amdgpu-arch/amdgpu_arch_fail %t/
+// RUN: cp %S/Inputs/amdgpu-arch/amdgpu_arch_gfx906 %t/
+// RUN: echo '#!/bin/sh' > %t/amdgpu_arch_empty
+// RUN: chmod +x %t/amdgpu_arch_fail
+// RUN: chmod +x %t/amdgpu_arch_gfx906
+// RUN: chmod +x %t/amdgpu_arch_empty
+
+// case when amdgpu-arch returns nothing or fails
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_fail -x hip %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR
+// NO-OUTPUT-ERROR: error: cannot determine amdgcn architecture{{.*}}; consider passing it via '--offload-arch'
+
+// case when amdgpu-arch does not return anything with successful execution
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_empty -x hip %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=EMPTY-OUTPUT
+// EMPTY-OUTPUT: error: cannot determine amdgcn architecture: No AMD GPU detected in the system; consider passing it via '--offload-arch'
+
+// case when amdgpu-arch returns a gfx906 GPU.
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 -x hip %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=ARCH-GFX906
+// ARCH-GFX906: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906"
// case when amdgpu_arch returns nothing or fails
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib --amdgpu-arch-tool=%t/amdgpu_arch_fail %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR
-// NO-OUTPUT-ERROR: error: cannot determine AMDGPU architecture{{.*}}Exited with error code 1; consider passing it via '--march'
+// NO-OUTPUT-ERROR: error: cannot determine amdgcn architecture{{.*}}; consider passing it via '-march'
// case when amdgpu_arch returns multiple gpus but all are different
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib --amdgpu-arch-tool=%t/amdgpu_arch_different %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=MULTIPLE-OUTPUT-ERROR
-// MULTIPLE-OUTPUT-ERROR: error: cannot determine AMDGPU architecture: Multiple AMD GPUs found with different archs; consider passing it via '--march'
+// MULTIPLE-OUTPUT-ERROR: error: cannot determine amdgcn architecture: Multiple AMD GPUs found with different archs; consider passing it via '-march'
// case when amdgpu_arch does not return anything with successful execution
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib --amdgpu-arch-tool=%t/amdgpu_arch_empty %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=EMPTY-OUTPUT
-// EMPTY-OUTPUT: error: cannot determine AMDGPU architecture: No AMD GPU detected in the system; consider passing it via '--march'
+// EMPTY-OUTPUT: error: cannot determine amdgcn architecture: No AMD GPU detected in the system; consider passing it via '-march'
--- /dev/null
+// REQUIRES: system-linux
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+// REQUIRES: shell
+
+// RUN: mkdir -p %t
+// RUN: cp %S/Inputs/nvptx-arch/nvptx_arch_fail %t/
+// RUN: cp %S/Inputs/nvptx-arch/nvptx_arch_sm_70 %t/
+// RUN: echo '#!/bin/sh' > %t/nvptx_arch_empty
+// RUN: chmod +x %t/nvptx_arch_fail
+// RUN: chmod +x %t/nvptx_arch_sm_70
+// RUN: chmod +x %t/nvptx_arch_empty
+
+// case when nvptx-arch returns nothing or fails
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-arch=native --nvptx-arch-tool=%t/nvptx_arch_fail -x cuda %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR
+// NO-OUTPUT-ERROR: error: cannot determine nvptx64 architecture{{.*}}; consider passing it via '--offload-arch'
+
+// case when nvptx-arch does not return anything with successful execution
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-arch=native --nvptx-arch-tool=%t/nvptx_arch_empty -x cuda %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=EMPTY-OUTPUT
+// EMPTY-OUTPUT: error: cannot determine nvptx64 architecture: No NVIDIA GPU detected in the system; consider passing it via '--offload-arch'
+
+// case when nvptx-arch returns a sm_70 GPU.
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-arch=native --nvptx-arch-tool=%t/nvptx_arch_sm_70 -x cuda %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=ARCH-sm_70
+// ARCH-sm_70: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70"