if (I)
Triples += ',';
+ // Find ToolChain for this input.
Action::OffloadKind CurKind = Action::OFK_Host;
const ToolChain *CurTC = &getToolChain();
const Action *CurDep = JA.getInputs()[I];
if (const auto *OA = dyn_cast<OffloadAction>(CurDep)) {
+ CurTC = nullptr;
OA->doOnEachDependence([&](Action *A, const ToolChain *TC, const char *) {
+ assert(CurTC == nullptr && "Expected one dependence!");
CurKind = A->getOffloadingDeviceKind();
CurTC = TC;
});
for (unsigned I = 0; I < Inputs.size(); ++I) {
if (I)
UB += ',';
- UB += Inputs[I].getFilename();
+
+ // Find ToolChain for this input.
+ const ToolChain *CurTC = &getToolChain();
+ if (const auto *OA = dyn_cast<OffloadAction>(JA.getInputs()[I])) {
+ CurTC = nullptr;
+ OA->doOnEachDependence([&](Action *, const ToolChain *TC, const char *) {
+ assert(CurTC == nullptr && "Expected one dependence!");
+ CurTC = TC;
+ });
+ }
+ UB += CurTC->getInputFilename(Inputs[I]);
}
CmdArgs.push_back(TCArgs.MakeArgString(UB));
for (unsigned I = 0; I < Outputs.size(); ++I) {
if (I)
UB += ',';
- SmallString<256> OutputFileName(Outputs[I].getFilename());
- // Change extension of target files for OpenMP offloading
- // to NVIDIA GPUs.
- if (DepInfo[I].DependentToolChain->getTriple().isNVPTX() &&
- JA.isOffloading(Action::OFK_OpenMP))
- llvm::sys::path::replace_extension(OutputFileName, "cubin");
- UB += OutputFileName;
+ UB += DepInfo[I].DependentToolChain->getInputFilename(Outputs[I]);
}
CmdArgs.push_back(TCArgs.MakeArgString(UB));
CmdArgs.push_back("-unbundle");
CmdArgs.push_back("--gpu-name");
CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
CmdArgs.push_back("--output-file");
- SmallString<256> OutputFileName(Output.getFilename());
- if (JA.isOffloading(Action::OFK_OpenMP))
- llvm::sys::path::replace_extension(OutputFileName, "cubin");
- CmdArgs.push_back(Args.MakeArgString(OutputFileName));
+ CmdArgs.push_back(Args.MakeArgString(TC.getInputFilename(Output)));
for (const auto& II : Inputs)
CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
if (!II.isFilename())
continue;
- SmallString<256> Name(II.getFilename());
- llvm::sys::path::replace_extension(Name, "cubin");
-
- const char *CubinF =
- C.addTempFile(C.getArgs().MakeArgString(Name));
+ const char *CubinF = C.addTempFile(
+ C.getArgs().MakeArgString(getToolChain().getInputFilename(II)));
CmdArgs.push_back(CubinF);
}
getProgramPaths().push_back(getDriver().Dir);
}
+/// Return the file name the CUDA toolchain uses for \p Input.
+///
+/// Object files produced for OpenMP offloading get a ".cubin" extension;
+/// every other input keeps the name chosen by the generic ToolChain.
+std::string CudaToolChain::getInputFilename(const InputInfo &Input) const {
+  // Assembly files keep their .s extension, and plain CUDA keeps .o because
+  // it goes through fatbinary rather than nvlink, so only OpenMP offloading
+  // object files are renamed.
+  const bool IsOpenMPObject =
+      OK == Action::OFK_OpenMP && Input.getType() == types::TY_Object;
+
+  if (IsOpenMPObject) {
+    // nvlink relies on the .cubin file name, so swap the extension of the
+    // default name.
+    SmallString<256> CubinName(ToolChain::getInputFilename(Input));
+    llvm::sys::path::replace_extension(CubinName, "cubin");
+    return CubinName.str().str();
+  }
+
+  return ToolChain::getInputFilename(Input);
+}
+
void CudaToolChain::addClangTargetOptions(
const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
/// ###########################################################################
/// Check -Xopenmp-target uses one of the archs provided when several archs are used.
-// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_35 -Xopenmp-target -march=sm_60 %s 2>&1 \
+// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
+// RUN: -Xopenmp-target -march=sm_35 -Xopenmp-target -march=sm_60 %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-FOPENMP-TARGET-ARCHS %s
// CHK-FOPENMP-TARGET-ARCHS: ptxas{{.*}}" "--gpu-name" "sm_60"
/// ###########################################################################
/// Check -Xopenmp-target -march=sm_35 works as expected when two triples are present.
-// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu,nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_35 %s 2>&1 \
+// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp \
+// RUN: -fopenmp-targets=powerpc64le-ibm-linux-gnu,nvptx64-nvidia-cuda \
+// RUN: -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_35 %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-FOPENMP-TARGET-COMPILATION %s
// CHK-FOPENMP-TARGET-COMPILATION: ptxas{{.*}}" "--gpu-name" "sm_35"
/// ###########################################################################
/// Check cubin file generation and usage by nvlink
-// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -save-temps %s 2>&1 \
-// RUN: | FileCheck -check-prefix=CHK-CUBIN %s
+// RUN: %clang -### -no-canonical-prefixes -target powerpc64le-unknown-linux-gnu -fopenmp=libomp \
+// RUN: -fopenmp-targets=nvptx64-nvidia-cuda -save-temps %s 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-CUBIN-NVLINK %s
+/// Check cubin file generation and usage by nvlink when toolchain has BindArchAction
+// RUN: %clang -### -no-canonical-prefixes -target x86_64-apple-darwin17.0.0 -fopenmp=libomp \
+// RUN: -fopenmp-targets=nvptx64-nvidia-cuda %s 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-CUBIN-NVLINK %s
+
+// CHK-CUBIN-NVLINK: clang{{.*}}" "-o" "[[PTX:.*\.s]]"
+// CHK-CUBIN-NVLINK-NEXT: ptxas{{.*}}" "--output-file" "[[CUBIN:.*\.cubin]]" {{.*}}"[[PTX]]"
+// CHK-CUBIN-NVLINK-NEXT: nvlink{{.*}}" {{.*}}"[[CUBIN]]"
+
+/// ###########################################################################
-// CHK-CUBIN: clang{{.*}}" "-o" "{{.*}}.s"
-// CHK-CUBIN-NEXT: ptxas{{.*}}" "--output-file" {{.*}}.cubin" {{.*}}.s"
-// CHK-CUBIN-NEXT: nvlink" {{.*}}.cubin"
+/// Check unbundling of assembly file, cubin file generation and usage by nvlink
+// RUN: touch %t.s
+// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -save-temps %t.s 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK %s
+/// Use DAG to ensure that assembly file has been unbundled.
+// CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK-DAG: ptxas{{.*}}" "--output-file" "[[CUBIN:.*\.cubin]]" {{.*}}"[[PTX:.*\.s]]"
+// CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK-DAG: clang-offload-bundler{{.*}}" "-type=s" {{.*}}"-outputs={{.*}}[[PTX]]
+// CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK-DAG-SAME: "-unbundle"
+// CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK: nvlink{{.*}}" {{.*}}"[[CUBIN]]"
/// ###########################################################################
-/// Check cubin file generation and usage by nvlink when toolchain has BindArchAction
-// RUN: %clang -### -no-canonical-prefixes -target x86_64-apple-darwin17.0.0 -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda %s 2>&1 \
-// RUN: | FileCheck -check-prefix=CHK-CUBIN-DARWIN %s
+/// Check cubin file generation and bundling
+// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -save-temps %s -c 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-PTXAS-CUBIN-BUNDLING %s
-// CHK-CUBIN-DARWIN: clang{{.*}}" "-o" "{{.*}}.s"
-// CHK-CUBIN-DARWIN-NEXT: ptxas{{.*}}" "--output-file" {{.*}}.cubin" {{.*}}.s"
-// CHK-CUBIN-DARWIN-NEXT: nvlink" {{.*}}.cubin"
+// CHK-PTXAS-CUBIN-BUNDLING: clang{{.*}}" "-o" "[[PTX:.*\.s]]"
+// CHK-PTXAS-CUBIN-BUNDLING-NEXT: ptxas{{.*}}" "--output-file" "[[CUBIN:.*\.cubin]]" {{.*}}"[[PTX]]"
+// CHK-PTXAS-CUBIN-BUNDLING: clang-offload-bundler{{.*}}" "-type=o" {{.*}}"-inputs={{.*}}[[CUBIN]]
/// ###########################################################################
-/// Check cubin file generation and usage by nvlink
-// RUN: touch %t1.o
-// RUN: touch %t2.o
-// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o 2>&1 \
-// RUN: | FileCheck -check-prefix=CHK-TWOCUBIN %s
+/// Check cubin file unbundling and usage by nvlink
+// RUN: touch %t.o
+// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -save-temps %t.o 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-CUBIN-UNBUNDLING-NVLINK %s
-// CHK-TWOCUBIN: nvlink{{.*}}openmp-offload-{{.*}}.cubin" "{{.*}}openmp-offload-{{.*}}.cubin"
+/// Use DAG to ensure that cubin file has been unbundled.
+// CHK-CUBIN-UNBUNDLING-NVLINK-DAG: nvlink{{.*}}" {{.*}}"[[CUBIN:.*\.cubin]]"
+// CHK-CUBIN-UNBUNDLING-NVLINK-DAG: clang-offload-bundler{{.*}}" "-type=o" {{.*}}"-outputs={{.*}}[[CUBIN]]
+// CHK-CUBIN-UNBUNDLING-NVLINK-DAG-SAME: "-unbundle"
/// ###########################################################################
-/// Check cubin file generation and usage by nvlink when toolchain has BindArchAction
+/// Check cubin file generation and usage by nvlink
// RUN: touch %t1.o
// RUN: touch %t2.o
-// RUN: %clang -### -no-canonical-prefixes -target x86_64-apple-darwin17.0.0 -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o 2>&1 \
-// RUN: | FileCheck -check-prefix=CHK-TWOCUBIN-DARWIN %s
+// RUN: %clang -### -no-canonical-prefixes -target powerpc64le-unknown-linux-gnu -fopenmp=libomp \
+// RUN: -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-TWOCUBIN %s
+/// Check cubin file generation and usage by nvlink when toolchain has BindArchAction
+// RUN: %clang -### -no-canonical-prefixes -target x86_64-apple-darwin17.0.0 -fopenmp=libomp \
+// RUN: -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-TWOCUBIN %s
-// CHK-TWOCUBIN-DARWIN: nvlink{{.*}}openmp-offload-{{.*}}.cubin" "{{.*}}openmp-offload-{{.*}}.cubin"
+// CHK-TWOCUBIN: nvlink{{.*}}openmp-offload-{{.*}}.cubin" "{{.*}}openmp-offload-{{.*}}.cubin"
/// ###########################################################################
/// ###########################################################################
/// PTXAS is passed -c flag by default when offloading to an NVIDIA device using OpenMP - disable it.
-// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fnoopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \
+// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fnoopenmp-relocatable-target \
+// RUN: -save-temps -no-canonical-prefixes %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-PTXAS-NORELO %s
// CHK-PTXAS-NORELO-NOT: ptxas{{.*}}" "-c"
/// PTXAS is passed -c flag by default when offloading to an NVIDIA device using OpenMP
/// Check that the flag is passed when -fopenmp-relocatable-target is used.
-// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \
+// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-relocatable-target \
+// RUN: -save-temps -no-canonical-prefixes %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-PTXAS-RELO %s
// CHK-PTXAS-RELO: ptxas{{.*}}" "-c"