"cannot find libdevice for %0; provide path to different CUDA installation "
"via '--cuda-path', or pass '-nocudalib' to build without linking with "
"libdevice">;
-def err_drv_no_rdc_new_driver : Error<
- "Using '--offload-new-driver' requires '-fgpu-rdc'">;
def err_drv_no_rocm_device_lib : Error<
"cannot find ROCm device library%select{| for %1|for ABI version %1}0; provide its path via "
CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
: CGCUDARuntime(CGM), Context(CGM.getLLVMContext()),
TheModule(CGM.getModule()),
- RelocatableDeviceCode(CGM.getLangOpts().GPURelocatableDeviceCode ||
- CGM.getLangOpts().OffloadingNewDriver),
+ RelocatableDeviceCode(CGM.getLangOpts().GPURelocatableDeviceCode),
DeviceMC(InitDeviceMC(CGM)) {
CodeGen::CodeGenTypes &Types = CGM.getTypes();
ASTContext &Ctx = CGM.getContext();
}
return nullptr;
}
- if (!(CGM.getLangOpts().OffloadingNewDriver && RelocatableDeviceCode))
+ if (CGM.getLangOpts().OffloadingNewDriver && RelocatableDeviceCode)
+ createOffloadingEntries();
+ else
return makeModuleCtorFunction();
- createOffloadingEntries();
return nullptr;
}
return false;
Relocatable = Args.hasFlag(options::OPT_fgpu_rdc,
- options::OPT_fno_gpu_rdc, /*Default=*/false);
+ options::OPT_fno_gpu_rdc, /*Default=*/false);
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
assert(HostTC && "No toolchain for host compilation.");
if (offloadDeviceOnly())
return C.MakeAction<OffloadAction>(DDeps, types::TY_Nothing);
- Action *OffloadPackager =
- C.MakeAction<OffloadPackagerJobAction>(OffloadActions, types::TY_Image);
OffloadAction::DeviceDependences DDep;
- DDep.add(*OffloadPackager, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
- nullptr, Action::OFK_None);
+ if (C.isOffloadingHostKind(Action::OFK_Cuda) &&
+ !Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false)) {
+ // If we are not in RDC-mode we just emit the final CUDA fatbinary for each
+ // translation unit without requiring any linking.
+ Action *FatbinAction =
+ C.MakeAction<LinkJobAction>(OffloadActions, types::TY_CUDA_FATBIN);
+ DDep.add(*FatbinAction, *C.getSingleOffloadToolChain<Action::OFK_Cuda>(),
+ nullptr, Action::OFK_Cuda);
+ } else {
+ // Package all the offloading actions into a single output that can be
+ // embedded in the host and linked.
+ Action *PackagerAction =
+ C.MakeAction<OffloadPackagerJobAction>(OffloadActions, types::TY_Image);
+ DDep.add(*PackagerAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
+ nullptr, Action::OFK_None);
+ }
OffloadAction::HostDependence HDep(
*HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
/*BoundArch=*/nullptr, isa<CompileJobAction>(HostAction) ? DDep : DDeps);
Args.hasFlag(options::OPT_offload_new_driver,
options::OPT_no_offload_new_driver, false));
+ bool IsRDCMode =
+ Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false);
bool IsUsingLTO = D.isUsingLTO(IsDeviceOffloadAction);
auto LTOMode = D.getLTOMode(IsDeviceOffloadAction);
// A header module compilation doesn't have a main input file, so invent a
// fake one as a placeholder.
- const char *ModuleName = [&]{
+ const char *ModuleName = [&] {
auto *ModuleNameArg = Args.getLastArg(options::OPT_fmodule_name_EQ);
return ModuleNameArg ? ModuleNameArg->getValue() : "";
}();
}
if (IsCuda || IsHIP) {
- if (!Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false) &&
- Args.hasArg(options::OPT_offload_new_driver))
- D.Diag(diag::err_drv_no_rdc_new_driver);
- if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false))
+ if (IsRDCMode)
CmdArgs.push_back("-fgpu-rdc");
if (Args.hasFlag(options::OPT_fgpu_defer_diag,
options::OPT_fno_gpu_defer_diag, false))
}
}
- // Host-side cuda compilation receives all device-side outputs in a single
- // fatbin as Inputs[1]. Include the binary with -fcuda-include-gpubinary.
+ // Host-side offloading compilation receives all device-side outputs. Include
+ // them in the host compilation depending on the target. If the host inputs
+ // are not empty we use the new-driver scheme, otherwise use the old scheme.
if ((IsCuda || IsHIP) && CudaDeviceInput) {
+ CmdArgs.push_back("-fcuda-include-gpubinary");
+ CmdArgs.push_back(CudaDeviceInput->getFilename());
+ } else if (!HostOffloadingInputs.empty()) {
+ if (IsCuda && !IsRDCMode) {
+ assert(HostOffloadingInputs.size() == 1 && "Only one input expected");
CmdArgs.push_back("-fcuda-include-gpubinary");
- CmdArgs.push_back(CudaDeviceInput->getFilename());
+ CmdArgs.push_back(HostOffloadingInputs.front().getFilename());
+ } else {
+ for (const InputInfo Input : HostOffloadingInputs)
+ CmdArgs.push_back(Args.MakeArgString("-fembed-offload-object=" +
+ TC.getInputFilename(Input)));
+ }
}
if (IsCuda) {
}
}
- // Host-side offloading recieves the device object files and embeds it in a
- // named section including the associated target triple and architecture.
- for (const InputInfo Input : HostOffloadingInputs)
- CmdArgs.push_back(Args.MakeArgString("-fembed-offload-object=" +
- TC.getInputFilename(Input)));
-
if (Triple.isAMDGPU()) {
handleAMDGPUCodeObjectVersionOptions(D, Args, CmdArgs);
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --global-value-regex ".omp_offloading.entry.*"
-// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu \
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fgpu-rdc \
// RUN: --offload-new-driver -emit-llvm -o - -x cuda %s | FileCheck \
// RUN: --check-prefix=CUDA %s
-// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu \
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fgpu-rdc \
// RUN: --offload-new-driver -emit-llvm -o - -x hip %s | FileCheck \
// RUN: --check-prefix=HIP %s
// Same tests for OpenMP
// RUN: %clang -### -target x86_64-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -c %s \
-// RUN: -g -gz 2>&1 | FileCheck %s --check-prefixes WARN,COMMON
+// RUN: -fgpu-rdc -g -gz 2>&1 | FileCheck %s --check-prefixes WARN,COMMON
// RUN: %clang -### -target x86_64-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -c %s \
-// RUN: -gdwarf -fdebug-info-for-profiling 2>&1 | FileCheck %s --check-prefixes WARN,COMMON
+// RUN: -fgpu-rdc -gdwarf -fdebug-info-for-profiling 2>&1 | FileCheck %s --check-prefixes WARN,COMMON
// RUN: %clang -### -target x86_64-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -c %s \
-// RUN: -gdwarf-2 -gsplit-dwarf 2>&1 | FileCheck %s --check-prefixes WARN,COMMON
+// RUN: -fgpu-rdc -gdwarf-2 -gsplit-dwarf 2>&1 | FileCheck %s --check-prefixes WARN,COMMON
// RUN: %clang -### -target x86_64-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -c %s \
-// RUN: -gdwarf-3 -glldb 2>&1 | FileCheck %s --check-prefixes WARN,COMMON
+// RUN: -fgpu-rdc -gdwarf-3 -glldb 2>&1 | FileCheck %s --check-prefixes WARN,COMMON
// RUN: %clang -### -target x86_64-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -c %s \
-// RUN: -gdwarf-4 -gcodeview 2>&1 | FileCheck %s --check-prefixes WARN,COMMON
+// RUN: -fgpu-rdc -gdwarf-4 -gcodeview 2>&1 | FileCheck %s --check-prefixes WARN,COMMON
// RUN: %clang -### -target x86_64-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -c %s \
-// RUN: -gdwarf-5 -gmodules 2>&1 | FileCheck %s --check-prefixes WARN,COMMON
+// RUN: -fgpu-rdc -gdwarf-5 -gmodules 2>&1 | FileCheck %s --check-prefixes WARN,COMMON
// RUN: %clang -### -target x86_64-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -c %s \
-// RUN: -ggdb1 -fdebug-macro 2>&1 | FileCheck %s --check-prefixes WARN,COMMON
+// RUN: -fgpu-rdc -ggdb1 -fdebug-macro 2>&1 | FileCheck %s --check-prefixes WARN,COMMON
// RUN: %clang -### -target x86_64-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -c %s \
-// RUN: -ggdb2 -ggnu-pubnames 2>&1 | FileCheck %s --check-prefixes WARN,COMMON
+// RUN: -fgpu-rdc -ggdb2 -ggnu-pubnames 2>&1 | FileCheck %s --check-prefixes WARN,COMMON
// RUN: %clang -### -target x86_64-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -c %s \
-// RUN: -ggdb3 -gdwarf-aranges 2>&1 | FileCheck %s --check-prefixes WARN,COMMON
+// RUN: -fgpu-rdc -ggdb3 -gdwarf-aranges 2>&1 | FileCheck %s --check-prefixes WARN,COMMON
// RUN: %clang -### -target x86_64-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -c %s \
-// RUN: -g -gcolumn-info -fdebug-types-section 2>&1 | FileCheck %s --check-prefixes WARN,COMMON
+// RUN: -fgpu-rdc -g -gcolumn-info -fdebug-types-section 2>&1 | FileCheck %s --check-prefixes WARN,COMMON
// RUN: %clang -### -target x86_64-linux-gnu -c %s -gdwarf-5 -gembed-source 2>&1 \
// RUN: | FileCheck %s --check-prefixes WARN-GES,COMMON
// RUN: %clang -### -target x86_64-linux-gnu -c %s -ggdb -gembed-source -gdwarf-5 2>&1 \
// RUN: | FileCheck %s --check-prefixes WARN-GES,COMMON
// RUN: %clang -### -target x86_64-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -c %s \
-// RUN: -gdwarf-5 -gembed-source 2>&1 | FileCheck %s --check-prefixes WARN-GES,COMMON
+// RUN: -fgpu-rdc -gdwarf-5 -gembed-source 2>&1 | FileCheck %s --check-prefixes WARN-GES,COMMON
// RUN: %clang -### -target x86_64-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -c %s \
-// RUN: -ggdb -gembed-source -gdwarf-5 2>&1 | FileCheck %s --check-prefixes WARN-GES,COMMON
+// RUN: -fgpu-rdc -ggdb -gembed-source -gdwarf-5 2>&1 | FileCheck %s --check-prefixes WARN-GES,COMMON
// COMMON: warning: debug information option '{{-gz|-fdebug-info-for-profiling|-gsplit-dwarf|-glldb|-gcodeview|-gmodules|-gembed-source|-fdebug-macro|-ggnu-pubnames|-gdwarf-aranges|-fdebug-types-section}}' is not supported
// WARN-SAME: for target 'nvptx64-nvidia-cuda' [-Wunsupported-target-opt]
// BINDINGS-NEXT: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT]]", "[[BINARY]]"], output: "[[HOST_OBJ:.+]]"
// BINDINGS-NEXT: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
-// RUN: %clang -### -nocudalib --offload-new-driver %s 2>&1 | FileCheck -check-prefix RDC %s
-// RDC: error: Using '--offload-new-driver' requires '-fgpu-rdc'
-
// RUN: %clang -### -target x86_64-linux-gnu -nocudalib -ccc-print-bindings -fgpu-rdc \
// RUN: --offload-new-driver --offload-arch=sm_35 --offload-arch=sm_70 %s 2>&1 \
// RUN: | FileCheck -check-prefix BINDINGS-HOST %s
// RUN: | FileCheck -check-prefix DEVICE-LINK %s
// DEVICE-LINK: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[INPUT:.+]]"], output: "a.out"
+
+// RUN: %clang -### -target x86_64-linux-gnu -nocudalib --offload-new-driver \
+// RUN: --offload-arch=sm_35 --offload-arch=sm_70 %s 2>&1 \
+// RUN: | FileCheck -check-prefix GPU-BINARY %s
+
+// GPU-BINARY: fatbinary{{.*}}"--create" "{{.*}}.fatbin"
+// GPU-BINARY: -cc1{{.*}}-fcuda-include-gpubinary" "{{.*}}.fatbin"
//
// Test the phases generated when using the new offloading driver.
//
-// RUN: %clang -### -target powerpc64le-ibm-linux-gnu -ccc-print-phases --offload-new-driver \
-// RUN: --offload-arch=sm_52 --offload-arch=sm_70 %s 2>&1 | FileCheck --check-prefix=NEW_DRIVER %s
-// NEW_DRIVER: 0: input, "[[INPUT:.+]]", cuda
-// NEW_DRIVER: 1: preprocessor, {0}, cuda-cpp-output
-// NEW_DRIVER: 2: compiler, {1}, ir
-// NEW_DRIVER: 3: input, "[[INPUT]]", cuda, (device-cuda, sm_52)
-// NEW_DRIVER: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_52)
-// NEW_DRIVER: 5: compiler, {4}, ir, (device-cuda, sm_52)
-// NEW_DRIVER: 6: backend, {5}, assembler, (device-cuda, sm_52)
-// NEW_DRIVER: 7: assembler, {6}, object, (device-cuda, sm_52)
-// NEW_DRIVER: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, object
-// NEW_DRIVER: 9: input, "[[INPUT]]", cuda, (device-cuda, sm_70)
-// NEW_DRIVER: 10: preprocessor, {9}, cuda-cpp-output, (device-cuda, sm_70)
-// NEW_DRIVER: 11: compiler, {10}, ir, (device-cuda, sm_70)
-// NEW_DRIVER: 12: backend, {11}, assembler, (device-cuda, sm_70)
-// NEW_DRIVER: 13: assembler, {12}, object, (device-cuda, sm_70)
-// NEW_DRIVER: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, object
-// NEW_DRIVER: 15: clang-offload-packager, {8, 14}, image
-// NEW_DRIVER: 16: offload, " (powerpc64le-ibm-linux-gnu)" {2}, " (powerpc64le-ibm-linux-gnu)" {15}, ir
-// NEW_DRIVER: 17: backend, {16}, assembler, (host-cuda)
-// NEW_DRIVER: 18: assembler, {17}, object, (host-cuda)
-// NEW_DRIVER: 19: clang-linker-wrapper, {18}, image, (host-cuda)
+// RUN: %clang -### -target powerpc64le-ibm-linux-gnu -ccc-print-phases --offload-new-driver -fgpu-rdc \
+// RUN: --offload-arch=sm_52 --offload-arch=sm_70 %s 2>&1 | FileCheck --check-prefix=NEW-DRIVER-RDC %s
+// NEW-DRIVER-RDC: 0: input, "[[INPUT:.+]]", cuda
+// NEW-DRIVER-RDC: 1: preprocessor, {0}, cuda-cpp-output
+// NEW-DRIVER-RDC: 2: compiler, {1}, ir
+// NEW-DRIVER-RDC: 3: input, "[[INPUT]]", cuda, (device-cuda, sm_52)
+// NEW-DRIVER-RDC: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_52)
+// NEW-DRIVER-RDC: 5: compiler, {4}, ir, (device-cuda, sm_52)
+// NEW-DRIVER-RDC: 6: backend, {5}, assembler, (device-cuda, sm_52)
+// NEW-DRIVER-RDC: 7: assembler, {6}, object, (device-cuda, sm_52)
+// NEW-DRIVER-RDC: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, object
+// NEW-DRIVER-RDC: 9: input, "[[INPUT]]", cuda, (device-cuda, sm_70)
+// NEW-DRIVER-RDC: 10: preprocessor, {9}, cuda-cpp-output, (device-cuda, sm_70)
+// NEW-DRIVER-RDC: 11: compiler, {10}, ir, (device-cuda, sm_70)
+// NEW-DRIVER-RDC: 12: backend, {11}, assembler, (device-cuda, sm_70)
+// NEW-DRIVER-RDC: 13: assembler, {12}, object, (device-cuda, sm_70)
+// NEW-DRIVER-RDC: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, object
+// NEW-DRIVER-RDC: 15: clang-offload-packager, {8, 14}, image
+// NEW-DRIVER-RDC: 16: offload, " (powerpc64le-ibm-linux-gnu)" {2}, " (powerpc64le-ibm-linux-gnu)" {15}, ir
+// NEW-DRIVER-RDC: 17: backend, {16}, assembler, (host-cuda)
+// NEW-DRIVER-RDC: 18: assembler, {17}, object, (host-cuda)
+// NEW-DRIVER-RDC: 19: clang-linker-wrapper, {18}, image, (host-cuda)
+
+// RUN: %clang -### -target powerpc64le-ibm-linux-gnu -ccc-print-phases --offload-new-driver -fgpu-rdc \
+// RUN: --offload-arch=sm_52 --offload-arch=sm_70 %s 2>&1 | FileCheck --check-prefix=NEW-DRIVER %s
+// NEW-DRIVER: 0: input, "[[INPUT:.+]]", cuda
+// NEW-DRIVER: 1: preprocessor, {0}, cuda-cpp-output
+// NEW-DRIVER: 2: compiler, {1}, ir
+// NEW-DRIVER: 3: input, "[[INPUT]]", cuda, (device-cuda, sm_52)
+// NEW-DRIVER: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_52)
+// NEW-DRIVER: 5: compiler, {4}, ir, (device-cuda, sm_52)
+// NEW-DRIVER: 6: backend, {5}, assembler, (device-cuda, sm_52)
+// NEW-DRIVER: 7: assembler, {6}, object, (device-cuda, sm_52)
+// NEW-DRIVER: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, object
+// NEW-DRIVER: 9: input, "[[INPUT]]", cuda, (device-cuda, sm_70)
+// NEW-DRIVER: 10: preprocessor, {9}, cuda-cpp-output, (device-cuda, sm_70)
+// NEW-DRIVER: 11: compiler, {10}, ir, (device-cuda, sm_70)
+// NEW-DRIVER: 12: backend, {11}, assembler, (device-cuda, sm_70)
+// NEW-DRIVER: 13: assembler, {12}, object, (device-cuda, sm_70)
+// NEW-DRIVER: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, object
+// NEW-DRIVER: 15: clang-offload-packager, {8, 14}, image
+// NEW-DRIVER: 16: offload, " (powerpc64le-ibm-linux-gnu)" {2}, " (powerpc64le-ibm-linux-gnu)" {15}, ir
+// NEW-DRIVER: 17: backend, {16}, assembler, (host-cuda)
+// NEW-DRIVER: 18: assembler, {17}, object, (host-cuda)
+// NEW-DRIVER: 19: clang-linker-wrapper, {18}, image, (host-cuda)