From: Joseph Huber Date: Thu, 23 Jun 2022 14:06:26 +0000 (-0400) Subject: [CUDA] Do not embed a fatbinary when using the new driver X-Git-Tag: upstream/15.0.7~3748 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=4d3c010f1d012ade0ca36fdd7bc8b8baeb8df1f2;p=platform%2Fupstream%2Fllvm.git [CUDA] Do not embed a fatbinary when using the new driver Previously, when using the new driver we created a fatbinary with the PTX and Cubin output. This was mainly done in an attempt to create some backwards compatibility with the existing CUDA support that embeds the fatbinary in each TU. This will most likely be more work than necessary to actually implement. The linker wrapper cannot do anything with these embedded PTX files because we do not know how to link them, and if we did want to include multiple files it should go through the `clang-offload-packager` instead. Also this didn't repsect the setting that disables embedding PTX (although it wasn't used anyway). Reviewed By: tra Differential Revision: https://reviews.llvm.org/D128441 --- diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 67bcb41..d831422 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4459,17 +4459,6 @@ Action *Driver::BuildOffloadingActions(Compilation &C, OffloadAction::DeviceDependences DDep; DDep.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind); A = C.MakeAction(HDep, DDep); - } else if (isa(A) && Kind == Action::OFK_Cuda) { - // The Cuda toolchain uses fatbinary as the linker phase to bundle the - // PTX and Cubin output. - ActionList FatbinActions; - for (Action *A : {A, A->getInputs()[0]}) { - OffloadAction::DeviceDependences DDep; - DDep.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind); - FatbinActions.emplace_back( - C.MakeAction(DDep, A->getType())); - } - A = C.MakeAction(FatbinActions, types::TY_CUDA_FATBIN); } ++TCAndArch; } diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index dcdcc99..0e87540 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -536,8 +536,9 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA, const char *Arch = (II.getType() == types::TY_PP_Asm) ? CudaArchToVirtualArchString(gpu_arch) : gpu_arch_str; - CmdArgs.push_back(Args.MakeArgString(llvm::Twine("--image=profile=") + - Arch + ",file=" + II.getFilename())); + CmdArgs.push_back( + Args.MakeArgString(llvm::Twine("--image=profile=") + Arch + + ",file=" + getToolChain().getInputFilename(II))); } for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary)) @@ -695,9 +696,8 @@ CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple, std::string CudaToolChain::getInputFilename(const InputInfo &Input) const { // Only object files are changed, for example assembly files keep their .s - // extensions. CUDA also continues to use .o as they don't use nvlink but - // fatbinary. - if (!(OK == Action::OFK_OpenMP && Input.getType() == types::TY_Object)) + // extensions. + if (Input.getType() != types::TY_Object) return ToolChain::getInputFilename(Input); // Replace extension for object files with cubin because nvlink relies on diff --git a/clang/test/Driver/cuda-openmp-driver.cu b/clang/test/Driver/cuda-openmp-driver.cu index b27195d..ded2a75 100644 --- a/clang/test/Driver/cuda-openmp-driver.cu +++ b/clang/test/Driver/cuda-openmp-driver.cu @@ -5,13 +5,11 @@ // RUN: --offload-new-driver --offload-arch=sm_35 --offload-arch=sm_70 %s 2>&1 \ // RUN: | FileCheck -check-prefix BINDINGS %s -// BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[PTX_SM_35:.+]]" +// BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[PTX_SM_35:.+]]" // BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_35]]"], output: "[[CUBIN_SM_35:.+]]" -// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_35]]", "[[PTX_SM_35]]"], output: "[[FATBIN_SM_35:.+]]" // BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]"], output: "[[PTX_SM_70:.+]]" // BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_70:.+]]"], output: "[[CUBIN_SM_70:.+]]" -// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_70]]", "[[PTX_SM_70:.+]]"], output: "[[FATBIN_SM_70:.+]]" -// BINDINGS-NEXT: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[FATBIN_SM_35]]", "[[FATBIN_SM_70]]"], output: "[[BINARY:.+]]" +// BINDINGS-NEXT: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[CUBIN_SM_35]]", "[[CUBIN_SM_70]]"], output: "[[BINARY:.+]]" // BINDINGS-NEXT: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT]]", "[[BINARY]]"], output: "[[HOST_OBJ:.+]]" // BINDINGS-NEXT: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out" @@ -31,7 +29,6 @@ // BINDINGS-DEVICE: # "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[PTX:.+]]" // BINDINGS-DEVICE: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX]]"], output: "[[CUBIN:.+]]" -// BINDINGS-DEVICE: # "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN]]", "[[PTX]]"], output: "{{.*}}.fatbin" // RUN: %clang -### -target x86_64-linux-gnu -nocudalib --cuda-feature=+ptx61 --offload-arch=sm_70 %s 2>&1 | FileCheck -check-prefix MANUAL-FEATURE %s // MANUAL-FEATURE: -cc1{{.*}}-target-feature{{.*}}+ptx61 diff --git a/clang/test/Driver/cuda-phases.cu b/clang/test/Driver/cuda-phases.cu index dac6394..404db69 100644 --- a/clang/test/Driver/cuda-phases.cu +++ b/clang/test/Driver/cuda-phases.cu @@ -232,20 +232,14 @@ // NEW_DRIVER: 6: backend, {5}, assembler, (device-cuda, sm_52) // NEW_DRIVER: 7: assembler, {6}, object, (device-cuda, sm_52) // NEW_DRIVER: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, object -// NEW_DRIVER: 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {6}, assembler -// NEW_DRIVER: 10: linker, {8, 9}, cuda-fatbin, (device-cuda, sm_52) -// NEW_DRIVER: 11: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {10}, cuda-fatbin -// NEW_DRIVER: 12: input, "[[INPUT]]", cuda, (device-cuda, sm_70) -// NEW_DRIVER: 13: preprocessor, {12}, cuda-cpp-output, (device-cuda, sm_70) -// NEW_DRIVER: 14: compiler, {13}, ir, (device-cuda, sm_70) -// NEW_DRIVER: 15: backend, {14}, assembler, (device-cuda, sm_70) -// NEW_DRIVER: 16: assembler, {15}, object, (device-cuda, sm_70) -// NEW_DRIVER: 17: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {16}, object -// NEW_DRIVER: 18: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {15}, assembler -// NEW_DRIVER: 19: linker, {17, 18}, cuda-fatbin, (device-cuda, sm_70) -// NEW_DRIVER: 20: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {19}, cuda-fatbin -// NEW_DRIVER: 21: clang-offload-packager, {11, 20}, image -// NEW_DRIVER: 22: offload, " (powerpc64le-ibm-linux-gnu)" {2}, " (powerpc64le-ibm-linux-gnu)" {21}, ir -// NEW_DRIVER: 23: backend, {22}, assembler, (host-cuda) -// NEW_DRIVER: 24: assembler, {23}, object, (host-cuda) -// NEW_DRIVER: 25: clang-linker-wrapper, {24}, image, (host-cuda) +// NEW_DRIVER: 9: input, "[[INPUT]]", cuda, (device-cuda, sm_70) +// NEW_DRIVER: 10: preprocessor, {9}, cuda-cpp-output, (device-cuda, sm_70) +// NEW_DRIVER: 11: compiler, {10}, ir, (device-cuda, sm_70) +// NEW_DRIVER: 12: backend, {11}, assembler, (device-cuda, sm_70) +// NEW_DRIVER: 13: assembler, {12}, object, (device-cuda, sm_70) +// NEW_DRIVER: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, object +// NEW_DRIVER: 15: clang-offload-packager, {8, 14}, image +// NEW_DRIVER: 16: offload, " (powerpc64le-ibm-linux-gnu)" {2}, " (powerpc64le-ibm-linux-gnu)" {15}, ir +// NEW_DRIVER: 17: backend, {16}, assembler, (host-cuda) +// NEW_DRIVER: 18: assembler, {17}, object, (host-cuda) +// NEW_DRIVER: 19: clang-linker-wrapper, {18}, image, (host-cuda)