From 0c1b32717bcffcf8edf95294e98933bd4c1e76ed Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Thu, 14 Jul 2022 11:46:43 -0400 Subject: [PATCH] [HIP] Allow the new driver to compile HIP in non-RDC mode The new driver primarily allows us to support RDC-mode compilations with proper linking. This is not needed for non-RDC mode compilation, but we still would like the new driver to be able to handle this mode so we can transition away from the old driver in the future. This patch adds the necessary code to support creating a fatbinary for HIP code generation. Reviewed By: yaxunl Differential Revision: https://reviews.llvm.org/D129784 --- clang/lib/Driver/Driver.cpp | 23 +++++++++++++++++++++-- clang/lib/Driver/ToolChains/Clang.cpp | 2 +- clang/test/Driver/hip-binding.hip | 11 +++++++++++ clang/test/Driver/hip-phases.hip | 8 ++++++-- clang/test/Driver/hip-toolchain-no-rdc.hip | 10 ++++++++++ 5 files changed, 49 insertions(+), 5 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index e50fdd6..3f29afd 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4478,6 +4478,15 @@ Action *Driver::BuildOffloadingActions(Compilation &C, } } + // Compiling HIP in non-RDC mode requires linking each action individually. 
+ for (Action *&A : DeviceActions) { + if (A->getType() != types::TY_Object || Kind != Action::OFK_HIP || + Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false)) + continue; + ActionList LinkerInput = {A}; + A = C.MakeAction<LinkJobAction>(LinkerInput, types::TY_Image); + } + auto TCAndArch = TCAndArchs.begin(); for (Action *A : DeviceActions) { DDeps.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind); @@ -4497,12 +4506,21 @@ Action *Driver::BuildOffloadingActions(Compilation &C, OffloadAction::DeviceDependences DDep; if (C.isOffloadingHostKind(Action::OFK_Cuda) && !Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false)) { - // If we are not in RDC-mode we just emit the final CUDA fatbinary for each - // translation unit without requiring any linking. + // If we are not in RDC-mode we just emit the final CUDA fatbinary for + // each translation unit without requiring any linking. Action *FatbinAction = C.MakeAction<LinkJobAction>(OffloadActions, types::TY_CUDA_FATBIN); DDep.add(*FatbinAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(), nullptr, Action::OFK_Cuda); + } else if (C.isOffloadingHostKind(Action::OFK_HIP) && + !Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, + false)) { + // If we are not in RDC-mode we just emit the final HIP fatbinary for each + // translation unit, linking each input individually. + Action *FatbinAction = + C.MakeAction<LinkJobAction>(OffloadActions, types::TY_HIP_FATBIN); + DDep.add(*FatbinAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(), + nullptr, Action::OFK_HIP); } else { // Package all the offloading actions into a single output that can be // embedded in the host and linked. @@ -4511,6 +4529,7 @@ Action *Driver::BuildOffloadingActions(Compilation &C, DDep.add(*PackagerAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(), nullptr, Action::OFK_None); } + OffloadAction::HostDependence HDep( *HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(), /*BoundArch=*/nullptr, isa<CompileJobAction>(HostAction) ? 
DDep : DDeps); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index f50a80d..6337a99 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -7006,7 +7006,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-fcuda-include-gpubinary"); CmdArgs.push_back(CudaDeviceInput->getFilename()); } else if (!HostOffloadingInputs.empty()) { - if (IsCuda && !IsRDCMode) { + if ((IsCuda || IsHIP) && !IsRDCMode) { assert(HostOffloadingInputs.size() == 1 && "Only one input expected"); CmdArgs.push_back("-fcuda-include-gpubinary"); CmdArgs.push_back(HostOffloadingInputs.front().getFilename()); diff --git a/clang/test/Driver/hip-binding.hip b/clang/test/Driver/hip-binding.hip index 9461295..6a27558 100644 --- a/clang/test/Driver/hip-binding.hip +++ b/clang/test/Driver/hip-binding.hip @@ -4,6 +4,9 @@ // RUN: %clang -ccc-print-bindings -target x86_64-linux-gnu \ // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ // RUN: -c 2>&1 | FileCheck -check-prefix=NRDCS %s +// RUN: %clang -ccc-print-bindings -target x86_64-linux-gnu --offload-new-driver \ +// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ +// RUN: -c 2>&1 | FileCheck -check-prefix=NRDCS %s // NRDCS: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[IN:.*hip-binding.hip]]"], output: "[[OBJ1:.*o]]" // NRDCS: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ1]]"], output: "[[IMG1:.*]]" // NRDCS: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[IN:.*hip-binding.hip]]"], output: "[[OBJ2:.*o]]" @@ -19,6 +22,14 @@ // RDCS: # "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[IN]]"], output: "[[HOSTOBJ:.*o]]" // RDCS: # "x86_64-unknown-linux-gnu" - "offload bundler", inputs: ["[[BC1]]", "[[BC2]]", "[[HOSTOBJ]]"], output: "{{.*}}" +// RUN: %clang -ccc-print-bindings -target x86_64-linux-gnu --offload-new-driver \ +// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ +// RUN: -c -fgpu-rdc 2>&1 | 
FileCheck -check-prefix=RDCS-NEW %s +// RDCS-NEW: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[HIP803:.+]]" +// RDCS-NEW: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]"], output: "[[HIP900:.+]]" +// RDCS-NEW: # "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[HIP803]]", "[[HIP900]]"], output: "[[HIPFB:.+]]" +// RDCS-NEW: # "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT]]", "[[HIPFB]]"], output: "{{.*}}" + // RUN: touch %t.o // RUN: %clang --hip-link -ccc-print-bindings -target x86_64-linux-gnu \ // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -fgpu-rdc %t.o\ diff --git a/clang/test/Driver/hip-phases.hip b/clang/test/Driver/hip-phases.hip index 808a137..d330ea8 100644 --- a/clang/test/Driver/hip-phases.hip +++ b/clang/test/Driver/hip-phases.hip @@ -11,7 +11,10 @@ // // RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \ // RUN: --cuda-gpu-arch=gfx803 %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=BIN,NRD %s +// RUN: | FileCheck -check-prefixes=BIN,NRD,OLD %s +// RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \ +// RUN: --offload-new-driver --cuda-gpu-arch=gfx803 %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=BIN,NRD,NEW %s // // RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \ // RUN: --cuda-gpu-arch=gfx803 -fgpu-rdc %s 2>&1 \ @@ -38,7 +41,8 @@ // RDC-DAG: [[P11:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P10]]}, object // NRD-DAG: [[P12:[0-9]+]]: backend, {[[P11]]}, assembler, (host-[[T]]) // NRD-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-[[T]]) -// NRD-DAG: [[P14:[0-9]+]]: linker, {[[P13]]}, image, (host-[[T]]) +// OLD-DAG: [[P14:[0-9]+]]: linker, {[[P13]]}, image, (host-[[T]]) +// NEW-DAG: [[P14:[0-9]+]]: clang-linker-wrapper, {[[P13]]}, image, (host-[[T]]) // RDC-DAG: [[P14:[0-9]+]]: linker, {[[P13]], [[P11]]}, image, (host-[[T]]) // diff --git a/clang/test/Driver/hip-toolchain-no-rdc.hip 
b/clang/test/Driver/hip-toolchain-no-rdc.hip index 0d8756d..180b3c2 100644 --- a/clang/test/Driver/hip-toolchain-no-rdc.hip +++ b/clang/test/Driver/hip-toolchain-no-rdc.hip @@ -21,6 +21,16 @@ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck -check-prefixes=CHECK %s +// RUN: %clang -### -target x86_64-linux-gnu -fno-gpu-rdc \ +// RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \ +// RUN: --hip-device-lib=lib1.bc --hip-device-lib=lib2.bc \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib2 \ +// RUN: -fuse-ld=lld -nogpuinc --offload-new-driver -c \ +// RUN: %S/Inputs/hip_multiple_inputs/a.cu \ +// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck -check-prefixes=CHECK %s + // RUN: touch %T/a.o // RUN: touch %T/b.o // RUN: %clang -### -target x86_64-linux-gnu \ -- 2.7.4