From f1aee32f1c85aa476bce70ec110284011c6df354 Mon Sep 17 00:00:00 2001 From: Siu Chi Chan Date: Tue, 6 Jun 2023 15:48:26 -0400 Subject: [PATCH] [HIP] Instruct lld to go through all archives Add the --whole-archive flag when linking HIP programs to instruct lld to go through every archive library to link in all the kernel functions (entry points to the GPU program); otherwise, lld may skip some library files if there are no more symbols that need to be resolved. Differential Revision: https://reviews.llvm.org/D152207 Change-Id: I084d3d606f9cee646f9adc65f4b648c9bcb252e6 --- clang/lib/Driver/ToolChains/HIPAMD.cpp | 14 ++++++++++++++ clang/test/Driver/hip-toolchain-rdc-separate.hip | 4 ++++ clang/test/Driver/hip-toolchain-rdc-static-lib.hip | 2 ++ 3 files changed, 20 insertions(+) diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp index 3131c8ed2463..a9afa5858b1b 100644 --- a/clang/lib/Driver/ToolChains/HIPAMD.cpp +++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp @@ -152,6 +152,18 @@ void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA, addLinkerCompressDebugSectionsOption(TC, Args, LldArgs); + // Given that host and device linking happen in separate processes, the device + // linker doesn't always have the visibility as to which device symbols are + // needed by a program, especially for the device symbol dependencies that are + // introduced through the host symbol resolution. + // For example: host_A() (A.obj) --> host_B() (B.obj) --> device_kernel_B() + // (B.obj). In this case, the device linker doesn't know that A.obj actually + // depends on the kernel functions in B.obj. When linking to a static device + // library, the device linker may drop some of the device global symbols if + // they aren't referenced. As a workaround, we are adding the + // --whole-archive flag so that all global symbols are linked in. 
+ LldArgs.push_back("--whole-archive"); + for (auto *Arg : Args.filtered(options::OPT_Xoffload_linker)) { LldArgs.push_back(Arg->getValue(1)); Arg->claim(); @@ -169,6 +181,8 @@ void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA, /*IsBitCodeSDL=*/true, /*PostClangLink=*/false); + LldArgs.push_back("--no-whole-archive"); + const char *Lld = Args.MakeArgString(getToolChain().GetProgramPath("lld")); C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(), Lld, LldArgs, Inputs, Output)); diff --git a/clang/test/Driver/hip-toolchain-rdc-separate.hip b/clang/test/Driver/hip-toolchain-rdc-separate.hip index 4782434df4f8..286acfdb6d06 100644 --- a/clang/test/Driver/hip-toolchain-rdc-separate.hip +++ b/clang/test/Driver/hip-toolchain-rdc-separate.hip @@ -126,18 +126,22 @@ // LINK-NOT: ".*llc" // LINK: {{".*lld.*"}} {{.*}} "-plugin-opt=-amdgpu-internalize-symbols" // LINK-SAME: "-plugin-opt=mcpu=gfx803" +// LINK-SAME: "--whole-archive" // LLD-TMP-SAME: "-o" "[[IMG_DEV1:.*.out]]" // LLD-FIN-SAME: "-o" "[[IMG_DEV1:a.out-.*gfx803]]" // LINK-SAME "[[A_BC1]]" "[[B_BC1]]" +// LINK-SAME: "--no-whole-archive" // LINK-NOT: "*.llvm-link" // LINK-NOT: ".*opt" // LINK-NOT: ".*llc" // LINK: {{".*lld.*"}} {{.*}} "-plugin-opt=-amdgpu-internalize-symbols" // LINK-SAME: "-plugin-opt=mcpu=gfx900" +// LINK-SAME: "--whole-archive" // LLD-TMP-SAME: "-o" "[[IMG_DEV2:.*.out]]" // LLD-FIN-SAME: "-o" "[[IMG_DEV1:a.out-.*gfx900]]" // LINK-SAME "[[A_BC2]]" "[[B_BC2]]" +// LINK-SAME: "--no-whole-archive" // LINK-BUNDLE: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" // LINK-BUNDLE-SAME: "-targets={{.*}},hipv4-amdgcn-amd-amdhsa--gfx803,hipv4-amdgcn-amd-amdhsa--gfx900" diff --git a/clang/test/Driver/hip-toolchain-rdc-static-lib.hip b/clang/test/Driver/hip-toolchain-rdc-static-lib.hip index 66eac74876cf..20fd2fb29c1e 100644 --- a/clang/test/Driver/hip-toolchain-rdc-static-lib.hip +++ b/clang/test/Driver/hip-toolchain-rdc-static-lib.hip @@ -80,7 +80,9 @@ // CHECK-NOT: 
".*llc" // CHECK: [[LLD]] {{.*}} "-plugin-opt=-amdgpu-internalize-symbols" // CHECK-SAME: "-plugin-opt=mcpu=gfx900" +// CHECK-SAME: "--whole-archive" // CHECK-SAME: "-o" "[[IMG_DEV2:.*out]]" [[A_BC2]] [[B_BC2]] +// CHECK-SAME: "--no-whole-archive" // combine images generated into hip fat binary object // CHECK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" -- 2.34.1