From d0ee8b64ecf359737ce550d8f47f465ab6657be7 Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Wed, 2 Jun 2021 16:37:23 -0700 Subject: [PATCH] [LTO] Fix -fwhole-program-vtables handling after HIP ThinLTO patch A recent change (D99683) to support ThinLTO for HIP caused a regression when compiling cuda code with -flto=thin -fwhole-program-vtables. Specifically, we now get an error: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto' This error is coming from the device offload cc1 action being set up for the cuda compile, for which -flto=thin doesn't apply and gets dropped. This is a regression, but points to a potential issue that was silently occurring before the patch, details below. Before D99683, the check for fwhole-program-vtables in the driver looked like: if (WholeProgramVTables) { if (!D.isUsingLTO()) D.Diag(diag::err_drv_argument_only_allowed_with) << "-fwhole-program-vtables" << "-flto"; CmdArgs.push_back("-fwhole-program-vtables"); } And D.isUsingLTO() returned true since we have -flto=thin. However, because the cuda cc1 compile is doing device offloading, which didn't support any LTO, there was other code that suppressed -flto* options from being passed to the cc1 invocation. So the cc1 invocation silently had -fwhole-program-vtables without any -flto*. This seems potentially problematic, since if we had any virtual calls we would get type test assume sequences without the corresponding LTO pass that handles them. However, with the patch, which adds support for device offloading LTO option -foffload-lto=thin, the code has changed so that we set a bool IsUsingLTO based on either -flto* or -foffload-lto*, depending on whether this is the device offloading action. For the device offload action in our compile, since we don't have -foffload-lto, IsUsingLTO is false, and the check for LTO with -fwhole-program-vtables now fails. What we should do is only pass through -fwhole-program-vtables to the cc1 invocation that has LTO enabled (either the device offload action with -foffload-lto, or the non-device offload action with -flto), and otherwise drop the -fwhole-program-vtables for the non-LTO action. Then we should error only if we have -fwhole-program-vtables without any -f*lto* options. Differential Revision: https://reviews.llvm.org/D103579 --- clang/lib/Driver/ToolChains/Clang.cpp | 10 +++++++-- clang/test/Driver/cuda-options.cu | 10 +++++++++ clang/test/Driver/hip-options.hip | 29 +++++++++++++++++++++------ 3 files changed, 41 insertions(+), 8 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index dea4ade683ef..ee40df35b850 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -6647,11 +6647,17 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, } if (WholeProgramVTables) { - if (!IsUsingLTO) + // Propagate -fwhole-program-vtables if this is an LTO compile. + if (IsUsingLTO) + CmdArgs.push_back("-fwhole-program-vtables"); + // Check if we passed LTO options but they were suppressed because this is a + // device offloading action, or we passed device offload LTO options which + // were suppressed because this is not the device offload action. + // Otherwise, issue an error. + else if (!D.isUsingLTO(!IsDeviceOffloadAction)) D.Diag(diag::err_drv_argument_only_allowed_with) << "-fwhole-program-vtables" << "-flto"; - CmdArgs.push_back("-fwhole-program-vtables"); } bool DefaultsSplitLTOUnit = diff --git a/clang/test/Driver/cuda-options.cu b/clang/test/Driver/cuda-options.cu index 175e4b877ce9..5b67d7e4d04f 100644 --- a/clang/test/Driver/cuda-options.cu +++ b/clang/test/Driver/cuda-options.cu @@ -183,6 +183,12 @@ // RUN: -c %s 2>&1 \ // RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,PTX-SM30 %s +// Verify -flto=thin -fwhole-program-vtables handling. This should result in +// both options being passed to the host compilation, with neither passed to +// the device compilation. +// RUN: %clang -### -target x86_64-linux-gnu -c -flto=thin -fwhole-program-vtables %s 2>&1 \ +// RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,HOST,INCLUDES-DEVICE,NOLINK,THINLTOWPD %s +// THINLTOWPD-NOT: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto' // ARCH-SM20: "-cc1"{{.*}}"-target-cpu" "sm_20" // NOARCH-SM20-NOT: "-cc1"{{.*}}"-target-cpu" "sm_20" @@ -206,8 +212,10 @@ // Match the job that produces PTX assembly. // DEVICE: "-cc1" "-triple" "nvptx64-nvidia-cuda" // DEVICE-NOSAVE-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" +// THINLTOWPD-NOT: "-flto=thin" // DEVICE-SAME: "-fcuda-is-device" // DEVICE-SM30-SAME: "-target-cpu" "sm_30" +// THINLTOWPD-NOT: "-fwhole-program-vtables" // DEVICE-SAME: "-o" "[[PTXFILE:[^"]*]]" // DEVICE-NOSAVE-SAME: "-x" "cuda" // DEVICE-SAVE-SAME: "-x" "ir" @@ -252,12 +260,14 @@ // Match host-side compilation. // HOST: "-cc1" "-triple" "x86_64-unknown-linux-gnu" // HOST-SAME: "-aux-triple" "nvptx64-nvidia-cuda" +// THINLTOWPD-SAME: "-flto=thin" // HOST-NOT: "-fcuda-is-device" // There is only one GPU binary after combining it with fatbinary! // INCLUDES-DEVICE2-NOT: "-fcuda-include-gpubinary" // INCLUDES-DEVICE-SAME: "-fcuda-include-gpubinary" "[[FATBINARY]]" // There is only one GPU binary after combining it with fatbinary. // INCLUDES-DEVICE2-NOT: "-fcuda-include-gpubinary" +// THINLTOWPD-SAME: "-fwhole-program-vtables" // HOST-SAME: "-o" "[[HOSTOUTPUT:[^"]*]]" // HOST-NOSAVE-SAME: "-x" "cuda" // HOST-SAVE-SAME: "-x" "cuda-cpp-output" diff --git a/clang/test/Driver/hip-options.hip b/clang/test/Driver/hip-options.hip index ec723053da05..08a821c89a19 100644 --- a/clang/test/Driver/hip-options.hip +++ b/clang/test/Driver/hip-options.hip @@ -60,13 +60,30 @@ // Check -foffload-lto=thin translated correctly. // RUN: %clang -### -target x86_64-unknown-linux-gnu -nogpuinc -nogpulib \ -// RUN: --cuda-gpu-arch=gfx906 -foffload-lto=thin %s 2>&1 \ -// RUN: | FileCheck -check-prefix=THINLTO %s +// RUN: --cuda-gpu-arch=gfx906 -foffload-lto=thin -fwhole-program-vtables %s 2>&1 \ +// RUN: | FileCheck -check-prefix=HIPTHINLTO %s + +// RUN: %clang -### -target x86_64-unknown-linux-gnu -nogpuinc -nogpulib \ +// RUN: --cuda-gpu-arch=gfx906 -fgpu-rdc -foffload-lto=thin -fwhole-program-vtables %s 2>&1 \ +// RUN: | FileCheck -check-prefix=HIPTHINLTO %s +// Ensure we don't error about -fwhole-program-vtables for the non-device offload compile. +// HIPTHINLTO-NOT: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto' +// HIPTHINLTO-NOT: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-flto-unit" +// HIPTHINLTO: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-flto=thin" "-flto-unit" {{.*}} "-fwhole-program-vtables" +// HIPTHINLTO-NOT: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-flto-unit" +// HIPTHINLTO: lld{{.*}}"-plugin-opt=mcpu=gfx906" "-plugin-opt=thinlto" "-plugin-opt=-force-import-all" + +// Check that -flto=thin is handled correctly, particularly with -fwhole-program-vtables. +// // RUN: %clang -### -target x86_64-unknown-linux-gnu -nogpuinc -nogpulib \ -// RUN: --cuda-gpu-arch=gfx906 -fgpu-rdc -foffload-lto=thin %s 2>&1 \ +// RUN: --cuda-gpu-arch=gfx906 -flto=thin -fwhole-program-vtables %s 2>&1 \ // RUN: | FileCheck -check-prefix=THINLTO %s -// THINLTO-NOT: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-flto-unit" -// THINLTO: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-flto=thin" "-flto-unit" -// THINLTO: lld{{.*}}"-plugin-opt=mcpu=gfx906" "-plugin-opt=thinlto" "-plugin-opt=-force-import-all" +// Ensure we don't error about -fwhole-program-vtables for the device offload compile. We should +// drop -fwhole-program-vtables for the device offload compile and pass it through for the +// non-device offload compile along with -flto=thin. +// THINLTO-NOT: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto' +// THINLTO-NOT: clang{{.*}}" "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fwhole-program-vtables" +// THINLTO: clang{{.*}}" "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-flto=thin" {{.*}} "-fwhole-program-vtables" +// THINLTO-NOT: clang{{.*}}" "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fwhole-program-vtables" -- 2.34.1