From 461a1836d3d77371bb6271fefd645897997a22b8 Mon Sep 17 00:00:00 2001 From: Wael Yehia Date: Thu, 20 Oct 2022 16:07:35 +0000 Subject: [PATCH] [PGO][AIX] Improve dummy var retention and allow -bcdtors:csect linking. 1) Use a static array of pointers to retain the dummy vars. 2) Associate liveness of the array with that of the runtime hook variable __llvm_profile_runtime. 3) Perform the runtime initialization through the runtime hook variable. 4) Preserve the runtime hook variable using the -u linker flag. Reviewed By: hubert.reinterpretcast Differential Revision: https://reviews.llvm.org/D136192 --- clang/lib/Driver/ToolChains/AIX.cpp | 11 +++++++++ clang/lib/Driver/ToolChains/AIX.h | 3 +++ .../lib/profile/InstrProfilingPlatformLinux.c | 22 ++++++++--------- compiler-rt/lib/profile/InstrProfilingRuntime.cpp | 28 ++++++++++++---------- compiler-rt/test/profile/AIX/lit.local.cfg.py | 9 +++++++ compiler-rt/test/profile/AIX/shared-bexpall-pgo.c | 14 +++++++++++ .../Transforms/Instrumentation/InstrProfiling.cpp | 2 +- .../Instrumentation/InstrProfiling/no-counters.ll | 2 +- .../Instrumentation/InstrProfiling/profiling.ll | 9 +++---- 9 files changed, 68 insertions(+), 32 deletions(-) create mode 100644 compiler-rt/test/profile/AIX/lit.local.cfg.py create mode 100644 compiler-rt/test/profile/AIX/shared-bexpall-pgo.c diff --git a/clang/lib/Driver/ToolChains/AIX.cpp b/clang/lib/Driver/ToolChains/AIX.cpp index 748b6a3..1e72f1e 100644 --- a/clang/lib/Driver/ToolChains/AIX.cpp +++ b/clang/lib/Driver/ToolChains/AIX.cpp @@ -13,6 +13,7 @@ #include "clang/Driver/Options.h" #include "clang/Driver/SanitizerArgs.h" #include "llvm/Option/ArgList.h" +#include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/Path.h" using AIX = clang::driver::toolchains::AIX; @@ -348,6 +349,16 @@ void AIX::AddCXXStdlibLibArgs(const llvm::opt::ArgList &Args, llvm_unreachable("Unexpected C++ library type; only libc++ is supported."); } +void AIX::addProfileRTLibs(const llvm::opt::ArgList &Args, + 
llvm::opt::ArgStringList &CmdArgs) const { + // Add linker option -u__llvm_profile_runtime to cause runtime + // initialization to occur. + if (needsProfileRT(Args)) + CmdArgs.push_back(Args.MakeArgString( + Twine("-u", llvm::getInstrProfRuntimeHookVarName()))); + ToolChain::addProfileRTLibs(Args, CmdArgs); +} + ToolChain::CXXStdlibType AIX::GetDefaultCXXStdlibType() const { return ToolChain::CST_Libcxx; } diff --git a/clang/lib/Driver/ToolChains/AIX.h b/clang/lib/Driver/ToolChains/AIX.h index c9948a6..e03aebc 100644 --- a/clang/lib/Driver/ToolChains/AIX.h +++ b/clang/lib/Driver/ToolChains/AIX.h @@ -80,6 +80,9 @@ public: void AddCXXStdlibLibArgs(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const override; + void addProfileRTLibs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const override; + CXXStdlibType GetDefaultCXXStdlibType() const override; RuntimeLibType GetDefaultRuntimeLibType() const override; diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c index 3af61d2..adf4132 100644 --- a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c +++ b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c @@ -250,23 +250,21 @@ void __llvm_profile_register_names_function(void *NamesStart, // section exists. So for the scenario where the user objects have no such // section (i.e. when they are compiled with -fno-profile-generate), we always // define these zero length variables in each of the above 4 sections. 
-COMPILER_RT_VISIBILITY int dummy_cnts[0] COMPILER_RT_SECTION( +static int dummy_cnts[0] COMPILER_RT_SECTION( COMPILER_RT_SEG INSTR_PROF_CNTS_SECT_NAME); -COMPILER_RT_VISIBILITY int dummy_data[0] COMPILER_RT_SECTION( +static int dummy_data[0] COMPILER_RT_SECTION( COMPILER_RT_SEG INSTR_PROF_DATA_SECT_NAME); -COMPILER_RT_VISIBILITY const int dummy_name[0] COMPILER_RT_SECTION( +static const int dummy_name[0] COMPILER_RT_SECTION( COMPILER_RT_SEG INSTR_PROF_NAME_SECT_NAME); -COMPILER_RT_VISIBILITY int dummy_vnds[0] COMPILER_RT_SECTION( +static int dummy_vnds[0] COMPILER_RT_SECTION( COMPILER_RT_SEG INSTR_PROF_VNODES_SECT_NAME); -// Create a fake reference to avoid GC'ing of the dummy variables by the linker. -// Ideally, we create a ".ref" of each variable inside the function -// __llvm_profile_begin_counters(), but there's no source level construct -// that allows us to generate that. -__attribute__((destructor)) void keep() { - int volatile use = &dummy_cnts < &dummy_data && &dummy_name < &dummy_vnds; - (void)use; -} +// To avoid GC'ing of the dummy variables by the linker, reference them in an +// array and reference the array in the runtime registration code +// (InstrProfilingRuntime.cpp) +COMPILER_RT_VISIBILITY +void *__llvm_profile_keep[] = {(void *)&dummy_cnts, (void *)&dummy_data, + (void *)&dummy_name, (void *)&dummy_vnds}; #endif #endif diff --git a/compiler-rt/lib/profile/InstrProfilingRuntime.cpp b/compiler-rt/lib/profile/InstrProfilingRuntime.cpp index 4ea2bb2..04f761c 100644 --- a/compiler-rt/lib/profile/InstrProfilingRuntime.cpp +++ b/compiler-rt/lib/profile/InstrProfilingRuntime.cpp @@ -10,19 +10,23 @@ extern "C" { #include "InstrProfiling.h" -/* int __llvm_profile_runtime */ -COMPILER_RT_VISIBILITY int INSTR_PROF_PROFILE_RUNTIME_VAR; +static int RegisterRuntime() { + __llvm_profile_initialize(); + return 0; } -namespace { - -class RegisterRuntime { -public: - RegisterRuntime() { - __llvm_profile_initialize(); - } -}; - -RegisterRuntime Registration; 
+#ifndef _AIX +/* int __llvm_profile_runtime */ +COMPILER_RT_VISIBILITY int INSTR_PROF_PROFILE_RUNTIME_VAR; +static int Registration = RegisterRuntime(); +#else +extern COMPILER_RT_VISIBILITY void *__llvm_profile_keep[]; +/* On AIX, when linking with -bcdtors:csect, the variable whose constructor does + * the registration needs to be explicitly kept, hence we reuse the runtime hook + * variable to do the registration since it'll be kept via the -u linker flag. + * Create a volatile reference to __llvm_profile_keep to keep the array alive.*/ +COMPILER_RT_VISIBILITY int INSTR_PROF_PROFILE_RUNTIME_VAR = + ((void)*(void *volatile *)__llvm_profile_keep, RegisterRuntime()); +#endif } diff --git a/compiler-rt/test/profile/AIX/lit.local.cfg.py b/compiler-rt/test/profile/AIX/lit.local.cfg.py new file mode 100644 index 0000000..7ec27bf --- /dev/null +++ b/compiler-rt/test/profile/AIX/lit.local.cfg.py @@ -0,0 +1,9 @@ +def getRoot(config): + if not config.parent: + return config + return getRoot(config.parent) + +root = getRoot(config) + +if root.host_os not in ['AIX']: + config.unsupported = True diff --git a/compiler-rt/test/profile/AIX/shared-bexpall-pgo.c b/compiler-rt/test/profile/AIX/shared-bexpall-pgo.c new file mode 100644 index 0000000..b3b865d --- /dev/null +++ b/compiler-rt/test/profile/AIX/shared-bexpall-pgo.c @@ -0,0 +1,14 @@ +// RUN: split-file %s %t +// RUN: cd %t +// +// RUN: %clang_pgogen foo.c -c -o foo.o +// RUN: %clang_pgogen -shared foo.o -o libfoo.so -bexpall +// RUN: %clang_pgogen -L%t user.c libfoo.so -o user1 +// RUN: ./user1 + +//--- foo.c +void foo() {} + +//--- user.c +void foo(); +int main() { foo(); } diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index 7d54a9b..3af3f90 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -1199,7 +1199,7 @@ void InstrProfiling::emitRegistration() { bool 
InstrProfiling::emitRuntimeHook() { // We expect the linker to be invoked with -u flag for Linux // in which case there is no need to emit the external variable. - if (TT.isOSLinux()) + if (TT.isOSLinux() || TT.isOSAIX()) return false; // If the module's provided its own runtime, we don't need to do anything. diff --git a/llvm/test/Instrumentation/InstrProfiling/no-counters.ll b/llvm/test/Instrumentation/InstrProfiling/no-counters.ll index 165fe49..9afaf4f 100644 --- a/llvm/test/Instrumentation/InstrProfiling/no-counters.ll +++ b/llvm/test/Instrumentation/InstrProfiling/no-counters.ll @@ -2,7 +2,7 @@ ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.10.0 -passes=instrprof -S | FileCheck %s -check-prefixes=ALL,DARWIN ; RUN: opt < %s -mtriple=x86_64-linux-unknown -passes=instrprof -S | FileCheck %s -check-prefixes=ALL,LINUX -; RUN: opt < %s -mtriple=powerpc64-ibm-aix-xcoff -passes=instrprof -S | FileCheck %s -check-prefixes=ALL,DARWIN +; RUN: opt < %s -mtriple=powerpc64-ibm-aix-xcoff -passes=instrprof -S | FileCheck %s -check-prefixes=ALL,LINUX ; ALL-NOT: @__profc ; ALL-NOT: @__profd ; DARWIN: @__llvm_profile_runtime diff --git a/llvm/test/Instrumentation/InstrProfiling/profiling.ll b/llvm/test/Instrumentation/InstrProfiling/profiling.ll index 95e5e08..caff611 100644 --- a/llvm/test/Instrumentation/InstrProfiling/profiling.ll +++ b/llvm/test/Instrumentation/InstrProfiling/profiling.ll @@ -13,7 +13,7 @@ ; MACHO: @__llvm_profile_runtime = external hidden global i32 ; ELF_GENERIC: @__llvm_profile_runtime = external hidden global i32 ; ELF-NOT: @__llvm_profile_runtime = external global i32 -; XCOFF: @__llvm_profile_runtime = external hidden global i32 +; XCOFF-NOT: @__llvm_profile_runtime = external hidden global i32 ; COFF: @__llvm_profile_runtime = external hidden global i32 ; ELF: $__profc_foo = comdat nodeduplicate @@ -101,7 +101,7 @@ declare void @llvm.instrprof.increment(ptr, i64, i32, i32) ; ELF_GENERIC: @llvm.compiler.used = appending global [6 x ptr] [ptr 
@__llvm_profile_runtime, ptr @__profd_foo, ptr @__profd_foo_weak, ptr @"__profd_linkage.ll:foo_internal", ptr @__profd_foo_inline, ptr @__profd_foo_extern] ; MACHO: @llvm.compiler.used = appending global [6 x ptr] [ptr @__llvm_profile_runtime_user, ptr @__profd_foo, {{.*}} ; COFF: @llvm.compiler.used = appending global [6 x ptr] [ptr @__llvm_profile_runtime_user, ptr @__profd_foo, ptr @__profd_foo_weak, ptr @"__profd_linkage.ll:foo_internal", ptr @__profd_foo_inline, ptr @__profd_foo_extern] -; XCOFF: @llvm.used = appending global [7 x ptr] [ptr @__llvm_profile_runtime_user, ptr @__profd_foo, ptr @__profd_foo_weak, ptr @"__profd_linkage.ll:foo_internal", ptr @__profd_foo_inline, ptr @__profd_foo_extern, ptr @__llvm_prf_nm] +; XCOFF: @llvm.used = appending global [6 x ptr] [ptr @__profd_foo, ptr @__profd_foo_weak, ptr @"__profd_linkage.ll:foo_internal", ptr @__profd_foo_inline, ptr @__profd_foo_extern, ptr @__llvm_prf_nm] ; MACHO: define linkonce_odr hidden i32 @__llvm_profile_runtime_user() {{.*}} { ; MACHO: %[[REG:.*]] = load i32, ptr @__llvm_profile_runtime @@ -112,10 +112,7 @@ declare void @llvm.instrprof.increment(ptr, i64, i32, i32) ; ELFRT-NOT: %[[REG:.*]] = load i32, ptr @__llvm_profile_runtime ; PS: define linkonce_odr hidden i32 @__llvm_profile_runtime_user() {{.*}} { ; PS: %[[REG:.*]] = load i32, ptr @__llvm_profile_runtime -; XCOFF: define linkonce_odr hidden i32 @__llvm_profile_runtime_user() {{.*}} { -; XCOFF: %[[REG:.*]] = load i32, ptr @__llvm_profile_runtime -; XCOFF: ret i32 %[[REG]] -; XCOFF: } +; XCOFF-NOT: define .* __llvm_profile_runtime_user ; ELF_GENERIC: define internal void @__llvm_profile_register_functions() unnamed_addr { ; ELF_GENERIC-NEXT: call void @__llvm_profile_register_function(ptr @__llvm_profile_runtime) -- 2.7.4