From f50aef745c3ba981f3d0bf118b809d0c3880a490 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 12 Mar 2021 13:43:36 +0100 Subject: [PATCH] Revert "[InstrProfiling] Don't generate __llvm_profile_runtime_user" This broke the check-profile tests on Mac, see comment on the code review. > This is no longer needed, we can add __llvm_profile_runtime directly > to llvm.compiler.used or llvm.used to achieve the same effect. > > Differential Revision: https://reviews.llvm.org/D98325 This reverts commit c7712087cbb505d324e1149fa224f607c91a8c6a. Also reverting the dependent follow-up commit: Revert "[InstrProfiling] Generate runtime hook for ELF platforms" > When using -fprofile-list to selectively apply instrumentation only > to certain files or functions, we may end up with a binary that doesn't > have any counters in the case where no files were selected. However, > because on Linux and Fuchsia, we pass -u__llvm_profile_runtime, the > runtime would still be pulled in and incur some non-trivial overhead, > especially in the case when the continuous or runtime counter relocation > mode is being used. A better way would be to pull in the profile runtime > only when needed by declaring the __llvm_profile_runtime symbol in the > translation unit only when needed. > > This approach was already used prior to 9a041a75221ca, but we changed it > to always generate the __llvm_profile_runtime due to a TAPI limitation. > Since TAPI is only used on Mach-O platforms, we could use the early > emission of __llvm_profile_runtime there, and on other platforms we > could change back to the earlier approach where the symbol is generated > later only when needed. We can stop passing -u__llvm_profile_runtime to > the linker on Linux and Fuchsia since the generated undefined symbol in > each translation unit that needed it serves the same purpose. > > Differential Revision: https://reviews.llvm.org/D98061 This reverts commit 87fd09b25f8892e07b7ba11525baa9c3ec3e5d3f. --- clang/docs/UsersManual.rst | 8 ------ clang/lib/Driver/ToolChains/Fuchsia.cpp | 10 ++++++++ clang/lib/Driver/ToolChains/Fuchsia.h | 3 +++ clang/lib/Driver/ToolChains/Linux.cpp | 10 ++++++++ clang/lib/Driver/ToolChains/Linux.h | 2 ++ clang/test/Driver/coverage-ld.c | 2 ++ clang/test/Driver/fuchsia.c | 2 ++ llvm/include/llvm/ProfileData/InstrProf.h | 6 +++++ .../Transforms/Instrumentation/InstrProfiling.cpp | 30 +++++++++++++++++----- .../test/Instrumentation/InstrProfiling/linkage.ll | 9 +++++++ .../Instrumentation/InstrProfiling/profiling.ll | 26 +++++++++---------- 11 files changed, 80 insertions(+), 28 deletions(-) diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 8a369c8..28de4e3 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -2301,14 +2301,6 @@ In these cases, you can use the flag ``-fno-profile-instr-generate`` (or Note that these flags should appear after the corresponding profile flags to have an effect. -.. note:: - - When none of the translation units inside a binary is instrumented, in the - case of ELF and COFF binary format the profile runtime will not be linked - into the binary and no profile will be produced, while in the case of Mach-O - the profile runtime will be linked and profile will be produced but there - will not be any counters. - Instrumenting only selected files or functions ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/lib/Driver/ToolChains/Fuchsia.cpp b/clang/lib/Driver/ToolChains/Fuchsia.cpp index 4b27a102..8e08601 100644 --- a/clang/lib/Driver/ToolChains/Fuchsia.cpp +++ b/clang/lib/Driver/ToolChains/Fuchsia.cpp @@ -394,3 +394,13 @@ SanitizerMask Fuchsia::getDefaultSanitizers() const { } return Res; } + +void Fuchsia::addProfileRTLibs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const { + // Add linker option -u__llvm_profile_runtime to cause runtime + // initialization module to be linked in. + if (needsProfileRT(Args)) + CmdArgs.push_back(Args.MakeArgString( + Twine("-u", llvm::getInstrProfRuntimeHookVarName()))); + ToolChain::addProfileRTLibs(Args, CmdArgs); +} diff --git a/clang/lib/Driver/ToolChains/Fuchsia.h b/clang/lib/Driver/ToolChains/Fuchsia.h index 1a0263c..07adf9b 100644 --- a/clang/lib/Driver/ToolChains/Fuchsia.h +++ b/clang/lib/Driver/ToolChains/Fuchsia.h @@ -71,6 +71,9 @@ public: SanitizerMask getSupportedSanitizers() const override; SanitizerMask getDefaultSanitizers() const override; + void addProfileRTLibs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const override; + RuntimeLibType GetRuntimeLibType(const llvm::opt::ArgList &Args) const override; CXXStdlibType diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp index 5f5b069..ad98013 100644 --- a/clang/lib/Driver/ToolChains/Linux.cpp +++ b/clang/lib/Driver/ToolChains/Linux.cpp @@ -925,6 +925,16 @@ SanitizerMask Linux::getSupportedSanitizers() const { return Res; } +void Linux::addProfileRTLibs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const { + // Add linker option -u__llvm_profile_runtime to cause runtime + // initialization module to be linked in. + if (needsProfileRT(Args)) + CmdArgs.push_back(Args.MakeArgString( + Twine("-u", llvm::getInstrProfRuntimeHookVarName()))); + ToolChain::addProfileRTLibs(Args, CmdArgs); +} + llvm::DenormalMode Linux::getDefaultDenormalModeForType(const llvm::opt::ArgList &DriverArgs, const JobAction &JA, diff --git a/clang/lib/Driver/ToolChains/Linux.h b/clang/lib/Driver/ToolChains/Linux.h index 605ba54..05e01a2 100644 --- a/clang/lib/Driver/ToolChains/Linux.h +++ b/clang/lib/Driver/ToolChains/Linux.h @@ -43,6 +43,8 @@ public: bool isNoExecStackDefault() const override; bool IsMathErrnoDefault() const override; SanitizerMask getSupportedSanitizers() const override; + void addProfileRTLibs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const override; std::string computeSysRoot() const override; std::string getDynamicLinker(const llvm::opt::ArgList &Args) const override; diff --git a/clang/test/Driver/coverage-ld.c b/clang/test/Driver/coverage-ld.c index fd13f775..c15531f 100644 --- a/clang/test/Driver/coverage-ld.c +++ b/clang/test/Driver/coverage-ld.c @@ -12,7 +12,9 @@ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: | FileCheck --check-prefix=CHECK-LINUX-I386 %s // +// CHECK-LINUX-I386-NOT: "-u__llvm_profile_runtime" // CHECK-LINUX-I386: /Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}linux{{/|\\\\}}libclang_rt.profile-i386.a" +// CHECK-LINUX-I386-NOT: "-u__llvm_profile_runtime" // CHECK-LINUX-I386: "-lc" // // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ diff --git a/clang/test/Driver/fuchsia.c b/clang/test/Driver/fuchsia.c index 9b03fea..5d3b4c8 100644 --- a/clang/test/Driver/fuchsia.c +++ b/clang/test/Driver/fuchsia.c @@ -249,6 +249,7 @@ // RUN: -fuse-ld=lld 2>&1 \ // RUN: | FileCheck %s -check-prefix=CHECK-PROFRT-AARCH64 // CHECK-PROFRT-AARCH64: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]" +// CHECK-PROFRT-AARCH64: "-u__llvm_profile_runtime" // CHECK-PROFRT-AARCH64: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}aarch64-fuchsia{{/|\\\\}}libclang_rt.profile.a" // RUN: %clang %s -### --target=x86_64-fuchsia \ @@ -257,4 +258,5 @@ // RUN: -fuse-ld=lld 2>&1 \ // RUN: | FileCheck %s -check-prefix=CHECK-PROFRT-X86_64 // CHECK-PROFRT-X86_64: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]" +// CHECK-PROFRT-X86_64: "-u__llvm_profile_runtime" // CHECK-PROFRT-X86_64: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}libclang_rt.profile.a" diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index 1ceab4f..4a2cebe 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -149,6 +149,12 @@ inline StringRef getInstrProfRuntimeHookVarName() { return INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_RUNTIME_VAR); } +/// Return the name of the compiler generated function that references the +/// runtime hook variable. The function is a weak global. +inline StringRef getInstrProfRuntimeHookVarUseFuncName() { + return "__llvm_profile_runtime_user"; +} + inline StringRef getInstrProfCounterBiasVarName() { return "__llvm_profile_counter_bias"; } diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index 998a8b9..bdb1824 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -543,10 +543,8 @@ bool InstrProfiling::run( UsedVars.clear(); TT = Triple(M.getTargetTriple()); - bool MadeChange = false; - // Emit the runtime hook even if no counters are present in Mach-O. - if (TT.isOSBinFormatMachO()) - MadeChange = emitRuntimeHook(); + // Emit the runtime hook even if no counters are present. + bool MadeChange = emitRuntimeHook(); // Improve compile time by avoiding linear scans when there is no work. GlobalVariable *CoverageNamesVar = @@ -586,8 +584,6 @@ bool InstrProfiling::run( emitVNodes(); emitNameData(); emitRegistration(); - if (!TT.isOSBinFormatMachO()) - emitRuntimeHook(); emitUses(); emitInitialization(); return true; @@ -1062,6 +1058,11 @@ void InstrProfiling::emitRegistration() { } bool InstrProfiling::emitRuntimeHook() { + // We expect the linker to be invoked with -u flag for Linux or + // Fuchsia, in which case there is no need to emit the user function. + if (TT.isOSLinux() || TT.isOSFuchsia()) + return false; + // If the module's provided its own runtime, we don't need to do anything. if (M->getGlobalVariable(getInstrProfRuntimeHookVarName())) return false; @@ -1072,8 +1073,23 @@ bool InstrProfiling::emitRuntimeHook() { new GlobalVariable(*M, Int32Ty, false, GlobalValue::ExternalLinkage, nullptr, getInstrProfRuntimeHookVarName()); + // Make a function that uses it. + auto *User = Function::Create(FunctionType::get(Int32Ty, false), + GlobalValue::LinkOnceODRLinkage, + getInstrProfRuntimeHookVarUseFuncName(), M); + User->addFnAttr(Attribute::NoInline); + if (Options.NoRedZone) + User->addFnAttr(Attribute::NoRedZone); + User->setVisibility(GlobalValue::HiddenVisibility); + if (TT.supportsCOMDAT()) + User->setComdat(M->getOrInsertComdat(User->getName())); + + IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", User)); + auto *Load = IRB.CreateLoad(Int32Ty, Var); + IRB.CreateRet(Load); + // Mark the user variable as used so that it isn't stripped out. - CompilerUsedVars.push_back(Var); + CompilerUsedVars.push_back(User); return true; } diff --git a/llvm/test/Instrumentation/InstrProfiling/linkage.ll b/llvm/test/Instrumentation/InstrProfiling/linkage.ll index 81ba4d3..8de6a2e 100644 --- a/llvm/test/Instrumentation/InstrProfiling/linkage.ll +++ b/llvm/test/Instrumentation/InstrProfiling/linkage.ll @@ -10,6 +10,7 @@ ; RUN: opt < %s -mtriple=x86_64-pc-win32-coff -passes=instrprof -S | FileCheck %s --check-prefixes=COFF ; MACHO: @__llvm_profile_runtime = external global i32 +; ELF-NOT: @__llvm_profile_runtime = external global i32 ; ELF: $__profd_foo = comdat noduplicates ; ELF: $__profd_foo_weak = comdat noduplicates @@ -80,3 +81,11 @@ define available_externally void @foo_extern() { } declare void @llvm.instrprof.increment(i8*, i64, i32, i32) + +; MACHO: define linkonce_odr hidden i32 @__llvm_profile_runtime_user() {{.*}} { +; MACHO: %[[REG:.*]] = load i32, i32* @__llvm_profile_runtime +; MACHO: ret i32 %[[REG]] +; MACHO: } +; COFF: define linkonce_odr hidden i32 @__llvm_profile_runtime_user() {{.*}} comdat { +; ELF-NOT: define linkonce_odr hidden i32 @__llvm_profile_runtime_user() {{.*}} { +; ELF-NOT: %[[REG:.*]] = load i32, i32* @__llvm_profile_runtime diff --git a/llvm/test/Instrumentation/InstrProfiling/profiling.ll b/llvm/test/Instrumentation/InstrProfiling/profiling.ll index 76d319d..44262dd 100644 --- a/llvm/test/Instrumentation/InstrProfiling/profiling.ll +++ b/llvm/test/Instrumentation/InstrProfiling/profiling.ll @@ -1,11 +1,14 @@ ; RUN: opt < %s -mtriple=x86_64 -passes=instrprof -S | FileCheck %s --check-prefixes=CHECK,ELF,ELF_GENERIC ; RUN: opt < %s -mtriple=x86_64-linux -passes=instrprof -S | FileCheck %s --check-prefixes=CHECK,ELF_LINUX ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.10.0 -passes=instrprof -S | FileCheck %s --check-prefixes=CHECK,MACHO -; RUN: opt < %s -mtriple=x86_64-windows -passes=instrprof -S | FileCheck %s --check-prefixes=CHECK,COFF +; RUN: opt < %s -mtriple=x86_64-windows -passes=instrprof -S | FileCheck %s --check-prefixes=CHECK,WIN ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.10.0 -instrprof -S | FileCheck %s +; ELF_GENERIC: @__llvm_profile_runtime = external global i32 +; ELF_LINUX-NOT: @__llvm_profile_runtime ; MACHO: @__llvm_profile_runtime = external global i32 +; WIN: @__llvm_profile_runtime = external global i32 @__profn_foo = hidden constant [3 x i8] c"foo" ; CHECK-NOT: __profn_foo @@ -18,8 +21,8 @@ ; ELF: @__profd_foo = hidden {{.*}}, section "__llvm_prf_data", comdat, align 8 ; MACHO: @__profc_foo = hidden global [1 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8 ; MACHO: @__profd_foo = hidden {{.*}}, section "__DATA,__llvm_prf_data,regular,live_support", align 8 -; COFF: @__profc_foo = internal global [1 x i64] zeroinitializer, section ".lprfc$M", align 8 -; COFF: @__profd_foo = internal {{.*}}, section ".lprfd$M", align 8 +; WIN: @__profc_foo = internal global [1 x i64] zeroinitializer, section ".lprfc$M", align 8 +; WIN: @__profd_foo = internal {{.*}}, section ".lprfd$M", align 8 define void @foo() { call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 0, i32 1, i32 0) ret void @@ -29,8 +32,8 @@ define void @foo() { ; ELF: @__profd_bar = hidden {{.*}}, section "__llvm_prf_data", comdat, align 8 ; MACHO: @__profc_bar = hidden global [1 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8 ; MACHO: @__profd_bar = hidden {{.*}}, section "__DATA,__llvm_prf_data,regular,live_support", align 8 -; COFF: @__profc_bar = internal global [1 x i64] zeroinitializer, section ".lprfc$M", align 8 -; COFF: @__profd_bar = internal {{.*}}, section ".lprfd$M", align 8 +; WIN: @__profc_bar = internal global [1 x i64] zeroinitializer, section ".lprfc$M", align 8 +; WIN: @__profd_bar = internal {{.*}}, section ".lprfd$M", align 8 define void @bar() { call void @llvm.instrprof.increment(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @__profn_bar, i32 0, i32 0), i64 0, i32 1, i32 0) ret void @@ -40,8 +43,8 @@ define void @bar() { ; ELF: @__profd_baz = hidden {{.*}}, section "__llvm_prf_data", comdat, align 8 ; MACHO: @__profc_baz = hidden global [3 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8 ; MACHO: @__profd_baz = hidden {{.*}}, section "__DATA,__llvm_prf_data,regular,live_support", align 8 -; COFF: @__profc_baz = internal global [3 x i64] zeroinitializer, section ".lprfc$M", align 8 -; COFF: @__profd_baz = internal {{.*}}, section ".lprfd$M", align 8 +; WIN: @__profc_baz = internal global [3 x i64] zeroinitializer, section ".lprfc$M", align 8 +; WIN: @__profd_baz = internal {{.*}}, section ".lprfd$M", align 8 define void @baz() { call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_baz, i32 0, i32 0), i64 0, i32 3, i32 0) call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_baz, i32 0, i32 0), i64 0, i32 3, i32 1) @@ -51,12 +54,9 @@ define void @baz() { declare void @llvm.instrprof.increment(i8*, i64, i32, i32) -; ELF: @__llvm_profile_runtime = external global i32 -; COFF: @__llvm_profile_runtime = external global i32 - -; ELF: @llvm.compiler.used = appending global {{.*}} @__profd_foo {{.*}} @__profd_bar {{.*}} @__profd_baz {{.*}} @__llvm_profile_runtime -; MACHO: @llvm.used = appending global {{.*}} @__llvm_profile_runtime {{.*}} @__profd_foo {{.*}} @__profd_bar {{.*}} @__profd_baz -; COFF: @llvm.used = appending global {{.*}} @__profd_foo {{.*}} @__profd_bar {{.*}} @__profd_baz {{.*}} @__llvm_profile_runtime +; ELF: @llvm.compiler.used = appending global {{.*}} @__llvm_profile_runtime_user {{.*}} @__profd_foo {{.*}} @__profd_bar {{.*}} @__profd_baz +; MACHO: @llvm.used = appending global {{.*}} @__llvm_profile_runtime_user {{.*}} @__profd_foo {{.*}} @__profd_bar {{.*}} @__profd_baz +; WIN: @llvm.used = appending global {{.*}} @__llvm_profile_runtime_user {{.*}} @__profd_foo {{.*}} @__profd_bar {{.*}} @__profd_baz ; ELF_GENERIC: define internal void @__llvm_profile_register_functions() unnamed_addr { ; ELF_GENERIC-NEXT: call void @__llvm_profile_register_function(i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*)) -- 2.7.4