From e56626e43826c9d7c35113635d62b57c905ef3c0 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 3 Aug 2020 20:35:50 -0700 Subject: [PATCH] [PGO] Move __profc_ and __profvp_ from their own comdat groups to __profd_'s comdat group D68041 placed `__profc_`, `__profd_` and (if it exists) `__profvp_` in different comdat groups. There are some issues: * Cost: one or two additional section headers (`.group` section(s)): 64 or 128 bytes on ELF64. * `__profc_`, `__profd_` and (if it exists) `__profvp_` should be retained or discarded together. Placing them into separate comdat groups is conceptually inferior. * If the prevailing group does not include `__profvp_` (value profiling not used) but a non-prevailing group from another translation unit has `__profvp_` (the function is inlined into another and triggers value profiling), there will be a stray `__profvp_` if --gc-sections is not enabled. This has been fixed by 3d6f53018f845e893ad34f64ff2851a2e5c3ba1d. Actually, we can reuse an existing symbol (we choose `__profd_`) as the group signature to avoid a string in the string table (the sole reason that D68041 could improve code size is that `__profv_` was an otherwise unused symbol which wasted string table space). This saves one or two section headers. For a -DCMAKE_BUILD_TYPE=Release -DLLVM_BUILD_INSTRUMENTED=IR build, `ninja clang lld`, the patch has saved 10.5MiB (2.2%) for the total .o size. 
Reviewed By: davidxl Differential Revision: https://reviews.llvm.org/D84723 --- llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp | 9 +++++---- llvm/test/Instrumentation/InstrProfiling/PR23499.ll | 4 ++-- llvm/test/Instrumentation/InstrProfiling/comdat.ll | 8 ++++---- llvm/test/Instrumentation/InstrProfiling/icall.ll | 15 ++++++++++++++- llvm/test/Instrumentation/InstrProfiling/linkage.ll | 4 ++-- llvm/test/Transforms/PGOProfile/comdat_internal.ll | 3 ++- 6 files changed, 29 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index 7598889..623f463 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -882,9 +882,10 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) { Visibility = GlobalValue::HiddenVisibility; } } + std::string DataVarName = getVarName(Inc, getInstrProfDataVarPrefix()); auto MaybeSetComdat = [=](GlobalVariable *GV) { if (NeedComdat) - GV->setComdat(M->getOrInsertComdat(GV->getName())); + GV->setComdat(M->getOrInsertComdat(DataVarName)); }; uint64_t NumCounters = Inc->getNumCounters()->getZExtValue(); @@ -949,9 +950,9 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) { #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init, #include "llvm/ProfileData/InstrProfData.inc" }; - auto *Data = new GlobalVariable(*M, DataTy, false, Linkage, - ConstantStruct::get(DataTy, DataVals), - getVarName(Inc, getInstrProfDataVarPrefix())); + auto *Data = + new GlobalVariable(*M, DataTy, false, Linkage, + ConstantStruct::get(DataTy, DataVals), DataVarName); Data->setVisibility(Visibility); Data->setSection(getInstrProfSectionName(IPSK_data, TT.getObjectFormat())); Data->setAlignment(Align(INSTR_PROF_DATA_ALIGNMENT)); diff --git a/llvm/test/Instrumentation/InstrProfiling/PR23499.ll b/llvm/test/Instrumentation/InstrProfiling/PR23499.ll 
index 1a4c049..098153f 100644 --- a/llvm/test/Instrumentation/InstrProfiling/PR23499.ll +++ b/llvm/test/Instrumentation/InstrProfiling/PR23499.ll @@ -14,13 +14,13 @@ $_Z3barIvEvv = comdat any @__profn__Z3barIvEvv = linkonce_odr hidden constant [11 x i8] c"_Z3barIvEvv", align 1 ; CHECK-NOT: __profn__Z3barIvEvv -; CHECK: @__profc__Z3barIvEvv = linkonce_odr hidden global [1 x i64] zeroinitializer, section "{{.*}}__llvm_prf_cnts", comdat, align 8 +; CHECK: @__profc__Z3barIvEvv = linkonce_odr hidden global [1 x i64] zeroinitializer, section "{{.*}}__llvm_prf_cnts", comdat($__profd__Z3barIvEvv), align 8 ; CHECK: @__profd__Z3barIvEvv = linkonce_odr hidden global { i64, i64, i64*, i8*, i8*, i32, [2 x i16] } { i64 4947693190065689389, i64 0, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__Z3barIvEvv, i32 0, i32 0), i8*{{.*}}, i8* null, i32 1, [2 x i16] zeroinitializer }, section "{{.*}}__llvm_prf_data{{.*}}", comdat, align 8 ; CHECK: @__llvm_prf_nm = private constant [{{.*}} x i8] c"{{.*}}", section "{{.*}}__llvm_prf_names" ; COFF-NOT: __profn__Z3barIvEvv -; COFF: @__profc__Z3barIvEvv = linkonce_odr hidden global [1 x i64] zeroinitializer, section "{{.*}}prfc$M", comdat, align 8 +; COFF: @__profc__Z3barIvEvv = linkonce_odr hidden global [1 x i64] zeroinitializer, section "{{.*}}prfc$M", comdat($__profd__Z3barIvEvv), align 8 ; COFF: @__profd__Z3barIvEvv = linkonce_odr hidden global { i64, i64, i64*, i8*, i8*, i32, [2 x i16] } { i64 4947693190065689389, i64 0, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__Z3barIvEvv, i32 0, i32 0), i8*{{.*}}, i8* null, i32 1, [2 x i16] zeroinitializer }, section "{{.*}}prfd{{.*}}", comdat, align 8 diff --git a/llvm/test/Instrumentation/InstrProfiling/comdat.ll b/llvm/test/Instrumentation/InstrProfiling/comdat.ll index 3f169c4..dfcd71f 100644 --- a/llvm/test/Instrumentation/InstrProfiling/comdat.ll +++ b/llvm/test/Instrumentation/InstrProfiling/comdat.ll @@ -15,9 +15,9 @@ $foo_inline = comdat any @__profn_foo_inline 
= linkonce_odr hidden constant [10 x i8] c"foo_inline" -; ELF: @__profc_foo_inline = linkonce_odr hidden global{{.*}}, section "__llvm_prf_cnts", comdat, align 8 +; ELF: @__profc_foo_inline = linkonce_odr hidden global{{.*}}, section "__llvm_prf_cnts", comdat($__profd_foo_inline), align 8 ; ELF: @__profd_foo_inline = linkonce_odr hidden global{{.*}}, section "__llvm_prf_data", comdat, align 8 -; COFF: @__profc_foo_inline = linkonce_odr hidden global{{.*}}, section ".lprfc$M", comdat, align 8 +; COFF: @__profc_foo_inline = linkonce_odr hidden global{{.*}}, section ".lprfc$M", comdat($__profd_foo_inline), align 8 ; COFF: @__profd_foo_inline = linkonce_odr hidden global{{.*}}, section ".lprfd$M", comdat, align 8 define weak_odr void @foo_inline() comdat { call void @llvm.instrprof.increment(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @__profn_foo_inline, i32 0, i32 0), i64 0, i32 1, i32 0) @@ -28,9 +28,9 @@ $foo_extern = comdat any @__profn_foo_extern = linkonce_odr hidden constant [10 x i8] c"foo_extern" -; ELF: @__profc_foo_extern = linkonce_odr hidden global{{.*}}, section "__llvm_prf_cnts", comdat, align 8 +; ELF: @__profc_foo_extern = linkonce_odr hidden global{{.*}}, section "__llvm_prf_cnts", comdat($__profd_foo_extern), align 8 ; ELF: @__profd_foo_extern = linkonce_odr hidden global{{.*}}, section "__llvm_prf_data", comdat, align 8 -; COFF: @__profc_foo_extern = linkonce_odr hidden global{{.*}}, section ".lprfc$M", comdat, align 8 +; COFF: @__profc_foo_extern = linkonce_odr hidden global{{.*}}, section ".lprfc$M", comdat($__profd_foo_extern), align 8 ; COFF: @__profd_foo_extern = linkonce_odr hidden global{{.*}}, section ".lprfd$M", comdat, align 8 define available_externally void @foo_extern() { call void @llvm.instrprof.increment(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @__profn_foo_extern, i32 0, i32 0), i64 0, i32 1, i32 0) diff --git a/llvm/test/Instrumentation/InstrProfiling/icall.ll b/llvm/test/Instrumentation/InstrProfiling/icall.ll 
index d92de47..9d45d7c 100644 --- a/llvm/test/Instrumentation/InstrProfiling/icall.ll +++ b/llvm/test/Instrumentation/InstrProfiling/icall.ll @@ -14,6 +14,7 @@ @__profn_foo = private constant [3 x i8] c"foo" +@__profn_bar = private constant [3 x i8] c"bar" define i32 @foo(i32 ()* ) { call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 12884901887, i32 1, i32 0) @@ -23,6 +24,17 @@ define i32 @foo(i32 ()* ) { ret i32 %3 } +$bar = comdat any + +define i32 @bar(i32 ()* ) comdat { +entry: + call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_bar, i32 0, i32 0), i64 12884901887, i32 1, i32 0) + %1 = ptrtoint i32 ()* %0 to i64 + call void @llvm.instrprof.value.profile(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_bar, i32 0, i32 0), i64 12884901887, i64 %1, i32 0, i32 0) + %2 = tail call i32 %0() + ret i32 %2 +} + ; Function Attrs: nounwind declare void @llvm.instrprof.increment(i8*, i64, i32, i32) #0 @@ -31,7 +43,8 @@ declare void @llvm.instrprof.value.profile(i8*, i64, i64, i32, i32) #0 attributes #0 = { nounwind } -; STATIC: @__profvp_foo +; STATIC: @__profvp_foo = private global [1 x i64] zeroinitializer, section "{{[^"]+}}", align 8 +; STATIC: @__profvp_bar = private global [1 x i64] zeroinitializer, section "{{[^"]+}}", comdat($__profd_bar), align 8 ; STATIC: @__llvm_prf_vnodes ; DYN-NOT: @__profvp_foo diff --git a/llvm/test/Instrumentation/InstrProfiling/linkage.ll b/llvm/test/Instrumentation/InstrProfiling/linkage.ll index be3af8e..5e55ef7 100644 --- a/llvm/test/Instrumentation/InstrProfiling/linkage.ll +++ b/llvm/test/Instrumentation/InstrProfiling/linkage.ll @@ -55,11 +55,11 @@ define linkonce_odr void @foo_inline() { ret void } -; LINUX: @__profc_foo_extern = linkonce_odr hidden global {{.*}}section "__llvm_prf_cnts", comdat, align 8 +; LINUX: @__profc_foo_extern = linkonce_odr hidden global {{.*}}section "__llvm_prf_cnts", 
comdat($__profd_foo_extern), align 8 ; LINUX: @__profd_foo_extern = linkonce_odr hidden global {{.*}}section "__llvm_prf_data", comdat, align 8 ; MACHO: @__profc_foo_extern = linkonce_odr hidden global ; MACHO: @__profd_foo_extern = linkonce_odr hidden global -; COFF: @__profc_foo_extern = linkonce_odr hidden global {{.*}}section ".lprfc$M", comdat, align 8 +; COFF: @__profc_foo_extern = linkonce_odr hidden global {{.*}}section ".lprfc$M", comdat($__profd_foo_extern), align 8 ; COFF: @__profd_foo_extern = linkonce_odr hidden global {{.*}}section ".lprfd$M", comdat, align 8 define available_externally void @foo_extern() { call void @llvm.instrprof.increment(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @__profn_foo_extern, i32 0, i32 0), i64 0, i32 1, i32 0) diff --git a/llvm/test/Transforms/PGOProfile/comdat_internal.ll b/llvm/test/Transforms/PGOProfile/comdat_internal.ll index f0906bf..e5915d9 100644 --- a/llvm/test/Transforms/PGOProfile/comdat_internal.ll +++ b/llvm/test/Transforms/PGOProfile/comdat_internal.ll @@ -7,12 +7,13 @@ $foo = comdat any ; CHECK: $foo = comdat any ; CHECK: $__llvm_profile_raw_version = comdat any +; CHECK: $__profd__stdin__foo.[[FOO_HASH:[0-9]+]] = comdat any @bar = global i32 ()* @foo, align 8 ; CHECK: @__llvm_profile_raw_version = constant i64 {{[0-9]+}}, comdat ; CHECK-NOT: __profn__stdin__foo -; CHECK: @__profc__stdin__foo.[[FOO_HASH:[0-9]+]] = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat, align 8 +; CHECK: @__profc__stdin__foo.[[FOO_HASH]] = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat($__profd__stdin__foo.[[FOO_HASH]]), align 8 ; CHECK: @__profd__stdin__foo.[[FOO_HASH]] = private global { i64, i64, i64*, i8*, i8*, i32, [2 x i16] } { i64 -5640069336071256030, i64 [[FOO_HASH]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__stdin__foo.[[FOO_HASH]], i32 0, i32 0), i8* null ; CHECK-NOT: bitcast (i32 ()* @foo to i8*) ; CHECK-SAME: , i8* null, i32 1, [2 x 
i16] zeroinitializer }, section "__llvm_prf_data", comdat, align 8 -- 2.7.4