From c844f8846aabda577a8bf3b460d4993e89475218 Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Fri, 25 Oct 2019 14:56:12 -0700 Subject: [PATCH] [ThinLTO/WPD] Fix index-based WPD for available_externally vtables Summary: Clang does not add type metadata to available_externally vtables. When choosing a summary to look at for virtual function definitions, make sure we skip summaries for any available externally vtables as they will not describe any virtual function functions, which are only summarized in the presence of type metadata on the vtable def. Simply look for the corresponding strong def's summary. Also add handling for same-named local vtables with the same GUID because of same-named files without enough distinguishing path. In that case we return a conservative result with no devirtualization. Reviewers: pcc, davidxl, evgeny777 Subscribers: mehdi_amini, inglorion, hiraditya, steven_wu, dexonsmith, arphaman, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D69452 --- llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp | 34 +++++++--- .../X86/Inputs/devirt_available_externally.ll | 14 ++++ .../ThinLTO/X86/Inputs/devirt_local_same_guid.ll | 41 ++++++++++++ .../ThinLTO/X86/devirt_available_externally.ll | 72 +++++++++++++++++++++ llvm/test/ThinLTO/X86/devirt_local_same_guid.ll | 74 ++++++++++++++++++++++ 5 files changed, 227 insertions(+), 8 deletions(-) create mode 100644 llvm/test/ThinLTO/X86/Inputs/devirt_available_externally.ll create mode 100644 llvm/test/ThinLTO/X86/Inputs/devirt_local_same_guid.ll create mode 100644 llvm/test/ThinLTO/X86/devirt_available_externally.ll create mode 100644 llvm/test/ThinLTO/X86/devirt_local_same_guid.ll diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp index f0cf558..d7c7504 100644 --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -841,17 +841,35 @@ bool DevirtIndex::tryFindVirtualCallTargets( std::vector &TargetsForSlot, const TypeIdCompatibleVtableInfo TIdInfo, uint64_t ByteOffset) { for (const TypeIdOffsetVtableInfo P : TIdInfo) { - // VTable initializer should have only one summary, or all copies must be - // linkonce/weak ODR. + // Ensure that we have at most one external linkage vtable initializer. assert(P.VTableVI.getSummaryList().size() == 1 || - llvm::all_of( + llvm::count_if( P.VTableVI.getSummaryList(), [&](const std::unique_ptr &Summary) { - return GlobalValue::isLinkOnceODRLinkage(Summary->linkage()) || - GlobalValue::isWeakODRLinkage(Summary->linkage()); - })); - const auto *VS = cast(P.VTableVI.getSummaryList()[0].get()); - if (!P.VTableVI.getSummaryList()[0]->isLive()) + return GlobalValue::isExternalLinkage(Summary->linkage()); + }) <= 1); + // Find the first non-available_externally linkage vtable initializer. + // We can have multiple available_externally, linkonce_odr and weak_odr + // vtable initializers, however we want to skip available_externally as they + // do not have type metadata attached, and therefore the summary will not + // contain any vtable functions. + // + // Also, handle the case of same-named local Vtables with the same path + // and therefore the same GUID. This can happen if there isn't enough + // distinguishing path when compiling the source file. In that case we + // conservatively return false early. + const GlobalVarSummary *VS = nullptr; + bool LocalFound = false; + for (auto &S : P.VTableVI.getSummaryList()) { + if (GlobalValue::isLocalLinkage(S->linkage())) { + if (LocalFound) + return false; + LocalFound = true; + } + if (!GlobalValue::isAvailableExternallyLinkage(S->linkage())) + VS = cast(S.get()); + } + if (!VS->isLive()) continue; for (auto VTP : VS->vTableFuncs()) { if (VTP.VTableOffset != P.AddressPointOffset + ByteOffset) diff --git a/llvm/test/ThinLTO/X86/Inputs/devirt_available_externally.ll b/llvm/test/ThinLTO/X86/Inputs/devirt_available_externally.ll new file mode 100644 index 0000000..56d6ea5 --- /dev/null +++ b/llvm/test/ThinLTO/X86/Inputs/devirt_available_externally.ll @@ -0,0 +1,14 @@ +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +%struct.D = type { i32 (...)** } + +@_ZTV1D = constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.D*, i32)* @_ZN1D1mEi to i8*)] }, !type !3 + +define i32 @_ZN1D1mEi(%struct.D* %this, i32 %a) #0 { + ret i32 0; +} + +attributes #0 = { noinline optnone } + +!3 = !{i64 16, !"_ZTS1D"} diff --git a/llvm/test/ThinLTO/X86/Inputs/devirt_local_same_guid.ll b/llvm/test/ThinLTO/X86/Inputs/devirt_local_same_guid.ll new file mode 100644 index 0000000..cf3d024 --- /dev/null +++ b/llvm/test/ThinLTO/X86/Inputs/devirt_local_same_guid.ll @@ -0,0 +1,41 @@ +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +source_filename = "-" + +%struct.A = type { i32 (...)** } +%struct.B = type { %struct.A } + +@_ZTV1B = internal constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !1 + +define internal i32 @_ZN1B1fEi(%struct.B* %this, i32 %a) #0 { + ret i32 0; +} + +define internal i32 @_ZN1A1nEi(%struct.A* %this, i32 %a) #0 { + ret i32 0; +} + +define i32 @test2(%struct.B* %obj, i32 %a) { +entry: + %0 = bitcast %struct.B* %obj to i8*** + %vtable2 = load i8**, i8*** %0 + %1 = bitcast i8** %vtable2 to i8* + %p2 = call i1 @llvm.type.test(i8* %1, metadata !"_ZTS1B") + call void @llvm.assume(i1 %p2) + + %fptrptr = getelementptr i8*, i8** %vtable2, i32 1 + %2 = bitcast i8** %fptrptr to i32 (%struct.B*, i32)** + %fptr33 = load i32 (%struct.B*, i32)*, i32 (%struct.B*, i32)** %2, align 8 + + %call4 = tail call i32 %fptr33(%struct.B* nonnull %obj, i32 %a) + ret i32 %call4 +} + +attributes #0 = { noinline optnone } + +declare i1 @llvm.type.test(i8*, metadata) +declare void @llvm.assume(i1) + +!0 = !{i64 16, !"_ZTS1A"} +!1 = !{i64 16, !"_ZTS1B"} diff --git a/llvm/test/ThinLTO/X86/devirt_available_externally.ll b/llvm/test/ThinLTO/X86/devirt_available_externally.ll new file mode 100644 index 0000000..128055f --- /dev/null +++ b/llvm/test/ThinLTO/X86/devirt_available_externally.ll @@ -0,0 +1,72 @@ +; REQUIRES: x86-registered-target + +; Test index-based devirtualization when first copy is available_externally, +; which doesn't have type metadata. We should use the strong external +; def in the other module to devirtualize. + +; Generate unsplit module with summary for ThinLTO index-based WPD. +; RUN: opt -thinlto-bc -o %t3.o %s +; RUN: opt -thinlto-bc -o %t4.o %p/Inputs/devirt_available_externally.ll + +; The available_externally copy should not get vTableFuncs information in its +; summary entry, but the external def should. +; RUN: llvm-dis -o - %t3.o | FileCheck %s --check-prefix=AVAILEXTERNAL +; AVAILEXTERNAL: gv: (name: "_ZTV1D" +; AVAILEXTERNAL-NOT: vTableFuncs +; AVAILEXTERNAL-SAME: ; guid = +; RUN: llvm-dis -o - %t4.o | FileCheck %s --check-prefix=EXTERNAL +; EXTERNAL: gv: (name: "_ZTV1D", {{.*}} vTableFuncs: ((virtFunc: + +; RUN: llvm-lto2 run %t3.o %t4.o -save-temps -pass-remarks=. \ +; RUN: -wholeprogramdevirt-print-index-based \ +; RUN: -o %t5 \ +; RUN: -r=%t3.o,test,px \ +; RUN: -r=%t3.o,_ZTV1D, \ +; RUN: -r=%t3.o,_ZN1D1mEi, \ +; RUN: -r=%t4.o,_ZN1D1mEi,p \ +; RUN: -r=%t4.o,_ZTV1D,px \ +; RUN: 2>&1 | FileCheck %s --check-prefix=REMARK --check-prefix=PRINT +; RUN: llvm-dis %t5.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR1 +; RUN: llvm-nm %t5.1 | FileCheck %s --check-prefix=NM-INDEX1 +; RUN: llvm-nm %t5.2 | FileCheck %s --check-prefix=NM-INDEX2 + +; NM-INDEX1-DAG: U _ZN1D1mEi + +; NM-INDEX2-DAG: T _ZN1D1mEi + +; PRINT-DAG: Devirtualized call to {{.*}} (_ZN1D1mEi) + +; REMARK-DAG: single-impl: devirtualized a call to _ZN1D1mEi + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +%struct.D = type { i32 (...)** } + +@_ZTV1D = available_externally constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.D*, i32)* @_ZN1D1mEi to i8*)] } + +; CHECK-IR1-LABEL: define i32 @test +define i32 @test(%struct.D* %obj2, i32 %a) { +entry: + %0 = bitcast %struct.D* %obj2 to i8*** + %vtable2 = load i8**, i8*** %0 + %1 = bitcast i8** %vtable2 to i8* + %p2 = call i1 @llvm.type.test(i8* %1, metadata !"_ZTS1D") + call void @llvm.assume(i1 %p2) + + %2 = bitcast i8** %vtable2 to i32 (%struct.D*, i32)** + %fptr33 = load i32 (%struct.D*, i32)*, i32 (%struct.D*, i32)** %2, align 8 + + ; Check that the call was devirtualized. + ; CHECK-IR1: %call4 = tail call i32 @_ZN1D1mEi + %call4 = tail call i32 %fptr33(%struct.D* nonnull %obj2, i32 %a) + ret i32 %call4 +} +; CHECK-IR1-LABEL: ret i32 +; CHECK-IR1-LABEL: } + +declare i1 @llvm.type.test(i8*, metadata) +declare void @llvm.assume(i1) +declare i32 @_ZN1D1mEi(%struct.D* %this, i32 %a) + +attributes #0 = { noinline optnone } diff --git a/llvm/test/ThinLTO/X86/devirt_local_same_guid.ll b/llvm/test/ThinLTO/X86/devirt_local_same_guid.ll new file mode 100644 index 0000000..8354bc1 --- /dev/null +++ b/llvm/test/ThinLTO/X86/devirt_local_same_guid.ll @@ -0,0 +1,74 @@ +; REQUIRES: x86-registered-target + +; Test that index-based devirtualization in the presence of same-named +; local vtables in same named source files fails. + +; Generate unsplit module with summary for ThinLTO index-based WPD. +; RUN: opt -thinlto-bc -o %t3.o %s +; RUN: opt -thinlto-bc -o %t4.o %p/Inputs/devirt_local_same_guid.ll + +; RUN: llvm-lto2 run %t3.o %t4.o -save-temps -use-new-pm -pass-remarks=. \ +; RUN: -wholeprogramdevirt-print-index-based \ +; RUN: -o %t5 \ +; RUN: -r=%t3.o,use_B,px \ +; RUN: -r=%t3.o,test,px \ +; RUN: -r=%t4.o,test2,px +; RUN: llvm-dis %t5.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR1 +; RUN: llvm-dis %t5.2.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR2 + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +source_filename = "-" + +%struct.A = type { i32 (...)** } +%struct.B = type { %struct.A } + +@_ZTV1B = internal constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B1fEi to i8*), i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B1nEi to i8*)] }, !type !0, !type !1 + +define internal i32 @_ZN1B1fEi(%struct.B* %this, i32 %a) #0 { + ret i32 0; +} + +define internal i32 @_ZN1B1nEi(%struct.B* %this, i32 %a) #0 { + ret i32 0; +} + +; Ensures that vtable of B is live so that we will attempt devirt. +define dso_local i32 @use_B(%struct.B* %a) { +entry: + %0 = bitcast %struct.B* %a to i32 (...)*** + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [4 x i8*] }, { [4 x i8*] }* @_ZTV1B, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + ret i32 0 +} + +; CHECK-IR1: define i32 @test( +define i32 @test(%struct.A* %obj, i32 %a) { +entry: + %0 = bitcast %struct.A* %obj to i8*** + %vtable = load i8**, i8*** %0 + %1 = bitcast i8** %vtable to i8* + %p = call i1 @llvm.type.test(i8* %1, metadata !"_ZTS1A") + call void @llvm.assume(i1 %p) + %fptrptr = getelementptr i8*, i8** %vtable, i32 1 + %2 = bitcast i8** %fptrptr to i32 (%struct.A*, i32)** + %fptr1 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %2, align 8 + + ; Check that the call was not devirtualized. + ; CHECK-IR1: %call = tail call i32 %fptr1 + %call = tail call i32 %fptr1(%struct.A* nonnull %obj, i32 %a) + + ret i32 %call +} + +; CHECK-IR2: define i32 @test2 +; Check that the call was not devirtualized. +; CHECK-IR2: %call4 = tail call i32 %fptr + +declare i1 @llvm.type.test(i8*, metadata) +declare void @llvm.assume(i1) + +attributes #0 = { noinline optnone } + +!0 = !{i64 16, !"_ZTS1A"} +!1 = !{i64 16, !"_ZTS1B"} -- 2.7.4