From 2431b143aeacda1c92b82fbae3f7121d78048785 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 22 Aug 2020 10:08:59 +0100 Subject: [PATCH] [DSE,MemorySSA] Limit elimination at end of function to single UO. Limit elimination of stores at the end of a function to MemoryDefs with a single underlying object, to save compile time. In practice, the case with multiple underlying objects does not seem very important. For -O3 -flto on MultiSource/SPEC2000/SPEC2006, handling multiple underlying objects results in a total of only 2 more stores being eliminated. We can always re-visit that in the future. --- llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp | 18 +++++++++--------- .../Transforms/DeadStoreElimination/MSSA/simple.ll | 13 ++++++++++--- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index e2c428c..fd3a662 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -2096,17 +2096,17 @@ struct DSEState { auto DefLoc = getLocForWriteEx(DefI); if (!DefLoc) continue; - getUnderlyingObjects(DefLoc->Ptr, Pointers); - bool CanKill = true; - for (const Value *Pointer : Pointers) { - if (!InvisibleToCallerAfterRet.count(Pointer)) { - CanKill = false; - break; - } - } + // NOTE: Currently eliminating writes at the end of a function is limited + // to MemoryDefs with a single underlying object, to save compile-time. In + // practice it appears the case with multiple underlying objects is very + // uncommon. If it turns out to be important, we can use + // getUnderlyingObjects here instead. + const Value *UO = getUnderlyingObject(DefLoc->Ptr); + if (!UO || !InvisibleToCallerAfterRet.count(UO)) + continue; - if (CanKill && isWriteAtEndOfFunction(Def)) { + if (isWriteAtEndOfFunction(Def)) { // See through pointer-to-pointer bitcasts LLVM_DEBUG(dbgs() << " ... 
MemoryDef is not accessed until the end " "of the function\n"); diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll index 0c83a75..c237c7f 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll @@ -285,8 +285,15 @@ define void @test21() { ret void } +; Currently elimination of stores at the end of a function is limited to a +; single underlying object, for compile-time. This case appears to not be +; very important in practice. define void @test22(i1 %i, i32 %k, i32 %m) nounwind { ; CHECK-LABEL: @test22( +; CHECK-NEXT: [[K_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[K_ADDR_M_ADDR:%.*]] = select i1 [[I:%.*]], i32* [[K_ADDR]], i32* [[M_ADDR]] +; CHECK-NEXT: store i32 0, i32* [[K_ADDR_M_ADDR]], align 4 ; CHECK-NEXT: ret void ; %k.addr = alloca i32 @@ -305,7 +312,7 @@ define noalias i8* @test23() nounwind uwtable ssp { ; CHECK-NEXT: store i8 97, i8* [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x i8], [2 x i8]* [[X]], i64 0, i64 1 ; CHECK-NEXT: store i8 0, i8* [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CALL:%.*]] = call i8* @strdup(i8* [[ARRAYIDX]]) #3 +; CHECK-NEXT: [[CALL:%.*]] = call i8* @strdup(i8* [[ARRAYIDX]]) ; CHECK-NEXT: ret i8* [[CALL]] ; %x = alloca [2 x i8], align 1 @@ -343,7 +350,7 @@ define i8* @test25(i8* %p) nounwind { ; CHECK-NEXT: [[P_4:%.*]] = getelementptr i8, i8* [[P:%.*]], i64 4 ; CHECK-NEXT: [[TMP:%.*]] = load i8, i8* [[P_4]], align 1 ; CHECK-NEXT: store i8 0, i8* [[P_4]], align 1 -; CHECK-NEXT: [[Q:%.*]] = call i8* @strdup(i8* [[P]]) #6 +; CHECK-NEXT: [[Q:%.*]] = call i8* @strdup(i8* [[P]]) ; CHECK-NEXT: store i8 [[TMP]], i8* [[P_4]], align 1 ; CHECK-NEXT: ret i8* [[Q]] ; @@ -711,7 +718,7 @@ define void @test44_volatile(i32* %P) { define void @test45_volatile(i32* %P) { ; CHECK-LABEL: @test45_volatile( 
-; CHECK-NEXT: store volatile i32 2, i32* [[P]], align 4 +; CHECK-NEXT: store volatile i32 2, i32* [[P:%.*]], align 4 ; CHECK-NEXT: store volatile i32 3, i32* [[P]], align 4 ; CHECK-NEXT: ret void ; -- 2.7.4