From 1f1145006b32533484c9ebc0f45e241a02fe6c8b Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 30 Nov 2020 23:51:54 +0100 Subject: [PATCH] [DSE] Use correct memory location for read clobber check MSSA DSE starts at a killing store, finds an earlier store and then checks that the earlier store is not read along any paths (without being killed first). However, it uses the memory location of the killing store for that, not the earlier store that we're attempting to eliminate. This has a number of problems: * Mismatches between what BasicAA considers aliasing and what DSE considers an overwrite (even though both are correct in isolation) can result in miscompiles. This is PR48279, which D92045 tries to fix in a different way. The problem is that we're using a location from a store that is potentially not executed and thus may be UB, in which case analysis results can be arbitrary. * Metadata on the killing store may be used to determine aliasing, but there is no guarantee that the metadata is valid, as the specific killing store may not be executed. Using the metadata on the earlier store is valid (it is the store we're removing, so on any execution where its removal may be observed, it must be executed). * The location is imprecise. For full overwrites the killing store will always have a location that is larger than or equal to the earlier access location, so it's beneficial to use the earlier access location. This is not the case for partial overwrites, in which case either location might be smaller. There is some room for improvement here. Using the earlier access location means that we can no longer cache which accesses are read for a given killing store, as we may be querying different locations. However, it turns out that simply dropping the cache has no notable impact on compile-time. 
Differential Revision: https://reviews.llvm.org/D93523 --- .../lib/Transforms/Scalar/DeadStoreElimination.cpp | 79 ++++++---------------- .../MSSA/multiblock-memintrinsics.ll | 4 +- .../MSSA/out-of-bounds-stores.ll | 2 + .../DeadStoreElimination/MSSA/overlap.ll | 6 +- .../DeadStoreElimination/MSSA/scoped-noalias.ll | 4 +- 5 files changed, 28 insertions(+), 67 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 6cc1b43..1a1d0c0 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -1634,18 +1634,6 @@ struct DSEState { /// basic block. DenseMap IOLs; - struct CheckCache { - SmallPtrSet KnownNoReads; - SmallPtrSet KnownReads; - - bool isKnownNoRead(MemoryAccess *A) const { - return KnownNoReads.find(A) != KnownNoReads.end(); - } - bool isKnownRead(MemoryAccess *A) const { - return KnownReads.find(A) != KnownReads.end(); - } - }; - DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT, PostDominatorTree &PDT, const TargetLibraryInfo &TLI) : F(F), AA(AA), BatchAA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI), @@ -1940,9 +1928,8 @@ struct DSEState { Optional getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess, const MemoryLocation &DefLoc, const Value *DefUO, - CheckCache &Cache, unsigned &ScanLimit, - unsigned &WalkerStepLimit, bool IsMemTerm, - unsigned &PartialLimit) { + unsigned &ScanLimit, unsigned &WalkerStepLimit, + bool IsMemTerm, unsigned &PartialLimit) { if (ScanLimit == 0 || WalkerStepLimit == 0) { LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n"); return None; @@ -1954,6 +1941,7 @@ struct DSEState { LLVM_DEBUG(dbgs() << " trying to get dominating access\n"); // Find the next clobbering Mod access for DefLoc, starting at StartAccess. 
+ Optional CurrentLoc; do { StepAgain = false; LLVM_DEBUG({ @@ -2017,12 +2005,8 @@ struct DSEState { // clobber, bail out, as the path is not profitable. We skip this check // for intrinsic calls, because the code knows how to handle memcpy // intrinsics. - if (!isa(CurrentI) && - (Cache.KnownReads.contains(Current) || - isReadClobber(DefLoc, CurrentI))) { - Cache.KnownReads.insert(Current); + if (!isa(CurrentI) && isReadClobber(DefLoc, CurrentI)) return None; - } // Quick check if there are direct uses that are read-clobbers. if (any_of(Current->uses(), [this, &DefLoc, StartAccess](Use &U) { @@ -2031,7 +2015,6 @@ struct DSEState { isReadClobber(DefLoc, UseOrDef->getMemoryInst()); return false; })) { - Cache.KnownReads.insert(Current); LLVM_DEBUG(dbgs() << " ... found a read clobber\n"); return None; } @@ -2045,13 +2028,24 @@ struct DSEState { } // If Current does not have an analyzable write location, skip it - auto CurrentLoc = getLocForWriteEx(CurrentI); + CurrentLoc = getLocForWriteEx(CurrentI); if (!CurrentLoc) { StepAgain = true; Current = CurrentDef->getDefiningAccess(); continue; } + // AliasAnalysis does not account for loops. Limit elimination to + // candidates for which we can guarantee they always store to the same + // memory location and not multiple locations in a loop. + if (Current->getBlock() != KillingDef->getBlock() && + !IsGuaranteedLoopInvariant(const_cast(CurrentLoc->Ptr))) { + StepAgain = true; + Current = CurrentDef->getDefiningAccess(); + WalkerStepLimit -= 1; + continue; + } + if (IsMemTerm) { // If the killing def is a memory terminator (e.g. lifetime.end), check // the next candidate if the current Current does not write the same @@ -2062,17 +2056,6 @@ struct DSEState { } continue; } else { - // AliasAnalysis does not account for loops. Limit elimination to - // candidates for which we can guarantee they always store to the same - // memory location and not multiple locations in a loop. 
- if (Current->getBlock() != KillingDef->getBlock() && - !IsGuaranteedLoopInvariant(const_cast(CurrentLoc->Ptr))) { - StepAgain = true; - Current = CurrentDef->getDefiningAccess(); - WalkerStepLimit -= 1; - continue; - } - int64_t InstWriteOffset, DepWriteOffset; auto OR = isOverwrite(KillingI, CurrentI, DefLoc, *CurrentLoc, DL, TLI, DepWriteOffset, InstWriteOffset, BatchAA, &F); @@ -2133,18 +2116,6 @@ struct DSEState { } --ScanLimit; NumDomMemDefChecks++; - - // Check if we already visited this access. - if (Cache.isKnownNoRead(UseAccess)) { - LLVM_DEBUG(dbgs() << " ... skip, discovered that " << *UseAccess - << " is safe earlier.\n"); - continue; - } - if (Cache.isKnownRead(UseAccess)) { - LLVM_DEBUG(dbgs() << " ... bail out, discovered that " << *UseAccess - << " has a read-clobber earlier.\n"); - return None; - } KnownNoReads.insert(UseAccess); if (isa(UseAccess)) { @@ -2172,7 +2143,7 @@ struct DSEState { // A memory terminator kills all preceeding MemoryDefs and all succeeding // MemoryAccesses. We do not have to check it's users. - if (isMemTerminator(DefLoc, KillingI, UseInst)) { + if (isMemTerminator(*CurrentLoc, EarlierMemInst, UseInst)) { LLVM_DEBUG( dbgs() << " ... skipping, memterminator invalidates following accesses\n"); @@ -2187,19 +2158,13 @@ struct DSEState { if (UseInst->mayThrow() && !isInvisibleToCallerBeforeRet(DefUO)) { LLVM_DEBUG(dbgs() << " ... found throwing instruction\n"); - Cache.KnownReads.insert(UseAccess); - Cache.KnownReads.insert(StartAccess); - Cache.KnownReads.insert(EarlierAccess); return None; } // Uses which may read the original MemoryDef mean we cannot eliminate the // original MD. Stop walk. - if (isReadClobber(DefLoc, UseInst)) { + if (isReadClobber(*CurrentLoc, UseInst)) { LLVM_DEBUG(dbgs() << " ... 
found read clobber\n"); - Cache.KnownReads.insert(UseAccess); - Cache.KnownReads.insert(StartAccess); - Cache.KnownReads.insert(EarlierAccess); return None; } @@ -2223,7 +2188,7 @@ struct DSEState { // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias, // stores [0,1] if (MemoryDef *UseDef = dyn_cast(UseAccess)) { - if (isCompleteOverwrite(DefLoc, KillingI, UseInst)) { + if (isCompleteOverwrite(*CurrentLoc, EarlierMemInst, UseInst)) { if (!isInvisibleToCallerAfterRet(DefUO) && UseAccess != EarlierAccess) { BasicBlock *MaybeKillingBlock = UseInst->getParent(); @@ -2311,7 +2276,6 @@ struct DSEState { // No aliasing MemoryUses of EarlierAccess found, EarlierAccess is // potentially dead. - Cache.KnownNoReads.insert(KnownNoReads.begin(), KnownNoReads.end()); return {EarlierAccess}; } @@ -2550,7 +2514,6 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA, bool Shortend = false; bool IsMemTerm = State.isMemTerminatorInst(SI); - DSEState::CheckCache Cache; // Check if MemoryAccesses in the worklist are killed by KillingDef. for (unsigned I = 0; I < ToCheck.size(); I++) { Current = ToCheck[I]; @@ -2558,8 +2521,8 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA, continue; Optional Next = State.getDomMemoryDef( - KillingDef, Current, SILoc, SILocUnd, Cache, ScanLimit, - WalkerStepLimit, IsMemTerm, PartialLimit); + KillingDef, Current, SILoc, SILocUnd, ScanLimit, WalkerStepLimit, + IsMemTerm, PartialLimit); if (!Next) { LLVM_DEBUG(dbgs() << " finished walk\n"); diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll index b22f5b6..fe50876 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll @@ -41,13 +41,13 @@ bb3: } ; Post-dominating store. +; TODO: The memset can be shortened. 
define void @accessible_after_return_2(i32* noalias %P, i1 %c) { ; CHECK-LABEL: @accessible_after_return_2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 ; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8* -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4 -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 28, i1 false) ; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1 diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/out-of-bounds-stores.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/out-of-bounds-stores.ll index 3a16a62..925ab4e 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/out-of-bounds-stores.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/out-of-bounds-stores.ll @@ -36,6 +36,8 @@ define i32 @test_out_of_bounds_store_nonlocal(i1 %c) { ; CHECK-NEXT: [[D:%.*]] = alloca [1 x i32], align 4 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1 x i32], [1 x i32]* [[D]], i64 0, i64 0 +; CHECK-NEXT: store i32 10, i32* [[ARRAYIDX]], align 4 ; CHECK-NEXT: br label [[FOR_INC:%.*]] ; CHECK: for.inc: ; CHECK-NEXT: br i1 [[C:%.*]], label [[FOR_BODY_1:%.*]], label [[FOR_END:%.*]] diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/overlap.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/overlap.ll index 05326de..b4ed6cc2 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/overlap.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/overlap.ll @@ -67,13 +67,11 @@ end: ret void } -; TODO: The store to %a0 is dead, because only %a1 is read later. +; The store to %a0 is dead, because only %a1 is read later. 
define void @test3(i1 %c) { ; CHECK-LABEL: @test3( ; CHECK-NEXT: [[A:%.*]] = alloca [2 x i8], align 1 -; CHECK-NEXT: [[A0:%.*]] = getelementptr [2 x i8], [2 x i8]* [[A]], i32 0, i32 0 ; CHECK-NEXT: [[A1:%.*]] = getelementptr [2 x i8], [2 x i8]* [[A]], i32 0, i32 1 -; CHECK-NEXT: store i8 1, i8* [[A0]], align 1 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[ELSE:%.*]] ; CHECK: if: ; CHECK-NEXT: store [2 x i8] zeroinitializer, [2 x i8]* [[A]], align 1 @@ -102,10 +100,8 @@ else: define void @test4(i1 %c) { ; CHECK-LABEL: @test4( ; CHECK-NEXT: [[A:%.*]] = alloca [2 x i8], align 1 -; CHECK-NEXT: [[A0:%.*]] = getelementptr [2 x i8], [2 x i8]* [[A]], i32 0, i32 0 ; CHECK-NEXT: [[A1:%.*]] = getelementptr [2 x i8], [2 x i8]* [[A]], i32 0, i32 1 ; CHECK-NEXT: store i8 1, i8* [[A1]], align 1 -; CHECK-NEXT: store i8 1, i8* [[A0]], align 1 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[ELSE:%.*]] ; CHECK: if: ; CHECK-NEXT: store [2 x i8] zeroinitializer, [2 x i8]* [[A]], align 1 diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/scoped-noalias.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/scoped-noalias.ll index b2e6262..8987073 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/scoped-noalias.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/scoped-noalias.ll @@ -4,13 +4,13 @@ ; Assume that %p1 != %p2 if and only if %c is true. In that case the noalias ; metadata is correct, but the first store cannot be eliminated, as it may be ; read-clobbered by the load. -; TODO The store is incorrectly eliminated. 
define void @test(i1 %c, i8* %p1, i8* %p2) { ; CHECK-LABEL: @test( +; CHECK-NEXT: store i8 0, i8* [[P1:%.*]], align 1 ; CHECK-NEXT: [[TMP1:%.*]] = load i8, i8* [[P2:%.*]], align 1, !alias.scope !0 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[ELSE:%.*]] ; CHECK: if: -; CHECK-NEXT: store i8 1, i8* [[P1:%.*]], align 1, !noalias !0 +; CHECK-NEXT: store i8 1, i8* [[P1]], align 1, !noalias !0 ; CHECK-NEXT: ret void ; CHECK: else: ; CHECK-NEXT: store i8 2, i8* [[P1]], align 1 -- 2.7.4