From 81dbb6aec6261a06132f17b06c9071f4a1871fa4 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 11 Feb 2020 18:27:41 +0000 Subject: [PATCH] Recommit "[DSE] Add first version of MemorySSA-backed DSE (Bottom up walk)." This includes a fix for the santizier failures. This reverts the revert commit 42f8b915eb72364cc5e84adf58a2c2d4947e8b10. --- .../lib/Transforms/Scalar/DeadStoreElimination.cpp | 538 ++++++++++++++++++++- .../MSSA/2011-09-06-EndOfFunction.ll | 1 + .../MSSA/OverwriteStoreBegin.ll | 1 + .../DeadStoreElimination/MSSA/OverwriteStoreEnd.ll | 1 + .../Transforms/DeadStoreElimination/MSSA/atomic.ll | 1 + .../DeadStoreElimination/MSSA/calloc-store.ll | 2 + .../DeadStoreElimination/MSSA/fence-todo.ll | 50 ++ .../Transforms/DeadStoreElimination/MSSA/fence.ll | 48 -- .../Transforms/DeadStoreElimination/MSSA/free.ll | 2 + .../DeadStoreElimination/MSSA/inst-limits.ll | 9 +- .../DeadStoreElimination/MSSA/lifetime.ll | 2 + .../MSSA/mda-with-dbg-values.ll | 20 +- .../MSSA/memcpy-complete-overwrite.ll | 2 + .../DeadStoreElimination/MSSA/memintrinsics.ll | 1 + .../MSSA/memoryssa-scan-limit.ll | 72 +++ .../DeadStoreElimination/MSSA/memset-and-memcpy.ll | 9 +- .../MSSA/memset-missing-debugloc.ll | 1 + .../MSSA/merge-stores-big-endian.ll | 1 + .../DeadStoreElimination/MSSA/merge-stores.ll | 1 + .../MSSA/multiblock-captures.ll | 7 +- .../MSSA/multiblock-exceptions.ll | 1 + .../DeadStoreElimination/MSSA/multiblock-loops.ll | 114 ++++- .../MSSA/multiblock-memoryphis.ll | 70 +++ .../MSSA/multiblock-partial.ll | 3 +- .../DeadStoreElimination/MSSA/multiblock-simple.ll | 41 +- .../DeadStoreElimination/MSSA/operand-bundles.ll | 1 + .../DeadStoreElimination/MSSA/simple-todo.ll | 159 +----- .../Transforms/DeadStoreElimination/MSSA/simple.ll | 167 ++++++- 28 files changed, 1066 insertions(+), 259 deletions(-) create mode 100644 llvm/test/Transforms/DeadStoreElimination/MSSA/fence-todo.ll create mode 100644 llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index f56a4c5..7106fb0 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -29,7 +29,10 @@ #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/OrderedBasicBlock.h" +#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Argument.h" @@ -40,6 +43,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -87,11 +91,20 @@ EnablePartialStoreMerging("enable-dse-partial-store-merging", cl::init(true), cl::Hidden, cl::desc("Enable partial store merging in DSE")); -// Temporary dummy option for tests. static cl::opt EnableMemorySSA("enable-dse-memoryssa", cl::init(false), cl::Hidden, cl::desc("Use the new MemorySSA-backed DSE.")); +static cl::opt + MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(100), cl::Hidden, + cl::desc("The number of memory instructions to scan for " + "dead store elimination (default = 100)")); + +static cl::opt MemorySSADefsPerBlockLimit( + "dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden, + cl::desc("The number of MemoryDefs we consider as candidates to eliminated " + "other stores per basic block (default = 5000)")); + //===----------------------------------------------------------------------===// // Helper functions //===----------------------------------------------------------------------===// @@ -1354,22 +1367,490 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis *AA, return MadeChange; } +namespace { +//============================================================================= +// MemorySSA backed dead store elimination. +// +// The code below implements dead store elimination using MemorySSA. It uses +// the following general approach: given a MemoryDef, walk upwards to find +// clobbering MemoryDefs that may be killed by the starting def. Then check +// that there are no uses that may read the location of the original MemoryDef +// in between both MemoryDefs. A bit more concretely: +// +// For all MemoryDefs StartDef: +// 1. Get the next dominating clobbering MemoryDef (DomAccess) by walking +// upwards. +// 2. Check that there are no reads between DomAccess and the StartDef by +// checking all uses starting at DomAccess and walking until we see StartDef. +// 3. For each found DomDef, check that: +// 1. There are no barrier instructions between DomDef and StartDef (like +// throws or stores with ordering constraints). +// 2. StartDef is executed whenever DomDef is executed. +// 3. StartDef completely overwrites DomDef. +// 4. Erase DomDef from the function and MemorySSA. + +// Returns true if \p M is an intrisnic that does not read or write memory. +bool isNoopIntrinsic(MemoryUseOrDef *M) { + if (const IntrinsicInst *II = dyn_cast(M->getMemoryInst())) { + switch (II->getIntrinsicID()) { + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::invariant_end: + case Intrinsic::launder_invariant_group: + case Intrinsic::assume: + return true; + case Intrinsic::dbg_addr: + case Intrinsic::dbg_declare: + case Intrinsic::dbg_label: + case Intrinsic::dbg_value: + llvm_unreachable("Intrinsic should not be modeled in MemorySSA"); + default: + return false; + } + } + return false; +} + +// Check if we can ignore \p D for DSE. +bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) { + Instruction *DI = D->getMemoryInst(); + // Calls that only access inaccessible memory cannot read or write any memory + // locations we consider for elimination. + if (auto CS = CallSite(DI)) + if (CS.onlyAccessesInaccessibleMemory()) + return true; + + // We can eliminate stores to locations not visible to the caller across + // throwing instructions. + if (DI->mayThrow() && !DefVisibleToCaller) + return true; + + // We can remove the dead stores, irrespective of the fence and its ordering + // (release/acquire/seq_cst). Fences only constraints the ordering of + // already visible stores, it does not make a store visible to other + // threads. So, skipping over a fence does not change a store from being + // dead. + if (isa(DI)) + return true; + + // Skip intrinsics that do not really read or modify memory. + if (isNoopIntrinsic(D)) + return true; + + return false; +} + +struct DSEState { + Function &F; + AliasAnalysis &AA; + MemorySSA &MSSA; + DominatorTree &DT; + PostDominatorTree &PDT; + const TargetLibraryInfo &TLI; + + // All MemoryDefs that potentially could kill other MemDefs. + SmallVector MemDefs; + // Any that should be skipped as they are already deleted + SmallPtrSet SkipStores; + // Keep track of all of the objects that are invisible to the caller until the + // function returns. + SmallPtrSet InvisibleToCaller; + // Keep track of blocks with throwing instructions not modeled in MemorySSA. + SmallPtrSet ThrowingBlocks; + + DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT, + PostDominatorTree &PDT, const TargetLibraryInfo &TLI) + : F(F), AA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI) {} + + static DSEState get(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, + DominatorTree &DT, PostDominatorTree &PDT, + const TargetLibraryInfo &TLI) { + DSEState State(F, AA, MSSA, DT, PDT, TLI); + // Collect blocks with throwing instructions not modeled in MemorySSA and + // alloc-like objects. + for (Instruction &I : instructions(F)) { + if (I.mayThrow() && !MSSA.getMemoryAccess(&I)) + State.ThrowingBlocks.insert(I.getParent()); + + auto *MD = dyn_cast_or_null(MSSA.getMemoryAccess(&I)); + if (MD && State.MemDefs.size() < MemorySSADefsPerBlockLimit && + hasAnalyzableMemoryWrite(&I, TLI) && isRemovable(&I)) + State.MemDefs.push_back(MD); + + // Track alloca and alloca-like objects. Here we care about objects not + // visible to the caller during function execution. Alloca objects are + // invalid in the caller, for alloca-like objects we ensure that they are + // not captured throughout the function. + if (isa(&I) || + (isAllocLikeFn(&I, &TLI) && !PointerMayBeCaptured(&I, false, true))) + State.InvisibleToCaller.insert(&I); + } + // Treat byval or inalloca arguments the same as Allocas, stores to them are + // dead at the end of the function. + for (Argument &AI : F.args()) + if (AI.hasByValOrInAllocaAttr()) + State.InvisibleToCaller.insert(&AI); + return State; + } + + Optional getLocForWriteEx(Instruction *I) const { + if (!I->mayWriteToMemory()) + return None; + + if (auto *MTI = dyn_cast(I)) + return {MemoryLocation::getForDest(MTI)}; + + if (auto CS = CallSite(I)) { + if (Function *F = CS.getCalledFunction()) { + StringRef FnName = F->getName(); + if (TLI.has(LibFunc_strcpy) && FnName == TLI.getName(LibFunc_strcpy)) + return {MemoryLocation(CS.getArgument(0))}; + if (TLI.has(LibFunc_strncpy) && FnName == TLI.getName(LibFunc_strncpy)) + return {MemoryLocation(CS.getArgument(0))}; + if (TLI.has(LibFunc_strcat) && FnName == TLI.getName(LibFunc_strcat)) + return {MemoryLocation(CS.getArgument(0))}; + if (TLI.has(LibFunc_strncat) && FnName == TLI.getName(LibFunc_strncat)) + return {MemoryLocation(CS.getArgument(0))}; + } + return None; + } + + return MemoryLocation::getOrNone(I); + } + + /// Returns true if \p Use completely overwrites \p DefLoc. + bool isCompleteOverwrite(MemoryLocation DefLoc, Instruction *UseInst) const { + // UseInst has a MemoryDef associated in MemorySSA. It's possible for a + // MemoryDef to not write to memory, e.g. a volatile load is modeled as a + // MemoryDef. + if (!UseInst->mayWriteToMemory()) + return false; + + if (auto CS = CallSite(UseInst)) + if (CS.onlyAccessesInaccessibleMemory()) + return false; + + ModRefInfo MR = AA.getModRefInfo(UseInst, DefLoc); + // If necessary, perform additional analysis. + if (isModSet(MR)) + MR = AA.callCapturesBefore(UseInst, DefLoc, &DT); + + Optional UseLoc = getLocForWriteEx(UseInst); + return isModSet(MR) && isMustSet(MR) && + UseLoc->Size.getValue() >= DefLoc.Size.getValue(); + } + + /// Returns true if \p Use may read from \p DefLoc. + bool isReadClobber(MemoryLocation DefLoc, Instruction *UseInst) const { + if (!UseInst->mayReadFromMemory()) + return false; + + if (auto CS = CallSite(UseInst)) + if (CS.onlyAccessesInaccessibleMemory()) + return false; + + ModRefInfo MR = AA.getModRefInfo(UseInst, DefLoc); + // If necessary, perform additional analysis. + if (isRefSet(MR)) + MR = AA.callCapturesBefore(UseInst, DefLoc, &DT); + return isRefSet(MR); + } + + // Find a MemoryDef writing to \p DefLoc and dominating \p Current, with no + // read access in between or return None otherwise. The returned value may not + // (completely) overwrite \p DefLoc. Currently we bail out when we encounter + // any of the following + // * An aliasing MemoryUse (read). + // * A MemoryPHI. + Optional getDomMemoryDef(MemoryDef *KillingDef, + MemoryAccess *Current, + MemoryLocation DefLoc, + bool DefVisibleToCaller, + int &ScanLimit) const { + MemoryDef *DomDef; + MemoryAccess *StartDef = Current; + bool StepAgain; + LLVM_DEBUG(dbgs() << " trying to get dominating access for " << *Current + << "\n"); + // Find the next clobbering Mod access for DefLoc, starting at Current. + do { + StepAgain = false; + // Reached TOP. + if (MSSA.isLiveOnEntryDef(Current)) + return None; + + MemoryUseOrDef *CurrentUD = dyn_cast(Current); + if (!CurrentUD) + return None; + + // Look for access that clobber DefLoc. + MemoryAccess *DomAccess = + MSSA.getSkipSelfWalker()->getClobberingMemoryAccess( + CurrentUD->getDefiningAccess(), DefLoc); + DomDef = dyn_cast(DomAccess); + if (!DomDef || MSSA.isLiveOnEntryDef(DomDef)) + return None; + + // Check if we can skip DomDef for DSE. We also require the KillingDef + // execute whenever DomDef executes and use post-dominance to ensure that. + if (canSkipDef(DomDef, DefVisibleToCaller) || + !PDT.dominates(KillingDef->getBlock(), DomDef->getBlock())) { + StepAgain = true; + Current = DomDef; + } + + } while (StepAgain); + + LLVM_DEBUG(dbgs() << " Checking for reads of " << *DomDef << " (" + << *DomDef->getMemoryInst() << ")\n"); + + SmallSetVector WorkList; + auto PushMemUses = [&WorkList](MemoryAccess *Acc) { + for (Use &U : Acc->uses()) + WorkList.insert(cast(U.getUser())); + }; + PushMemUses(DomDef); + + // Check if DomDef may be read. + for (unsigned I = 0; I < WorkList.size(); I++) { + MemoryAccess *UseAccess = WorkList[I]; + + LLVM_DEBUG(dbgs() << " Checking use " << *UseAccess); + if (--ScanLimit == 0) { + LLVM_DEBUG(dbgs() << " ... hit scan limit\n"); + return None; + } + + // Bail out on MemoryPhis for now. + if (isa(UseAccess)) { + LLVM_DEBUG(dbgs() << " ... hit MemoryPhi\n"); + return None; + } + + Instruction *UseInst = cast(UseAccess)->getMemoryInst(); + LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n"); + + if (isNoopIntrinsic(cast(UseAccess))) { + PushMemUses(UseAccess); + continue; + } + + // Uses which may read the original MemoryDef mean we cannot eliminate the + // original MD. Stop walk. + if (isReadClobber(DefLoc, UseInst)) { + LLVM_DEBUG(dbgs() << " ... found read clobber\n"); + return None; + } + + if (StartDef == UseAccess) + continue; + + // Check all uses for MemoryDefs, except for defs completely overwriting + // the original location. Otherwise we have to check uses of *all* + // MemoryDefs we discover, including non-aliasing ones. Otherwise we might + // miss cases like the following + // 1 = Def(LoE) ; <----- DomDef stores [0,1] + // 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3] + // Use(2) ; MayAlias 2 *and* 1, loads [0, 3]. + // (The Use points to the *first* Def it may alias) + // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias, + // stores [0,1] + if (MemoryDef *UseDef = dyn_cast(UseAccess)) { + if (!isCompleteOverwrite(DefLoc, UseInst)) + PushMemUses(UseDef); + } + } + + // No aliasing MemoryUses of DomDef found, DomDef is potentially dead. + return {DomDef}; + } + + // Delete dead memory defs + void deleteDeadInstruction(Instruction *SI) { + MemorySSAUpdater Updater(&MSSA); + SmallVector NowDeadInsts; + NowDeadInsts.push_back(SI); + --NumFastOther; + + while (!NowDeadInsts.empty()) { + Instruction *DeadInst = NowDeadInsts.pop_back_val(); + ++NumFastOther; + + // Try to preserve debug information attached to the dead instruction. + salvageDebugInfo(*DeadInst); + + // Remove the Instruction from MSSA. + if (MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst)) { + if (MemoryDef *MD = dyn_cast(MA)) { + SkipStores.insert(MD); + } + Updater.removeMemoryAccess(MA); + } + + // Remove its operands + for (Use &O : DeadInst->operands()) + if (Instruction *OpI = dyn_cast(O)) { + O = nullptr; + if (isInstructionTriviallyDead(OpI, &TLI)) + NowDeadInsts.push_back(OpI); + } + + DeadInst->eraseFromParent(); + } + } + + // Check for any extra throws between SI and NI that block DSE. This only + // checks extra maythrows (those that aren't MemoryDef's). MemoryDef that may + // throw are handled during the walk from one def to the next. + bool mayThrowBetween(Instruction *SI, Instruction *NI, + const Value *SILocUnd) const { + // First see if we can ignore it by using the fact that SI is an + // alloca/alloca like object that is not visible to the caller during + // execution of the function. + if (SILocUnd && InvisibleToCaller.count(SILocUnd)) + return false; + + if (SI->getParent() == NI->getParent()) + return ThrowingBlocks.find(SI->getParent()) != ThrowingBlocks.end(); + return !ThrowingBlocks.empty(); + } + + // Check if \p NI acts as a DSE barrier for \p SI. The following instructions + // act as barriers: + // * A memory instruction that may throw and \p SI accesses a non-stack + // object. + // * Atomic stores stronger that monotonic. + bool isDSEBarrier(Instruction *SI, MemoryLocation &SILoc, + const Value *SILocUnd, Instruction *NI, + MemoryLocation &NILoc) const { + // If NI may throw it acts as a barrier, unless we are to an alloca/alloca + // like object that does not escape. + if (NI->mayThrow() && !InvisibleToCaller.count(SILocUnd)) + return true; + + if (NI->isAtomic()) { + if (auto *NSI = dyn_cast(NI)) { + if (isStrongerThanMonotonic(NSI->getOrdering())) + return true; + } else + llvm_unreachable( + "Other instructions should be modeled/skipped in MemorySSA"); + } + + return false; + } +}; + +bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA, + MemorySSA &MSSA, DominatorTree &DT, + PostDominatorTree &PDT, + const TargetLibraryInfo &TLI) { + const DataLayout &DL = F.getParent()->getDataLayout(); + bool MadeChange = false; + + DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI); + // For each store: + for (unsigned I = 0; I < State.MemDefs.size(); I++) { + MemoryDef *Current = State.MemDefs[I]; + if (State.SkipStores.count(Current)) + continue; + Instruction *SI = cast(Current)->getMemoryInst(); + auto MaybeSILoc = State.getLocForWriteEx(SI); + if (!MaybeSILoc) { + LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for " + << *SI << "\n"); + continue; + } + MemoryLocation SILoc = *MaybeSILoc; + assert(SILoc.Ptr && "SILoc should not be null"); + const Value *SILocUnd = GetUnderlyingObject(SILoc.Ptr, DL); + Instruction *DefObj = + const_cast(dyn_cast(SILocUnd)); + bool DefVisibleToCaller = !State.InvisibleToCaller.count(SILocUnd); + if (DefObj && ((isAllocLikeFn(DefObj, &TLI) && + !PointerMayBeCapturedBefore(DefObj, false, true, SI, &DT)))) + DefVisibleToCaller = false; + + LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by " << *SI + << "\n"); + + int ScanLimit = MemorySSAScanLimit; + MemoryDef *StartDef = Current; + // Walk MemorySSA upward to find MemoryDefs that might be killed by SI. + while (Optional Next = State.getDomMemoryDef( + StartDef, Current, SILoc, DefVisibleToCaller, ScanLimit)) { + MemoryAccess *DomAccess = *Next; + LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DomAccess << "\n"); + MemoryDef *NextDef = dyn_cast(DomAccess); + Instruction *NI = NextDef->getMemoryInst(); + LLVM_DEBUG(dbgs() << " def " << *NI << "\n"); + + if (!hasAnalyzableMemoryWrite(NI, TLI)) + break; + MemoryLocation NILoc = *State.getLocForWriteEx(NI); + // Check for anything that looks like it will be a barrier to further + // removal + if (State.isDSEBarrier(SI, SILoc, SILocUnd, NI, NILoc)) { + LLVM_DEBUG(dbgs() << " stop, barrier\n"); + break; + } + + // Before we try to remove anything, check for any extra throwing + // instructions that block us from DSEing + if (State.mayThrowBetween(SI, NI, SILocUnd)) { + LLVM_DEBUG(dbgs() << " stop, may throw!\n"); + break; + } + + // Check if NI overwrites SI. + int64_t InstWriteOffset, DepWriteOffset; + InstOverlapIntervalsTy IOL; + OverwriteResult OR = isOverwrite(SILoc, NILoc, DL, TLI, DepWriteOffset, + InstWriteOffset, NI, IOL, AA, &F); + + if (OR == OW_Complete) { + LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *NI + << "\n KILLER: " << *SI << '\n'); + State.deleteDeadInstruction(NI); + ++NumFastStores; + MadeChange = true; + } else + Current = NextDef; + } + } + + return MadeChange; +} +} // end anonymous namespace + //===----------------------------------------------------------------------===// // DSE Pass //===----------------------------------------------------------------------===// PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) { - AliasAnalysis *AA = &AM.getResult(F); - DominatorTree *DT = &AM.getResult(F); - MemoryDependenceResults *MD = &AM.getResult(F); - const TargetLibraryInfo *TLI = &AM.getResult(F); + AliasAnalysis &AA = AM.getResult(F); + const TargetLibraryInfo &TLI = AM.getResult(F); + DominatorTree &DT = AM.getResult(F); + + if (EnableMemorySSA) { + MemorySSA &MSSA = AM.getResult(F).getMSSA(); + PostDominatorTree &PDT = AM.getResult(F); + + if (!eliminateDeadStoresMemorySSA(F, AA, MSSA, DT, PDT, TLI)) + return PreservedAnalyses::all(); + } else { + MemoryDependenceResults &MD = AM.getResult(F); - if (!eliminateDeadStores(F, AA, MD, DT, TLI)) - return PreservedAnalyses::all(); + if (!eliminateDeadStores(F, &AA, &MD, &DT, &TLI)) + return PreservedAnalyses::all(); + } PreservedAnalyses PA; PA.preserveSet(); PA.preserve(); - PA.preserve(); + if (EnableMemorySSA) + PA.preserve(); + else + PA.preserve(); return PA; } @@ -1388,25 +1869,42 @@ public: if (skipFunction(F)) return false; - DominatorTree *DT = &getAnalysis().getDomTree(); - AliasAnalysis *AA = &getAnalysis().getAAResults(); - MemoryDependenceResults *MD = - &getAnalysis().getMemDep(); - const TargetLibraryInfo *TLI = - &getAnalysis().getTLI(F); + AliasAnalysis &AA = getAnalysis().getAAResults(); + DominatorTree &DT = getAnalysis().getDomTree(); + const TargetLibraryInfo &TLI = + getAnalysis().getTLI(F); + + if (EnableMemorySSA) { + MemorySSA &MSSA = getAnalysis().getMSSA(); + PostDominatorTree &PDT = + getAnalysis().getPostDomTree(); + + return eliminateDeadStoresMemorySSA(F, AA, MSSA, DT, PDT, TLI); + } else { + MemoryDependenceResults &MD = + getAnalysis().getMemDep(); - return eliminateDeadStores(F, AA, MD, DT, TLI); + return eliminateDeadStores(F, &AA, &MD, &DT, &TLI); + } } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); - AU.addRequired(); AU.addRequired(); - AU.addRequired(); AU.addRequired(); - AU.addPreserved(); AU.addPreserved(); - AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + + if (EnableMemorySSA) { + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); + } else { + AU.addRequired(); + AU.addPreserved(); + } } }; @@ -1417,8 +1915,10 @@ char DSELegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false, diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-EndOfFunction.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-EndOfFunction.ll index b9a0ea7..0d84e9c 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-EndOfFunction.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-EndOfFunction.ll @@ -1,3 +1,4 @@ +; XFAIL: * ; RUN: opt -dse -enable-dse-memoryssa -S < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreBegin.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreBegin.ll index a7278d5..1028c8e 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreBegin.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreBegin.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; XFAIL: * ; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s define void @write4to7(i32* nocapture %p) { diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreEnd.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreEnd.ll index d8791aa..0711855 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreEnd.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreEnd.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; XFAIL: * ; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll index b21183c..9558033 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll @@ -1,3 +1,4 @@ +; XFAIL: * ; RUN: opt -basicaa -dse -enable-dse-memoryssa -S < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/calloc-store.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/calloc-store.ll index 19ebaa9..397c594 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/calloc-store.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/calloc-store.ll @@ -1,3 +1,5 @@ +; XFAIL: * + ; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s declare noalias i8* @calloc(i64, i64) diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/fence-todo.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/fence-todo.ll new file mode 100644 index 0000000..4ee2958 --- /dev/null +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/fence-todo.ll @@ -0,0 +1,50 @@ +; XFAIL: * + +; RUN: opt -S -basicaa -dse -enable-dse-memoryssa < %s | FileCheck %s + +; We DSE stack alloc'ed and byval locations, in the presence of fences. +; Fence does not make an otherwise thread local store visible. +; Right now the DSE in presence of fence is only done in end blocks (with no successors), +; but the same logic applies to other basic blocks as well. +; The store to %addr.i can be removed since it is a byval attribute +define void @test3(i32* byval %addr.i) { +; CHECK-LABEL: @test3 +; CHECK-NOT: store +; CHECK: fence +; CHECK: ret + store i32 5, i32* %addr.i, align 4 + fence release + ret void +} + +declare void @foo(i8* nocapture %p) + +declare noalias i8* @malloc(i32) + +; DSE of stores in locations allocated through library calls. +define void @test_nocapture() { +; CHECK-LABEL: @test_nocapture +; CHECK: malloc +; CHECK: foo +; CHECK-NOT: store +; CHECK: fence + %m = call i8* @malloc(i32 24) + call void @foo(i8* %m) + store i8 4, i8* %m + fence release + ret void +} + + +; This is a full fence, but it does not make a thread local store visible. +; We can DSE the store in presence of the fence. +define void @fence_seq_cst() { +; CHECK-LABEL: @fence_seq_cst +; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: ret void + %P1 = alloca i32 + store i32 0, i32* %P1, align 4 + fence seq_cst + store i32 4, i32* %P1, align 4 + ret void +} diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll index 9b43d37..b22fc9c 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll @@ -46,51 +46,3 @@ define void @test2(i32* %addr.i) { store i32 5, i32* %addr.i, align 4 ret void } - -; We DSE stack alloc'ed and byval locations, in the presence of fences. -; Fence does not make an otherwise thread local store visible. -; Right now the DSE in presence of fence is only done in end blocks (with no successors), -; but the same logic applies to other basic blocks as well. -; The store to %addr.i can be removed since it is a byval attribute -define void @test3(i32* byval %addr.i) { -; CHECK-LABEL: @test3 -; CHECK-NOT: store -; CHECK: fence -; CHECK: ret - store i32 5, i32* %addr.i, align 4 - fence release - ret void -} - -declare void @foo(i8* nocapture %p) - -declare noalias i8* @malloc(i32) - -; DSE of stores in locations allocated through library calls. -define void @test_nocapture() { -; CHECK-LABEL: @test_nocapture -; CHECK: malloc -; CHECK: foo -; CHECK-NOT: store -; CHECK: fence - %m = call i8* @malloc(i32 24) - call void @foo(i8* %m) - store i8 4, i8* %m - fence release - ret void -} - - -; This is a full fence, but it does not make a thread local store visible. -; We can DSE the store in presence of the fence. -define void @fence_seq_cst() { -; CHECK-LABEL: @fence_seq_cst -; CHECK-NEXT: fence seq_cst -; CHECK-NEXT: ret void - %P1 = alloca i32 - store i32 0, i32* %P1, align 4 - fence seq_cst - store i32 4, i32* %P1, align 4 - ret void -} - diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/free.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/free.ll index a9a3234..a6e47d7 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/free.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/free.ll @@ -1,3 +1,5 @@ +; XFAIL: * + ; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s target datalayout = "e-p:64:64:64" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/inst-limits.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/inst-limits.ll index 78fe292..638571f 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/inst-limits.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/inst-limits.ll @@ -1,10 +1,9 @@ ; RUN: opt -S -dse -enable-dse-memoryssa < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -; If there are two stores to the same location, DSE should be able to remove -; the first store if the two stores are separated by no more than 98 -; instructions. The existence of debug intrinsics between the stores should -; not affect this instruction limit. +; This test is not relevant for DSE with MemorySSA. Non-memory instructions +; are ignored anyways. The limits for the MemorySSA traversal are tested in +; llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll @x = global i32 0, align 4 @@ -129,7 +128,7 @@ entry: define i32 @test_outside_limit() { entry: ; The first store; later there is a second store to the same location - ; CHECK: store i32 1, i32* @x, align 4 + ; CHECK-NOT: store i32 1, i32* @x, align 4 store i32 1, i32* @x, align 4 ; Insert 99 dummy instructions between the two stores; this is diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/lifetime.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/lifetime.ll index 3b3f4d4..e4d4722 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/lifetime.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/lifetime.ll @@ -1,3 +1,5 @@ +; XFAIL: * + ; RUN: opt -S -basicaa -dse -enable-dse-memoryssa < %s | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/mda-with-dbg-values.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/mda-with-dbg-values.ll index 52e2ecd..06bcc34 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/mda-with-dbg-values.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/mda-with-dbg-values.ll @@ -1,13 +1,12 @@ -; RUN: opt -S -dse -enable-dse-memoryssa -memdep-block-scan-limit=3 < %s | FileCheck %s -; RUN: opt -S -strip-debug -dse -enable-dse-memoryssa -memdep-block-scan-limit=3 < %s | FileCheck %s +; RUN: opt -S -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=2 < %s | FileCheck %s +; RUN: opt -S -strip-debug -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=2 < %s | FileCheck %s -; Test case to check that the memory dependency analysis gets the same -; result even if we have a dbg value between the memcpy and -; store. The memory dependency is then used by DSE to remove the store. +; Test case to check that DSE gets the same result even if we have a dbg value +; between the memcpy. -; We use -memdep-block-scan-limit=3 to be able to create a small test case. -; Without it, we would need to squeeze in 100 instructions since the default -; limit is 100. +; This test case is less relevant for the MemorySSA backed version of DSE, as +; debug values are not modeled in MemorySSA and are skipped regardless of the +; exploration limit. target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -20,6 +19,11 @@ entry: %i = alloca i8, align 1 store i8 1, i8* %i, align 1, !dbg !19 call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !17, metadata !DIExpression()), !dbg !18 + call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !17, metadata !DIExpression()), !dbg !18 + call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !17, metadata !DIExpression()), !dbg !18 + call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !17, metadata !DIExpression()), !dbg !18 + call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !17, metadata !DIExpression()), !dbg !18 + call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !17, metadata !DIExpression()), !dbg !18 %0 = bitcast [1 x i8]* @g to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* %i, i8* %0, i64 1, i1 false), !dbg !20 br label %bb2 diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memcpy-complete-overwrite.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memcpy-complete-overwrite.ll index 38cb8a3..9315972 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memcpy-complete-overwrite.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memcpy-complete-overwrite.ll @@ -1,4 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py + +; XFAIL: * ; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s ; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -enable-dse-memoryssa -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memintrinsics.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memintrinsics.ll index 51c45c3..c22ad19 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memintrinsics.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memintrinsics.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; XFAIL: * ; RUN: opt -S -dse -enable-dse-memoryssa < %s | FileCheck %s declare void @llvm.memcpy.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i1) nounwind diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll new file mode 100644 index 0000000..43a9f0c --- /dev/null +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll @@ -0,0 +1,72 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck --check-prefix=NO-LIMIT %s +; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=0 -S | FileCheck --check-prefix=LIMIT-0 %s +; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=3 -S | FileCheck --check-prefix=LIMIT-3 %s +; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=4 -S | FileCheck --check-prefix=LIMIT-4 %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + + +define void @test2(i32* noalias %P, i32* noalias %Q, i32* noalias %R) { +; +; NO-LIMIT-LABEL: @test2( +; NO-LIMIT-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; NO-LIMIT: bb1: +; NO-LIMIT-NEXT: br label [[BB3:%.*]] +; NO-LIMIT: bb2: +; NO-LIMIT-NEXT: br label [[BB3]] +; NO-LIMIT: bb3: +; NO-LIMIT-NEXT: store i32 0, i32* [[Q:%.*]] +; NO-LIMIT-NEXT: store i32 0, i32* [[R:%.*]] +; NO-LIMIT-NEXT: store i32 0, i32* [[P:%.*]] +; NO-LIMIT-NEXT: ret void +; +; LIMIT-0-LABEL: @test2( +; LIMIT-0-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; LIMIT-0: bb1: +; LIMIT-0-NEXT: br label [[BB3:%.*]] +; LIMIT-0: bb2: +; LIMIT-0-NEXT: br label [[BB3]] +; LIMIT-0: bb3: +; LIMIT-0-NEXT: store i32 0, i32* [[Q:%.*]] +; LIMIT-0-NEXT: store i32 0, i32* [[R:%.*]] +; LIMIT-0-NEXT: store i32 0, i32* [[P:%.*]] +; LIMIT-0-NEXT: ret void +; +; LIMIT-3-LABEL: @test2( +; LIMIT-3-NEXT: store i32 1, i32* [[P:%.*]] +; LIMIT-3-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; LIMIT-3: bb1: +; LIMIT-3-NEXT: br label [[BB3:%.*]] +; LIMIT-3: bb2: +; LIMIT-3-NEXT: br label [[BB3]] +; LIMIT-3: bb3: +; LIMIT-3-NEXT: store i32 0, i32* [[Q:%.*]] +; LIMIT-3-NEXT: store i32 0, i32* [[R:%.*]] +; LIMIT-3-NEXT: store i32 0, i32* [[P]] +; LIMIT-3-NEXT: ret void +; +; LIMIT-4-LABEL: @test2( +; LIMIT-4-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; LIMIT-4: bb1: +; LIMIT-4-NEXT: br label [[BB3:%.*]] +; LIMIT-4: bb2: +; LIMIT-4-NEXT: br label [[BB3]] +; LIMIT-4: bb3: +; LIMIT-4-NEXT: store i32 0, i32* [[Q:%.*]] +; LIMIT-4-NEXT: store i32 0, i32* [[R:%.*]] +; LIMIT-4-NEXT: store i32 0, i32* [[P:%.*]] +; LIMIT-4-NEXT: ret void +; + store i32 1, i32* %P + br i1 true, label %bb1, label %bb2 +bb1: + br label %bb3 +bb2: + br label %bb3 +bb3: + store i32 0, i32* %Q + store i32 0, i32* %R + store i32 0, i32* %P + ret void +} diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll index be4574e..c142ea6 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll @@ -59,8 +59,7 @@ define void @test17_atomic_weaker_2(i8* %P, i8* noalias %Q) nounwind ssp { ; Should not delete the volatile memset. define void @test17v(i8* %P, i8* %Q) nounwind ssp { ; CHECK-LABEL: @test17v( -; CHECK-NEXT: tail call void @llvm.memset.p0i8.i64(i8* [[P:%.*]], i8 42, i64 8, i1 true) -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[Q:%.*]], i64 12, i1 false) +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false) ; CHECK-NEXT: ret void ; tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 true) @@ -74,8 +73,7 @@ define void @test17v(i8* %P, i8* %Q) nounwind ssp { ; Previously this was not allowed (PR8728), also discussed in PR11763. define void @test18(i8* %P, i8* %Q, i8* %R) nounwind ssp { ; CHECK-LABEL: @test18( -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false) -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[R:%.*]], i64 12, i1 false) +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[R:%.*]], i64 12, i1 false) ; CHECK-NEXT: ret void ; tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) @@ -85,8 +83,7 @@ define void @test18(i8* %P, i8* %Q, i8* %R) nounwind ssp { define void @test18_atomic(i8* %P, i8* %Q, i8* %R) nounwind ssp { ; CHECK-LABEL: @test18_atomic( -; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1) -; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[R:%.*]], i64 12, i32 1) +; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[R:%.*]], i64 12, i32 1) ; CHECK-NEXT: ret void ; tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1) diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll index 11d155c..d1aaab9 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll @@ -1,6 +1,7 @@ ; Test that the getelementptr generated when the dse pass determines that ; a memset can be shortened has the debugloc carried over from the memset. +; XFAIL: * ; RUN: opt -S -march=native -dse -enable-dse-memoryssa < %s| FileCheck %s ; CHECK: bitcast [5 x i64]* %{{[a-zA-Z_][a-zA-Z0-9_]*}} to i8*, !dbg ; CHECK-NEXT: %{{[0-9]+}} = getelementptr inbounds i8, i8* %0, i64 32, !dbg ![[DBG:[0-9]+]] diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores-big-endian.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores-big-endian.ll index 8acc29f..055f117 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores-big-endian.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores-big-endian.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; XFAIL: * ; RUN: opt -dse -enable-dse-memoryssa -enable-dse-partial-store-merging -S < %s | FileCheck %s target datalayout = "E-m:e-i64:64-i128:128-n32:64-S128" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores.ll index feb1f31..b09904e 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; XFAIL: * ; RUN: opt -dse -enable-dse-memoryssa -enable-dse-partial-store-merging -S < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll index 7caf481..c1f7392 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll @@ -25,7 +25,6 @@ define i8* @test_return_captures_1() { define i8* @test_return_captures_2() { ; CHECK-LABEL: @test_return_captures_2( ; CHECK-NEXT: [[M:%.*]] = call i8* @malloc(i64 24) -; CHECK-NEXT: store i8 0, i8* [[M]] ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: store i8 1, i8* [[M]] @@ -91,7 +90,6 @@ exit: define i8* @test_malloc_capture_3() { ; CHECK-LABEL: @test_malloc_capture_3( ; CHECK-NEXT: [[M:%.*]] = call i8* @malloc(i64 24) -; CHECK-NEXT: store i8 0, i8* [[M]] ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: store i8 1, i8* [[M]] @@ -191,7 +189,6 @@ exit: define i8* @test_malloc_capture_7() { ; CHECK-LABEL: @test_malloc_capture_7( ; CHECK-NEXT: [[M:%.*]] = call i8* @malloc(i64 24) -; CHECK-NEXT: store i8 0, i8* [[M]] ; CHECK-NEXT: call void @may_throw() ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: @@ -215,10 +212,10 @@ exit: define void @test_alloca_nocapture_1() { ; CHECK-LABEL: @test_alloca_nocapture_1( ; CHECK-NEXT: [[M:%.*]] = alloca i8 -; CHECK-NEXT: store i8 0, i8* [[M]] ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: +; CHECK-NEXT: store i8 1, i8* [[M]] ; CHECK-NEXT: ret void ; %m = alloca i8 @@ -240,6 +237,7 @@ define void @test_alloca_capture_1() { ; CHECK-NEXT: call void @capture(i8* [[M]]) ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: +; CHECK-NEXT: store i8 1, i8* [[M]] ; CHECK-NEXT: ret void ; %m = alloca i8 @@ -262,6 +260,7 @@ define void @test_alloca_capture_2(%S1* %E) { ; CHECK: exit: ; CHECK-NEXT: [[F_PTR:%.*]] = getelementptr [[S1:%.*]], %S1* [[E:%.*]], i32 0, i32 0 ; CHECK-NEXT: store i8* [[M]], i8** [[F_PTR]] +; CHECK-NEXT: store i8 1, i8* [[M]] ; CHECK-NEXT: ret void ; %m = alloca i8 diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll index 41d4d019..a719e27 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll @@ -30,6 +30,7 @@ define void @test12(i32* %p) personality i32 (...)* @__CxxFrameHandler3 { ; CHECK-NEXT: [[C1:%.*]] = cleanuppad within none [] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: +; CHECK-NEXT: store i32 40, i32* [[SV]] ; CHECK-NEXT: ret void ; block1: diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll index bcf75e3..1ec4434 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll @@ -111,7 +111,6 @@ define void @test_loop(i32 %N, i32* noalias nocapture readonly %A, i32* noalias ; CHECK: for.body4.lr.ph: ; CHECK-NEXT: [[I_028:%.*]] = phi i32 [ [[INC11:%.*]], [[FOR_COND_CLEANUP3:%.*]] ], [ 0, [[FOR_BODY4_LR_PH_PREHEADER]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[I_028]] -; CHECK-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[I_028]], [[N]] ; CHECK-NEXT: br label [[FOR_BODY4:%.*]] ; CHECK: for.body4: @@ -170,3 +169,116 @@ for.cond.cleanup3: ; preds = %for.body4 %exitcond29 = icmp eq i32 %inc11, %N br i1 %exitcond29, label %for.cond.cleanup, label %for.body4.lr.ph } + +declare i1 @cond() readnone + +; TODO: We can eliminate the store in for.header, but we currently hit a MemoryPhi. +define void @loop_multiple_def_uses(i32* noalias %P) { +; CHECK-LABEL: @loop_multiple_def_uses( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_HEADER:%.*]] +; CHECK: for.header: +; CHECK-NEXT: store i32 1, i32* [[P:%.*]], align 4 +; CHECK-NEXT: [[C1:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C1]], label [[FOR_BODY:%.*]], label [[END:%.*]] +; CHECK: for.body: +; CHECK-NEXT: store i32 1, i32* [[P]], align 4 +; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[P]] +; CHECK-NEXT: br label [[FOR_HEADER]] +; CHECK: end: +; CHECK-NEXT: store i32 3, i32* [[P]], align 4 +; CHECK-NEXT: ret void +; +entry: + br label %for.header + +for.header: + store i32 1, i32* %P, align 4 + %c1 = call i1 @cond() + br i1 %c1, label %for.body, label %end + +for.body: + store i32 1, i32* %P, align 4 + %lv = load i32, i32* %P + br label %for.header + +end: + store i32 3, i32* %P, align 4 + ret void +} + +; We cannot eliminate the store in for.header, as it is only partially +; overwritten in for.body and read afterwards. +define void @loop_multiple_def_uses_partial_write(i32* noalias %p) { +; CHECK-LABEL: @loop_multiple_def_uses_partial_write( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_HEADER:%.*]] +; CHECK: for.header: +; CHECK-NEXT: store i32 1239491, i32* [[P:%.*]], align 4 +; CHECK-NEXT: [[C1:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C1]], label [[FOR_BODY:%.*]], label [[END:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[C:%.*]] = bitcast i32* [[P]] to i8* +; CHECK-NEXT: store i8 1, i8* [[C]], align 4 +; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[P]] +; CHECK-NEXT: br label [[FOR_HEADER]] +; CHECK: end: +; CHECK-NEXT: store i32 3, i32* [[P]], align 4 +; CHECK-NEXT: ret void +; +entry: + br label %for.header + +for.header: + store i32 1239491, i32* %p, align 4 + %c1 = call i1 @cond() + br i1 %c1, label %for.body, label %end + +for.body: + %c = bitcast i32* %p to i8* + store i8 1, i8* %c, align 4 + %lv = load i32, i32* %p + br label %for.header + +end: + store i32 3, i32* %p, align 4 + ret void +} + +; We cannot eliminate the store in for.header, as the location is not overwritten +; in for.body and read afterwards. +define void @loop_multiple_def_uses_mayalias_write(i32* %p, i32* %q) { + +; CHECK-LABEL: @loop_multiple_def_uses_mayalias_write( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_HEADER:%.*]] +; CHECK: for.header: +; CHECK-NEXT: store i32 1239491, i32* [[P:%.*]], align 4 +; CHECK-NEXT: [[C1:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C1]], label [[FOR_BODY:%.*]], label [[END:%.*]] +; CHECK: for.body: +; CHECK-NEXT: store i32 1, i32* [[Q:%.*]], align 4 +; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[P]] +; CHECK-NEXT: br label [[FOR_HEADER]] +; CHECK: end: +; CHECK-NEXT: store i32 3, i32* [[P]], align 4 +; CHECK-NEXT: ret void +; +entry: + br label %for.header + +for.header: + store i32 1239491, i32* %p, align 4 + %c1 = call i1 @cond() + br i1 %c1, label %for.body, label %end + +for.body: + store i32 1, i32* %q, align 4 + %lv = load i32, i32* %p + br label %for.header + +end: + store i32 3, i32* %p, align 4 + ret void +} + diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll index ddfc4d4..ce34986 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll @@ -103,3 +103,73 @@ bb3: store i8 1, i8* %P2 ret void } + +declare void @hoge() + +; Check a function with a MemoryPhi with 3 incoming values. +define void @widget(i32* %Ptr, i1 %c1, i1 %c2, i32 %v1, i32 %v2, i32 %v3) { +; CHECK-LABEL: @widget( +; CHECK-NEXT: bb: +; CHECK-NEXT: tail call void @hoge() +; CHECK-NEXT: br i1 [[C1:%.*]], label [[BB3:%.*]], label [[BB1:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br i1 [[C2:%.*]], label [[BB2:%.*]], label [[BB3]] +; CHECK: bb2: +; CHECK-NEXT: store i32 -1, i32* [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: br label [[BB4:%.*]] +; CHECK: bb4: +; CHECK-NEXT: switch i32 [[V1:%.*]], label [[BB8:%.*]] [ +; CHECK-NEXT: i32 0, label [[BB5:%.*]] +; CHECK-NEXT: i32 1, label [[BB6:%.*]] +; CHECK-NEXT: i32 2, label [[BB7:%.*]] +; CHECK-NEXT: ] +; CHECK: bb5: +; CHECK-NEXT: store i32 0, i32* [[PTR]], align 4 +; CHECK-NEXT: br label [[BB8]] +; CHECK: bb6: +; CHECK-NEXT: store i32 1, i32* [[PTR]], align 4 +; CHECK-NEXT: br label [[BB8]] +; CHECK: bb7: +; CHECK-NEXT: store i32 2, i32* [[PTR]], align 4 +; CHECK-NEXT: br label [[BB8]] +; CHECK: bb8: +; CHECK-NEXT: br label [[BB4]] +; +bb: + tail call void @hoge() + br i1 %c1, label %bb3, label %bb1 + +bb1: ; preds = %bb + br i1 %c2, label %bb2, label %bb3 + +bb2: ; preds = %bb1 + store i32 -1, i32* %Ptr, align 4 + br label %bb3 + +bb3: ; preds = %bb2, %bb1, %bb + br label %bb4 + +bb4: ; preds = %bb8, %bb3 + switch i32 %v1, label %bb8 [ + i32 0, label %bb5 + i32 1, label %bb6 + i32 2, label %bb7 + ] + +bb5: ; preds = %bb4 + store i32 0, i32* %Ptr, align 4 + br label %bb8 + +bb6: ; preds = %bb4 + store i32 1, i32* %Ptr, align 4 + br label %bb8 + +bb7: ; preds = %bb4 + store i32 2, i32* %Ptr, align 4 + br label %bb8 + +bb8: ; preds = %bb7, %bb6, %bb5, %bb4 + br label %bb4 +} diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-partial.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-partial.ll index 17bd8a6..cbd7dae 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-partial.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-partial.ll @@ -32,14 +32,13 @@ bb3: define void @second_store_bigger(i32* noalias %P) { ; CHECK-LABEL: @second_store_bigger( -; CHECK-NEXT: store i32 1, i32* [[P:%.*]] ; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb2: ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[P_I64:%.*]] = bitcast i32* [[P]] to i64* +; CHECK-NEXT: [[P_I64:%.*]] = bitcast i32* [[P:%.*]] to i64* ; CHECK-NEXT: store i64 0, i64* [[P_I64]] ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll index 2b02277..520a6ea 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll @@ -6,14 +6,13 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" define void @test2(i32* noalias %P) { ; CHECK-LABEL: @test2( -; CHECK-NEXT: store i32 1, i32* [[P:%.*]] ; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb2: ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: store i32 0, i32* [[P]] +; CHECK-NEXT: store i32 0, i32* [[P:%.*]] ; CHECK-NEXT: ret void ; store i32 1, i32* %P @@ -53,7 +52,6 @@ bb3: define void @test7(i32* noalias %P, i32* noalias %Q) { ; CHECK-LABEL: @test7( -; CHECK-NEXT: store i32 1, i32* [[Q:%.*]] ; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[P:%.*]] @@ -61,7 +59,7 @@ define void @test7(i32* noalias %P, i32* noalias %Q) { ; CHECK: bb2: ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: store i32 0, i32* [[Q]] +; CHECK-NEXT: store i32 0, i32* [[Q:%.*]] ; CHECK-NEXT: store i32 0, i32* [[P]] ; CHECK-NEXT: ret void ; @@ -114,3 +112,38 @@ bb3: store i32 0, i32* %P ret void } + +; We cannot eliminate `store i32 0, i32* %P`, as it is read by the later load. +; Make sure that we check the uses of `store i32 1, i32* %P.1 which does not +; alias %P. Note that uses point to the *first* def that may alias. +define void @overlapping_read(i32* %P) { +; CHECK-LABEL: @overlapping_read( +; CHECK-NEXT: store i32 0, i32* [[P:%.*]] +; CHECK-NEXT: [[P_1:%.*]] = getelementptr i32, i32* [[P]], i32 1 +; CHECK-NEXT: store i32 1, i32* [[P_1]] +; CHECK-NEXT: [[P_64:%.*]] = bitcast i32* [[P]] to i64* +; CHECK-NEXT: [[LV:%.*]] = load i64, i64* [[P_64]] +; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: ret void +; CHECK: bb3: +; CHECK-NEXT: store i32 2, i32* [[P]] +; CHECK-NEXT: ret void +; + store i32 0, i32* %P + %P.1 = getelementptr i32, i32* %P, i32 1 + store i32 1, i32* %P.1 + + %P.64 = bitcast i32* %P to i64* + %lv = load i64, i64* %P.64 + br i1 true, label %bb1, label %bb2 +bb1: + br label %bb3 +bb2: + ret void +bb3: + store i32 2, i32* %P + ret void +} diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/operand-bundles.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/operand-bundles.ll index 9e88651..3a59286 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/operand-bundles.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/operand-bundles.ll @@ -1,3 +1,4 @@ +; XFAIL: * ; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s declare noalias i8* @malloc(i64) "malloc-like" diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll index b96cc9f..d0a3cef 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; XFAIL: * ; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s ; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -enable-dse-memoryssa -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" @@ -9,19 +10,6 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) n declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind declare void @llvm.init.trampoline(i8*, i8*, i8*) -; PR8576 - Should delete store of 10 even though p/q are may aliases. -define void @test2(i32 *%p, i32 *%q) { -; CHECK-LABEL: @test2( -; CHECK-NEXT: store i32 20, i32* [[Q:%.*]], align 4 -; CHECK-NEXT: store i32 30, i32* [[P:%.*]], align 4 -; CHECK-NEXT: ret void -; - store i32 10, i32* %p, align 4 - store i32 20, i32* %q, align 4 - store i32 30, i32* %p, align 4 - ret void -} - define void @test5(i32* %Q) { ; CHECK-LABEL: @test5( ; CHECK-NEXT: [[A:%.*]] = load volatile i32, i32* [[Q:%.*]] @@ -32,62 +20,6 @@ define void @test5(i32* %Q) { ret void } -; Should delete store of 10 even though memset is a may-store to P (P and Q may -; alias). -define void @test6(i32 *%p, i8 *%q) { -; CHECK-LABEL: @test6( -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[Q:%.*]], i8 42, i64 900, i1 false) -; CHECK-NEXT: store i32 30, i32* [[P:%.*]], align 4 -; CHECK-NEXT: ret void -; - store i32 10, i32* %p, align 4 ;; dead. - call void @llvm.memset.p0i8.i64(i8* %q, i8 42, i64 900, i1 false) - store i32 30, i32* %p, align 4 - ret void -} - -; Should delete store of 10 even though memset is a may-store to P (P and Q may -; alias). -define void @test6_atomic(i32* align 4 %p, i8* align 4 %q) { -; CHECK-LABEL: @test6_atomic( -; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[Q:%.*]], i8 42, i64 900, i32 4) -; CHECK-NEXT: store atomic i32 30, i32* [[P:%.*]] unordered, align 4 -; CHECK-NEXT: ret void -; - store atomic i32 10, i32* %p unordered, align 4 ;; dead. - call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %q, i8 42, i64 900, i32 4) - store atomic i32 30, i32* %p unordered, align 4 - ret void -} - -; Should delete store of 10 even though memcpy is a may-store to P (P and Q may -; alias). -define void @test7(i32 *%p, i8 *%q, i8* noalias %r) { -; CHECK-LABEL: @test7( -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[Q:%.*]], i8* [[R:%.*]], i64 900, i1 false) -; CHECK-NEXT: store i32 30, i32* [[P:%.*]], align 4 -; CHECK-NEXT: ret void -; - store i32 10, i32* %p, align 4 ;; dead. - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %q, i8* %r, i64 900, i1 false) - store i32 30, i32* %p, align 4 - ret void -} - -; Should delete store of 10 even though memcpy is a may-store to P (P and Q may -; alias). -define void @test7_atomic(i32* align 4 %p, i8* align 4 %q, i8* noalias align 4 %r) { -; CHECK-LABEL: @test7_atomic( -; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 [[Q:%.*]], i8* align 4 [[R:%.*]], i64 900, i32 4) -; CHECK-NEXT: store atomic i32 30, i32* [[P:%.*]] unordered, align 4 -; CHECK-NEXT: ret void -; - store atomic i32 10, i32* %p unordered, align 4 ;; dead. - call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 %q, i8* align 4 %r, i64 900, i32 4) - store atomic i32 30, i32* %p unordered, align 4 - ret void -} - ; Do not delete stores that are only partially killed. define i32 @test8() { ; CHECK-LABEL: @test8( @@ -158,46 +90,6 @@ define void @test12({ i32, i32 }* %x) nounwind { } -; %P doesn't escape, the DEAD instructions should be removed. -declare void @test13f() -define i32* @test13() { -; CHECK-LABEL: @test13( -; CHECK-NEXT: [[PTR:%.*]] = tail call i8* @malloc(i32 4) -; CHECK-NEXT: [[P:%.*]] = bitcast i8* [[PTR]] to i32* -; CHECK-NEXT: call void @test13f() -; CHECK-NEXT: store i32 0, i32* [[P]] -; CHECK-NEXT: ret i32* [[P]] -; - %ptr = tail call i8* @malloc(i32 4) - %P = bitcast i8* %ptr to i32* - %DEAD = load i32, i32* %P - %DEAD2 = add i32 %DEAD, 1 - store i32 %DEAD2, i32* %P - call void @test13f( ) - store i32 0, i32* %P - ret i32* %P -} - -define i32 addrspace(1)* @test13_addrspacecast() { -; CHECK-LABEL: @test13_addrspacecast( -; CHECK-NEXT: [[P:%.*]] = tail call i8* @malloc(i32 4) -; CHECK-NEXT: [[P_BC:%.*]] = bitcast i8* [[P]] to i32* -; CHECK-NEXT: [[P:%.*]] = addrspacecast i32* [[P_BC]] to i32 addrspace(1)* -; CHECK-NEXT: call void @test13f() -; CHECK-NEXT: store i32 0, i32 addrspace(1)* [[P]] -; CHECK-NEXT: ret i32 addrspace(1)* [[P]] -; - %p = tail call i8* @malloc(i32 4) - %p.bc = bitcast i8* %p to i32* - %P = addrspacecast i32* %p.bc to i32 addrspace(1)* - %DEAD = load i32, i32 addrspace(1)* %P - %DEAD2 = add i32 %DEAD, 1 - store i32 %DEAD2, i32 addrspace(1)* %P - call void @test13f( ) - store i32 0, i32 addrspace(1)* %P - ret i32 addrspace(1)* %P -} - declare noalias i8* @malloc(i32) declare noalias i8* @calloc(i32, i32) @@ -241,26 +133,6 @@ define void @test22(i1 %i, i32 %k, i32 %m) nounwind { ret void } -; Make sure same sized store to later element is deleted -define void @test24([2 x i32]* %a, i32 %b, i32 %c) nounwind { -; CHECK-LABEL: @test24( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A:%.*]], i64 0, i64 0 -; CHECK-NEXT: store i32 [[B:%.*]], i32* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A]], i64 0, i64 1 -; CHECK-NEXT: store i32 [[C:%.*]], i32* [[TMP2]], align 4 -; CHECK-NEXT: ret void -; - %1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 0 - store i32 0, i32* %1, align 4 - %2 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 1 - store i32 0, i32* %2, align 4 - %3 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 0 - store i32 %b, i32* %3, align 4 - %4 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 1 - store i32 %c, i32* %4, align 4 - ret void -} - ; Remove redundant store if loaded value is in another block. define i32 @test26(i1 %c, i32* %p) { ; CHECK-LABEL: @test26( @@ -311,35 +183,6 @@ bb3: declare void @unknown_func() -; Don't remove redundant store because of unknown call. -define i32 @test30(i1 %c, i32* %p, i32 %i) { -; CHECK-LABEL: @test30( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[P:%.*]], align 4 -; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] -; CHECK: bb1: -; CHECK-NEXT: br label [[BB3:%.*]] -; CHECK: bb2: -; CHECK-NEXT: call void @unknown_func() -; CHECK-NEXT: br label [[BB3]] -; CHECK: bb3: -; CHECK-NEXT: store i32 [[V]], i32* [[P]], align 4 -; CHECK-NEXT: ret i32 0 -; -entry: - %v = load i32, i32* %p, align 4 - br i1 %c, label %bb1, label %bb2 -bb1: - br label %bb3 -bb2: - ; Might overwrite value at %p - call void @unknown_func() - br label %bb3 -bb3: - store i32 %v, i32* %p, align 4 - ret i32 0 -} - ; Remove redundant store if loaded value is in another block inside a loop. define i32 @test31(i1 %c, i32* %p, i32 %i) { ; CHECK-LABEL: @test31( diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll index b2c66f2..ef05504 100644 --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll @@ -50,6 +50,76 @@ define void @test4(i32* %Q) { ret void } +; PR8576 - Should delete store of 10 even though p/q are may aliases. +define void @test2(i32 *%p, i32 *%q) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: store i32 20, i32* [[Q:%.*]], align 4 +; CHECK-NEXT: store i32 30, i32* [[P:%.*]], align 4 +; CHECK-NEXT: ret void +; + store i32 10, i32* %p, align 4 + store i32 20, i32* %q, align 4 + store i32 30, i32* %p, align 4 + ret void +} + +; Should delete store of 10 even though memset is a may-store to P (P and Q may +; alias). +define void @test6(i32 *%p, i8 *%q) { +; CHECK-LABEL: @test6( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[Q:%.*]], i8 42, i64 900, i1 false) +; CHECK-NEXT: store i32 30, i32* [[P:%.*]], align 4 +; CHECK-NEXT: ret void +; + store i32 10, i32* %p, align 4 ;; dead. + call void @llvm.memset.p0i8.i64(i8* %q, i8 42, i64 900, i1 false) + store i32 30, i32* %p, align 4 + ret void +} + +; Should delete store of 10 even though memset is a may-store to P (P and Q may +; alias). +define void @test6_atomic(i32* align 4 %p, i8* align 4 %q) { +; CHECK-LABEL: @test6_atomic( +; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[Q:%.*]], i8 42, i64 900, i32 4) +; CHECK-NEXT: store atomic i32 30, i32* [[P:%.*]] unordered, align 4 +; CHECK-NEXT: ret void +; + store atomic i32 10, i32* %p unordered, align 4 ;; dead. + call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %q, i8 42, i64 900, i32 4) + store atomic i32 30, i32* %p unordered, align 4 + ret void +} + +; Should delete store of 10 even though memcpy is a may-store to P (P and Q may +; alias). +define void @test7(i32 *%p, i8 *%q, i8* noalias %r) { +; CHECK-LABEL: @test7( +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[Q:%.*]], i8* [[R:%.*]], i64 900, i1 false) +; CHECK-NEXT: store i32 30, i32* [[P:%.*]], align 4 +; CHECK-NEXT: ret void +; + store i32 10, i32* %p, align 4 ;; dead. + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %q, i8* %r, i64 900, i1 false) + store i32 30, i32* %p, align 4 + ret void +} + +; Should delete store of 10 even though memcpy is a may-store to P (P and Q may +; alias). +define void @test7_atomic(i32* align 4 %p, i8* align 4 %q, i8* noalias align 4 %r) { +; CHECK-LABEL: @test7_atomic( +; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 [[Q:%.*]], i8* align 4 [[R:%.*]], i64 900, i32 4) +; CHECK-NEXT: store atomic i32 30, i32* [[P:%.*]] unordered, align 4 +; CHECK-NEXT: ret void +; + store atomic i32 10, i32* %p unordered, align 4 ;; dead. + call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 %q, i8* align 4 %r, i64 900, i32 4) + store atomic i32 30, i32* %p unordered, align 4 + ret void +} + + ; va_arg has fuzzy dependence, the store shouldn't be zapped. define double @test10(i8* %X) { ; CHECK-LABEL: @test10( @@ -64,6 +134,46 @@ define double @test10(i8* %X) { ret double %tmp.0 } +; %P doesn't escape, the DEAD instructions should be removed. +declare void @test13f() +define i32* @test13() { +; CHECK-LABEL: @test13( +; CHECK-NEXT: [[PTR:%.*]] = tail call i8* @malloc(i32 4) +; CHECK-NEXT: [[P:%.*]] = bitcast i8* [[PTR]] to i32* +; CHECK-NEXT: call void @test13f() +; CHECK-NEXT: store i32 0, i32* [[P]] +; CHECK-NEXT: ret i32* [[P]] +; + %ptr = tail call i8* @malloc(i32 4) + %P = bitcast i8* %ptr to i32* + %DEAD = load i32, i32* %P + %DEAD2 = add i32 %DEAD, 1 + store i32 %DEAD2, i32* %P + call void @test13f( ) + store i32 0, i32* %P + ret i32* %P +} + +define i32 addrspace(1)* @test13_addrspacecast() { +; CHECK-LABEL: @test13_addrspacecast( +; CHECK-NEXT: [[P:%.*]] = tail call i8* @malloc(i32 4) +; CHECK-NEXT: [[P_BC:%.*]] = bitcast i8* [[P]] to i32* +; CHECK-NEXT: [[P:%.*]] = addrspacecast i32* [[P_BC]] to i32 addrspace(1)* +; CHECK-NEXT: call void @test13f() +; CHECK-NEXT: store i32 0, i32 addrspace(1)* [[P]] +; CHECK-NEXT: ret i32 addrspace(1)* [[P]] +; + %p = tail call i8* @malloc(i32 4) + %p.bc = bitcast i8* %p to i32* + %P = addrspacecast i32* %p.bc to i32 addrspace(1)* + %DEAD = load i32, i32 addrspace(1)* %P + %DEAD2 = add i32 %DEAD, 1 + store i32 %DEAD2, i32 addrspace(1)* %P + call void @test13f( ) + store i32 0, i32 addrspace(1)* %P + ret i32 addrspace(1)* %P +} + declare noalias i8* @malloc(i32) declare noalias i8* @calloc(i32, i32) @@ -108,6 +218,26 @@ define noalias i8* @test23() nounwind uwtable ssp { ret i8* %call } +; Make sure same sized store to later element is deleted +define void @test24([2 x i32]* %a, i32 %b, i32 %c) nounwind { +; CHECK-LABEL: @test24( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A:%.*]], i64 0, i64 0 +; CHECK-NEXT: store i32 [[B:%.*]], i32* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A]], i64 0, i64 1 +; CHECK-NEXT: store i32 [[C:%.*]], i32* [[TMP2]], align 4 +; CHECK-NEXT: ret void +; + %1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 0 + store i32 0, i32* %1, align 4 + %2 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 1 + store i32 0, i32* %2, align 4 + %3 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 0 + store i32 %b, i32* %3, align 4 + %4 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 1 + store i32 %c, i32* %4, align 4 + ret void +} + ; Check another case like PR13547 where strdup is not like malloc. define i8* @test25(i8* %p) nounwind { ; CHECK-LABEL: @test25( @@ -187,6 +317,35 @@ bb3: declare void @unknown_func() +; Don't remove redundant store because of unknown call. +define i32 @test30(i1 %c, i32* %p, i32 %i) { +; CHECK-LABEL: @test30( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[P:%.*]], align 4 +; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: call void @unknown_func() +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: store i32 [[V]], i32* [[P]], align 4 +; CHECK-NEXT: ret i32 0 +; +entry: + %v = load i32, i32* %p, align 4 + br i1 %c, label %bb1, label %bb2 +bb1: + br label %bb3 +bb2: + ; Might overwrite value at %p + call void @unknown_func() + br label %bb3 +bb3: + store i32 %v, i32* %p, align 4 + ret i32 0 +} + ; Don't remove redundant store in a loop with a may-alias store. define i32 @test32(i1 %c, i32* %p, i32 %i) { ; CHECK-LABEL: @test32( @@ -294,8 +453,7 @@ define void @test37_atomic(i8* %P, i8* %Q, i8* %R) { ; The memmove is dead, because memcpy arguments cannot overlap. define void @test38(i8* %P, i8* %Q, i8* %R) { ; CHECK-LABEL: @test38( -; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false) -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[R:%.*]], i64 12, i1 false) +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[R:%.*]], i64 12, i1 false) ; CHECK-NEXT: ret void ; @@ -307,8 +465,7 @@ define void @test38(i8* %P, i8* %Q, i8* %R) { ; The memmove is dead, because memcpy arguments cannot overlap. define void @test38_atomic(i8* %P, i8* %Q, i8* %R) { ; CHECK-LABEL: @test38_atomic( -; CHECK-NEXT: tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1) -; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[R:%.*]], i64 12, i32 1) +; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[R:%.*]], i64 12, i32 1) ; CHECK-NEXT: ret void ; @@ -379,7 +536,9 @@ declare void @free(i8* nocapture) define void @test41(i32* noalias %P) { ; CHECK-LABEL: @test41( ; CHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8* +; CHECK-NEXT: store i32 1, i32* [[P]] ; CHECK-NEXT: call void @unknown_func() +; CHECK-NEXT: store i32 2, i32* [[P]] ; CHECK-NEXT: call void @free(i8* [[P2]]) ; CHECK-NEXT: ret void ; -- 2.7.4