#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
PostDominatorTree &PDT;
const TargetLibraryInfo &TLI;
const DataLayout &DL;
+ const LoopInfo &LI;
+
+ // Whether the function contains any irreducible control flow, useful for
+ // being able to accurately detect loops.
+ bool ContainsIrreducibleLoops;
// All MemoryDefs that potentially could kill other MemDefs.
SmallVector<MemoryDef *, 64> MemDefs;
DenseMap<BasicBlock *, InstOverlapIntervalsTy> IOLs;
DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
- PostDominatorTree &PDT, const TargetLibraryInfo &TLI)
+ PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
+ const LoopInfo &LI)
: F(F), AA(AA), BatchAA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI),
- DL(F.getParent()->getDataLayout()) {}
+ DL(F.getParent()->getDataLayout()), LI(LI) {}
static DSEState get(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
DominatorTree &DT, PostDominatorTree &PDT,
- const TargetLibraryInfo &TLI) {
- DSEState State(F, AA, MSSA, DT, PDT, TLI);
+ const TargetLibraryInfo &TLI, const LoopInfo &LI) {
+ DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
// Collect blocks with throwing instructions not modeled in MemorySSA and
// alloc-like objects.
unsigned PO = 0;
State.InvisibleToCallerAfterRet.insert({&AI, true});
}
+ // Collect whether there is any irreducible control flow in the function.
+ State.ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI);
+
return State;
}
isOverwrite(const Instruction *LaterI, const Instruction *EarlierI,
const MemoryLocation &Later, const MemoryLocation &Earlier,
int64_t &EarlierOff, int64_t &LaterOff) {
+ // AliasAnalysis does not always account for loops. Limit overwrite checks
+ // to dependencies for which we can guarantee they are independent of any
+ // loops they are in.
+ if (!isGuaranteedLoopIndependent(EarlierI, LaterI, Earlier))
+ return OW_Unknown;
+
// FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
// get imprecise values here, though (except for unknown sizes).
if (!Later.Size.isPrecise() || !Earlier.Size.isPrecise()) {
return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
}
- /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
- /// loop. In particular, this guarantees that it only references a single
- /// MemoryLocation during execution of the containing function.
- bool IsGuaranteedLoopInvariant(Value *Ptr) {
- auto IsGuaranteedLoopInvariantBase = [this](Value *Ptr) {
+ /// Returns true if a dependency between \p Current and \p KillingDef is
+ /// guaranteed to be loop invariant for the loops that they are in. Either
+ /// because they are known to be in the same block, in the same loop level or
+ /// by guaranteeing that \p CurrentLoc only references a single MemoryLocation
+ /// during execution of the containing function.
+ bool isGuaranteedLoopIndependent(const Instruction *Current,
+ const Instruction *KillingDef,
+ const MemoryLocation &CurrentLoc) {
+ // If the dependency is within the same block or loop level (being careful of
+ // irreducible loops), we know that AA will return a valid result for the
+ // memory dependency. (Both at the function level, outside of any loop,
+ // would also be valid but we currently disable that to limit compile time).
+ if (Current->getParent() == KillingDef->getParent())
+ return true;
+ const Loop *CurrentLI = LI.getLoopFor(Current->getParent());
+ if (!ContainsIrreducibleLoops && CurrentLI &&
+ CurrentLI == LI.getLoopFor(KillingDef->getParent()))
+ return true;
+
+ // Otherwise check the memory location is invariant to any loops.
+ auto IsGuaranteedLoopInvariantBase = [this](const Value *Ptr) {
Ptr = Ptr->stripPointerCasts();
if (auto *I = dyn_cast<Instruction>(Ptr)) {
if (isa<AllocaInst>(Ptr))
return true;
};
- Ptr = Ptr->stripPointerCasts();
+ const Value *Ptr = CurrentLoc.Ptr->stripPointerCasts();
if (auto *I = dyn_cast<Instruction>(Ptr)) {
if (I->getParent()->isEntryBlock())
return true;
// AliasAnalysis does not account for loops. Limit elimination to
// candidates for which we can guarantee they always store to the same
- // memory location and not multiple locations in a loop.
- if (Current->getBlock() != KillingDef->getBlock() &&
- !IsGuaranteedLoopInvariant(const_cast<Value *>(CurrentLoc->Ptr))) {
+ // memory location and are not located in different loops.
+ if (!isGuaranteedLoopIndependent(CurrentI, KillingI, *CurrentLoc)) {
+ LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n");
StepAgain = true;
Current = CurrentDef->getDefiningAccess();
WalkerStepLimit -= 1;
}
};
-bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
- DominatorTree &DT, PostDominatorTree &PDT,
- const TargetLibraryInfo &TLI) {
+static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
+ DominatorTree &DT, PostDominatorTree &PDT,
+ const TargetLibraryInfo &TLI,
+ const LoopInfo &LI) {
bool MadeChange = false;
- DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI);
+ DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI, LI);
// For each store:
for (unsigned I = 0; I < State.MemDefs.size(); I++) {
MemoryDef *KillingDef = State.MemDefs[I];
DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
+ LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
- bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI);
+ bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
#ifdef LLVM_ENABLE_STATS
if (AreStatisticsEnabled())
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
PA.preserve<MemorySSAAnalysis>();
+ PA.preserve<LoopAnalysis>();
return PA;
}
MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
PostDominatorTree &PDT =
getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
+ LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI);
+ bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
#ifdef LLVM_ENABLE_STATS
if (AreStatisticsEnabled())
AU.addRequired<MemorySSAWrapperPass>();
AU.addPreserved<PostDominatorTreeWrapperPass>();
AU.addPreserved<MemorySSAWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
}
};
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
false)
; CHECK: for.body4.lr.ph:
; CHECK-NEXT: [[I_028:%.*]] = phi i32 [ [[INC11:%.*]], [[FOR_COND_CLEANUP3:%.*]] ], [ 0, [[FOR_BODY4_LR_PH_PREHEADER]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[I_028]]
-; CHECK-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[I_028]], [[N]]
; CHECK-NEXT: br label [[FOR_BODY4:%.*]]
; CHECK: for.body4:
define i16 @partial_override_overloop(i1 %c, i32 %i) {
; CHECK-LABEL: @partial_override_overloop(
; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FIRST:%.*]]
+; CHECK: first:
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i32 [[I:%.*]]
+; CHECK-NEXT: store i16 1, i16* [[ARRAYIDX]], align 1
; CHECK-NEXT: br label [[DO_BODY:%.*]]
; CHECK: do.body:
-; CHECK-NEXT: [[I_0:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[DO_BODY]] ]
+; CHECK-NEXT: [[I_0:%.*]] = phi i16 [ 0, [[FIRST]] ], [ [[INC:%.*]], [[DO_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 [[I_0]]
; CHECK-NEXT: store i16 2, i16* [[ARRAYIDX2]], align 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i16 [[I_0]], 4
; CHECK-NEXT: ret i16 0
;
entry:
+ ; Branch to first so MemoryLoc is not in the entry block.
+ br label %first
+
+first:
%arrayidx = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i32 %i
store i16 1, i16* %arrayidx, align 1
br label %do.body
do.body:
- %i.0 = phi i16 [ 0, %entry ], [ %inc, %do.body ]
+ %i.0 = phi i16 [ 0, %first ], [ %inc, %do.body ]
%arrayidx2 = getelementptr inbounds [10 x i16], [10 x i16]* @x, i16 0, i16 %i.0
store i16 2, i16* %arrayidx2, align 1
%exitcond = icmp eq i16 %i.0, 4