From: Johannes Doerfert Date: Thu, 17 Nov 2016 22:25:17 +0000 (+0000) Subject: Probably overwritten loads should not be considered hoistable X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=6cd59e9076290a7a66093ed0025e9a4419c0bb42;p=platform%2Fupstream%2Fllvm.git Probably overwritten loads should not be considered hoistable Do not assume a load to be hoistable/invariant if the pointer is used by another instruction in the SCoP that might write to memory and that is always executed. llvm-svn: 287272 --- diff --git a/polly/include/polly/Support/ScopHelper.h b/polly/include/polly/Support/ScopHelper.h index 6298073..cdaa1e8 100644 --- a/polly/include/polly/Support/ScopHelper.h +++ b/polly/include/polly/Support/ScopHelper.h @@ -385,10 +385,11 @@ llvm::Value *getConditionFromTerminator(llvm::TerminatorInst *TI); /// @param R The analyzed region. /// @param LI The loop info. /// @param SE The scalar evolution analysis. +/// @param DT The dominator tree of the function. /// /// @return True if @p LInst can be hoisted in @p R. bool isHoistableLoad(llvm::LoadInst *LInst, llvm::Region &R, llvm::LoopInfo &LI, - llvm::ScalarEvolution &SE); + llvm::ScalarEvolution &SE, const llvm::DominatorTree &DT); /// Return true iff @p V is an intrinsic that we ignore during code /// generation. 
diff --git a/polly/lib/Analysis/ScopDetection.cpp b/polly/lib/Analysis/ScopDetection.cpp index d23b9d5..d63ab32 100644 --- a/polly/lib/Analysis/ScopDetection.cpp +++ b/polly/lib/Analysis/ScopDetection.cpp @@ -314,7 +314,7 @@ bool ScopDetection::onlyValidRequiredInvariantLoads( return false; for (LoadInst *Load : RequiredILS) - if (!isHoistableLoad(Load, CurRegion, *LI, *SE)) + if (!isHoistableLoad(Load, CurRegion, *LI, *SE, *DT)) return false; Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end()); @@ -680,7 +680,7 @@ bool ScopDetection::hasValidArraySizes(DetectionContext &Context, auto *V = dyn_cast<Value>(Unknown->getValue()); if (auto *Load = dyn_cast<LoadInst>(V)) { if (Context.CurRegion.contains(Load) && - isHoistableLoad(Load, CurRegion, *LI, *SE)) + isHoistableLoad(Load, CurRegion, *LI, *SE, *DT)) Context.RequiredILS.insert(Load); continue; } @@ -889,7 +889,7 @@ bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF, Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue()); if (Inst && Context.CurRegion.contains(Inst)) { auto *Load = dyn_cast<LoadInst>(Inst); - if (Load && isHoistableLoad(Load, Context.CurRegion, *LI, *SE)) { + if (Load && isHoistableLoad(Load, Context.CurRegion, *LI, *SE, *DT)) { Context.RequiredILS.insert(Load); continue; } diff --git a/polly/lib/Support/ScopHelper.cpp b/polly/lib/Support/ScopHelper.cpp index 7e092fc..d7fd4a3 100644 --- a/polly/lib/Support/ScopHelper.cpp +++ b/polly/lib/Support/ScopHelper.cpp @@ -436,15 +436,35 @@ Value *polly::getConditionFromTerminator(TerminatorInst *TI) { } bool polly::isHoistableLoad(LoadInst *LInst, Region &R, LoopInfo &LI, - ScalarEvolution &SE) { + ScalarEvolution &SE, const DominatorTree &DT) { Loop *L = LI.getLoopFor(LInst->getParent()); - const SCEV *PtrSCEV = SE.getSCEVAtScope(LInst->getPointerOperand(), L); + auto *Ptr = LInst->getPointerOperand(); + const SCEV *PtrSCEV = SE.getSCEVAtScope(Ptr, L); while (L && R.contains(L)) { if (!SE.isLoopInvariant(PtrSCEV, L)) return false; L = L->getParentLoop(); } + 
for (auto *User : Ptr->users()) { + auto *UserI = dyn_cast<Instruction>(User); + if (!UserI || !R.contains(UserI)) + continue; + if (!UserI->mayWriteToMemory()) + continue; + + auto &BB = *UserI->getParent(); + bool DominatesAllPredecessors = true; + for (auto Pred : predecessors(R.getExit())) + if (R.contains(Pred) && !DT.dominates(&BB, Pred)) + DominatesAllPredecessors = false; + + if (!DominatesAllPredecessors) + continue; + + return false; + } + return true; } diff --git a/polly/test/ScopInfo/complex-successor-structure.ll b/polly/test/ScopInfo/complex-successor-structure.ll index 6e43261..229f88a 100644 --- a/polly/test/ScopInfo/complex-successor-structure.ll +++ b/polly/test/ScopInfo/complex-successor-structure.ll @@ -385,7 +385,7 @@ if.entry: ; preds = %for.body %.reload153 = load i16, i16* %.reg2mem152 %.reload151 = load i16, i16* %.reg2mem150 %.reload = load i16, i16* %.reg2mem - %37 = load i16, i16* %Output, align 2 + %37 = load i16, i16* %In1, align 2 %cmp77 = icmp slt i16 %37, 128 br i1 %cmp77, label %A0, label %B0