From: Michael Kruse Date: Tue, 1 Mar 2016 21:44:06 +0000 (+0000) Subject: Fix non-synthesizable loop exit values. X-Git-Tag: llvmorg-3.9.0-rc1~12756 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=c7e0d9c216d552456f78be3fc94f077d009bdb1f;p=platform%2Fupstream%2Fllvm.git Fix non-synthesizable loop exit values. Polly recognizes affine loops that ScalarEvolution does not, in particular those with loop conditions that depend on hoisted invariant loads. Check for SCEVAddRec dependencies on such loops and do not consider their exit values as synthesizable because SCEVExpander would generate them as expressions that depend on the original induction variables. These are not available in generated code. llvm-svn: 262404 --- diff --git a/polly/include/polly/ScopDetection.h b/polly/include/polly/ScopDetection.h index 294157c..b91a4e0 100644 --- a/polly/include/polly/ScopDetection.h +++ b/polly/include/polly/ScopDetection.h @@ -146,9 +146,9 @@ public: /// @brief The set of base pointers with non-affine accesses. /// - /// This set contains all base pointers which are used in memory accesses - /// that can not be detected as affine accesses. - SetVector NonAffineAccesses; + /// This set contains all base pointers and the locations where they are + /// used for memory accesses that can not be detected as affine accesses. + SetVector> NonAffineAccesses; BaseToElSize ElementSize; /// @brief The region has at least one load instruction. @@ -252,11 +252,12 @@ private: /// @param Context The current detection context. /// @param Sizes The sizes of the different array dimensions. /// @param BasePointer The base pointer we are interested in. + /// @param Scope The location where @p BasePointer is being used. /// @returns True if one or more array sizes could be derived - meaning: we /// see this array as multi-dimensional. bool hasValidArraySizes(DetectionContext &Context, SmallVectorImpl &Sizes, - const SCEVUnknown *BasePointer) const; + const SCEVUnknown *BasePointer, Loop *Scope) const; /// @brief Derive access functions for a given base pointer. /// @@ -276,10 +277,11 @@ private: /// /// @param Context The current detection context. /// @param basepointer the base pointer we are interested in. + /// @param Scope The location where @p BasePointer is being used. /// @param True if consistent (multi-dimensional) array accesses could be /// derived for this array. bool hasBaseAffineAccesses(DetectionContext &Context, - const SCEVUnknown *BasePointer) const; + const SCEVUnknown *BasePointer, Loop *Scope) const; // Delinearize all non affine memory accesses and return false when there // exists a non affine memory access that cannot be delinearized. Return true diff --git a/polly/include/polly/Support/SCEVValidator.h b/polly/include/polly/Support/SCEVValidator.h index 5d83d13..99bb972 100644 --- a/polly/include/polly/Support/SCEVValidator.h +++ b/polly/include/polly/Support/SCEVValidator.h @@ -46,7 +46,11 @@ void findValues(const llvm::SCEV *Expr, llvm::SetVector &Values); /// /// @param S The SCEV to analyze. /// @param R The region in which we look for dependences. -bool hasScalarDepsInsideRegion(const llvm::SCEV *S, const llvm::Region *R); +/// @param Scope Location where the value is needed. +/// @param AllowLoops Whether loop recurrences outside the loop that are in the +/// region count as dependence. +bool hasScalarDepsInsideRegion(const llvm::SCEV *S, const llvm::Region *R, + llvm::Loop *Scope, bool AllowLoops); bool isAffineExpr(const llvm::Region *R, const llvm::SCEV *Expression, llvm::ScalarEvolution &SE, const llvm::Value *BaseAddress = 0, InvariantLoadsSetTy *ILS = nullptr); diff --git a/polly/include/polly/Support/ScopHelper.h b/polly/include/polly/Support/ScopHelper.h index 165f1f9..7cd23c8 100644 --- a/polly/include/polly/Support/ScopHelper.h +++ b/polly/include/polly/Support/ScopHelper.h @@ -22,6 +22,7 @@ namespace llvm { class LoopInfo; +class Loop; class ScalarEvolution; class SCEV; class Region; @@ -391,11 +392,13 @@ bool isIgnoredIntrinsic(const llvm::Value *V); /// @param LI The LoopInfo analysis. /// @param SE The scalar evolution database. /// @param R The region out of which SSA names are parameters. +/// @param Scope Location where the value would by synthesized. /// @return If the instruction I can be regenerated from its /// scalar evolution representation, return true, /// otherwise return false. bool canSynthesize(const llvm::Value *V, const llvm::LoopInfo *LI, - llvm::ScalarEvolution *SE, const llvm::Region *R); + llvm::ScalarEvolution *SE, const llvm::Region *R, + llvm::Loop *Scope); /// @brief Return the block in which a value is used. /// diff --git a/polly/lib/Analysis/ScopDetection.cpp b/polly/lib/Analysis/ScopDetection.cpp index 4776a7f..47bf547 100644 --- a/polly/lib/Analysis/ScopDetection.cpp +++ b/polly/lib/Analysis/ScopDetection.cpp @@ -716,7 +716,8 @@ ScopDetection::getDelinearizationTerms(DetectionContext &Context, bool ScopDetection::hasValidArraySizes(DetectionContext &Context, SmallVectorImpl &Sizes, - const SCEVUnknown *BasePointer) const { + const SCEVUnknown *BasePointer, + Loop *Scope) const { Value *BaseValue = BasePointer->getValue(); Region &CurRegion = Context.CurRegion; for (const SCEV *DelinearizedSize : Sizes) { @@ -733,7 +734,7 @@ bool ScopDetection::hasValidArraySizes(DetectionContext &Context, continue; } } - if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion)) + if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion, Scope, false)) return invalid( Context, /*Assert=*/true, DelinearizedSize, Context.Accesses[BasePointer].front().first, BaseValue); @@ -813,8 +814,9 @@ bool ScopDetection::computeAccessFunctions( return true; } -bool ScopDetection::hasBaseAffineAccesses( - DetectionContext &Context, const SCEVUnknown *BasePointer) const { +bool ScopDetection::hasBaseAffineAccesses(DetectionContext &Context, + const SCEVUnknown *BasePointer, + Loop *Scope) const { auto Shape = std::shared_ptr(new ArrayShape(BasePointer)); auto Terms = getDelinearizationTerms(Context, BasePointer); @@ -822,7 +824,8 @@ bool ScopDetection::hasBaseAffineAccesses( SE->findArrayDimensions(Terms, Shape->DelinearizedSizes, Context.ElementSize[BasePointer]); - if (!hasValidArraySizes(Context, Shape->DelinearizedSizes, BasePointer)) + if (!hasValidArraySizes(Context, Shape->DelinearizedSizes, BasePointer, + Scope)) return false; return computeAccessFunctions(Context, BasePointer, Shape); @@ -834,13 +837,16 @@ bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const { if (Context.HasUnknownAccess && !Context.NonAffineAccesses.empty()) return AllowNonAffine; - for (const SCEVUnknown *BasePointer : Context.NonAffineAccesses) - if (!hasBaseAffineAccesses(Context, BasePointer)) { + for (auto &Pair : Context.NonAffineAccesses) { + auto *BasePointer = Pair.first; + auto *Scope = Pair.second; + if (!hasBaseAffineAccesses(Context, BasePointer, Scope)) { if (KeepGoing) continue; else return false; } + } return true; } @@ -901,7 +907,8 @@ bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF, Context.Accesses[BP].push_back({Inst, AF}); if (!IsAffine) - Context.NonAffineAccesses.insert(BP); + Context.NonAffineAccesses.insert( + std::make_pair(BP, LI->getLoopFor(Inst->getParent()))); } else if (!AllowNonAffine && !IsAffine) { return invalid(Context, /*Assert=*/true, AF, Inst, BV); diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp index e5de9c6..135c6fc 100644 --- a/polly/lib/Analysis/ScopInfo.cpp +++ b/polly/lib/Analysis/ScopInfo.cpp @@ -3741,7 +3741,8 @@ void ScopInfo::buildPHIAccesses(PHINode *PHI, Region &R, // If we can synthesize a PHI we can skip it, however only if it is in // the region. If it is not it can only be in the exit block of the region. // In this case we model the operands but not the PHI itself. - if (!IsExitBlock && canSynthesize(PHI, LI, SE, &R)) + auto *Scope = LI->getLoopFor(PHI->getParent()); + if (!IsExitBlock && canSynthesize(PHI, LI, SE, &R, Scope)) return; // PHI nodes are modeled as if they had been demoted prior to the SCoP @@ -4219,7 +4220,8 @@ void ScopInfo::ensureValueRead(Value *V, BasicBlock *UserBB) { // If the instruction can be synthesized and the user is in the region we do // not need to add a value dependences. Region &ScopRegion = scop->getRegion(); - if (canSynthesize(V, LI, SE, &ScopRegion)) + auto *Scope = LI->getLoopFor(UserBB); + if (canSynthesize(V, LI, SE, &ScopRegion, Scope)) return; // Do not build scalar dependences for required invariant loads as we will diff --git a/polly/lib/CodeGen/BlockGenerators.cpp b/polly/lib/CodeGen/BlockGenerators.cpp index 6f48717..4beb30a 100644 --- a/polly/lib/CodeGen/BlockGenerators.cpp +++ b/polly/lib/CodeGen/BlockGenerators.cpp @@ -238,7 +238,7 @@ void BlockGenerator::generateScalarStore(ScopStmt &Stmt, StoreInst *Store, bool BlockGenerator::canSyntheziseInStmt(ScopStmt &Stmt, Instruction *Inst) { Loop *L = getLoopForStmt(Stmt); return (Stmt.isBlockStmt() || !Stmt.getRegion()->contains(L)) && - canSynthesize(Inst, &LI, &SE, &Stmt.getParent()->getRegion()); + canSynthesize(Inst, &LI, &SE, &Stmt.getParent()->getRegion(), L); } void BlockGenerator::copyInstruction(ScopStmt &Stmt, Instruction *Inst, diff --git a/polly/lib/CodeGen/IslNodeBuilder.cpp b/polly/lib/CodeGen/IslNodeBuilder.cpp index 69adab1..3068de4 100644 --- a/polly/lib/CodeGen/IslNodeBuilder.cpp +++ b/polly/lib/CodeGen/IslNodeBuilder.cpp @@ -191,14 +191,15 @@ struct SubtreeReferences { static int findReferencesInBlock(struct SubtreeReferences &References, const ScopStmt *Stmt, const BasicBlock *BB) { for (const Instruction &Inst : *BB) - for (Value *SrcVal : Inst.operands()) - if (canSynthesize(SrcVal, &References.LI, &References.SE, - &References.R)) { - References.SCEVs.insert( - References.SE.getSCEVAtScope(SrcVal, References.LI.getLoopFor(BB))); + for (Value *SrcVal : Inst.operands()) { + auto *Scope = References.LI.getLoopFor(BB); + if (canSynthesize(SrcVal, &References.LI, &References.SE, &References.R, + Scope)) { + References.SCEVs.insert(References.SE.getSCEVAtScope(SrcVal, Scope)); continue; } else if (Value *NewVal = References.GlobalMap.lookup(SrcVal)) References.Values.insert(NewVal); + } return 0; } diff --git a/polly/lib/Support/SCEVValidator.cpp b/polly/lib/Support/SCEVValidator.cpp index 8a12650..492264b 100644 --- a/polly/lib/Support/SCEVValidator.cpp +++ b/polly/lib/Support/SCEVValidator.cpp @@ -423,13 +423,18 @@ public: struct SCEVInRegionDependences : public SCEVVisitor { public: - /// Returns true when the SCEV has SSA names defined in region R. - static bool hasDependences(const SCEV *S, const Region *R) { - SCEVInRegionDependences Ignore(R); + /// Returns true when the SCEV has SSA names defined in region R. It @p + /// AllowLoops is false, loop dependences are checked as well. AddRec SCEVs + /// are only allowed within its loop (current loop determined by @p Scope), + /// not outside of it unless AddRec's loop is not even in the region. + static bool hasDependences(const SCEV *S, const Region *R, Loop *Scope, + bool AllowLoops) { + SCEVInRegionDependences Ignore(R, Scope, AllowLoops); return Ignore.visit(S); } - SCEVInRegionDependences(const Region *R) : R(R) {} + SCEVInRegionDependences(const Region *R, Loop *Scope, bool AllowLoops) + : R(R), Scope(Scope), AllowLoops(AllowLoops) {} bool visit(const SCEV *Expr) { return SCEVVisitor::visit(Expr); @@ -476,6 +481,14 @@ public: } bool visitAddRecExpr(const SCEVAddRecExpr *Expr) { + if (!AllowLoops) { + if (!Scope) + return true; + auto *L = Expr->getLoop(); + if (R->contains(L) && !L->contains(Scope)) + return true; + } + for (size_t i = 0; i < Expr->getNumOperands(); ++i) if (visit(Expr->getOperand(i))) return true; @@ -511,6 +524,8 @@ public: private: const Region *R; + Loop *Scope; + bool AllowLoops; }; namespace polly { @@ -556,8 +571,9 @@ void findValues(const SCEV *Expr, SetVector &Values) { ST.visitAll(Expr); } -bool hasScalarDepsInsideRegion(const SCEV *Expr, const Region *R) { - return SCEVInRegionDependences::hasDependences(Expr, R); +bool hasScalarDepsInsideRegion(const SCEV *Expr, const Region *R, + llvm::Loop *Scope, bool AllowLoops) { + return SCEVInRegionDependences::hasDependences(Expr, R, Scope, AllowLoops); } bool isAffineExpr(const Region *R, const SCEV *Expr, ScalarEvolution &SE, diff --git a/polly/lib/Support/ScopHelper.cpp b/polly/lib/Support/ScopHelper.cpp index f845be8..e73946f 100644 --- a/polly/lib/Support/ScopHelper.cpp +++ b/polly/lib/Support/ScopHelper.cpp @@ -436,13 +436,13 @@ bool polly::isIgnoredIntrinsic(const Value *V) { } bool polly::canSynthesize(const Value *V, const llvm::LoopInfo *LI, - ScalarEvolution *SE, const Region *R) { + ScalarEvolution *SE, const Region *R, Loop *Scope) { if (!V || !SE->isSCEVable(V->getType())) return false; - if (const SCEV *Scev = SE->getSCEV(const_cast(V))) + if (const SCEV *Scev = SE->getSCEVAtScope(const_cast(V), Scope)) if (!isa(Scev)) - if (!hasScalarDepsInsideRegion(Scev, R)) + if (!hasScalarDepsInsideRegion(Scev, R, Scope, false)) return true; return false; diff --git a/polly/test/Isl/CodeGen/unpredictable-loop-unsynthesizable.ll b/polly/test/Isl/CodeGen/unpredictable-loop-unsynthesizable.ll new file mode 100644 index 0000000..1f8940f --- /dev/null +++ b/polly/test/Isl/CodeGen/unpredictable-loop-unsynthesizable.ll @@ -0,0 +1,59 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-codegen -analyze < %s + +; The loop for.body is a scop with invariant load hoisting, but does not +; terminate predictably for ScalarEvolution. The scalar %1 therefore is not +; synthesizable using SCEVExpander. We therefore must have Stmt_for_end_loopexit +; to catch the induction variable at loop exit. We also check for not crashing +; at codegen because SCEVExpander would use the original induction variable in +; generated code. + +%struct.bit_stream_struc.3.43.51.71.83.91.99.107.154 = type { i8*, i32, %struct._IO_FILE.1.41.49.69.81.89.97.105.153*, i8*, i32, i64, i32, i32 } +%struct._IO_FILE.1.41.49.69.81.89.97.105.153 = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker.0.40.48.68.80.88.96.104.152*, %struct._IO_FILE.1.41.49.69.81.89.97.105.153*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker.0.40.48.68.80.88.96.104.152 = type { %struct._IO_marker.0.40.48.68.80.88.96.104.152*, %struct._IO_FILE.1.41.49.69.81.89.97.105.153*, i32 } + +define i32 @copy_buffer(%struct.bit_stream_struc.3.43.51.71.83.91.99.107.154* nocapture %bs) { +entry: + %buf_byte_idx5.phi.trans.insert = getelementptr inbounds %struct.bit_stream_struc.3.43.51.71.83.91.99.107.154, %struct.bit_stream_struc.3.43.51.71.83.91.99.107.154* %bs, i64 0, i32 6 + br i1 undef, label %for.body, label %cleanup + +for.body: + %indvars.iv28 = phi i64 [ %indvars.iv.next29, %for.body ], [ 0, %entry ] + %indvars.iv.next29 = add nuw nsw i64 %indvars.iv28, 1 + %0 = load i32, i32* %buf_byte_idx5.phi.trans.insert, align 8 + %cmp6 = icmp sgt i32 0, %0 + br i1 %cmp6, label %for.body, label %for.end.loopexit + +for.end.loopexit: + %1 = trunc i64 %indvars.iv.next29 to i32 + br label %cleanup + +cleanup: + %retval.0 = phi i32 [ 0, %entry ], [ %1, %for.end.loopexit ] + ret i32 %retval.0 +} + + +; CHECK: Invariant Accesses: { +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [p_0_loaded_from_bs] -> { Stmt_for_body[i0] -> MemRef_bs[11] }; +; CHECK-NEXT: Execution Context: [p_0_loaded_from_bs] -> { : } +; CHECK-NEXT: } +; CHECK: Statements { +; CHECK-NEXT: Stmt_for_body +; CHECK-NEXT: Domain := +; CHECK-NEXT: [p_0_loaded_from_bs] -> { Stmt_for_body[0] : p_0_loaded_from_bs >= 0 }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: [p_0_loaded_from_bs] -> { Stmt_for_body[i0] -> [0, 0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [p_0_loaded_from_bs] -> { Stmt_for_body[i0] -> MemRef_indvars_iv_next29[] }; +; CHECK-NEXT: Stmt_for_end_loopexit +; CHECK-NEXT: Domain := +; CHECK-NEXT: [p_0_loaded_from_bs] -> { Stmt_for_end_loopexit[] : p_0_loaded_from_bs >= 0 }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: [p_0_loaded_from_bs] -> { Stmt_for_end_loopexit[] -> [1, 0] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [p_0_loaded_from_bs] -> { Stmt_for_end_loopexit[] -> MemRef_indvars_iv_next29[] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [p_0_loaded_from_bs] -> { Stmt_for_end_loopexit[] -> MemRef_1[] }; +; CHECK-NEXT: }