From 3ef78d6d38992ee9f8e72597f7039e2a94aa16fe Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Fri, 8 Apr 2016 10:30:09 +0000 Subject: [PATCH] [FIX] Adjust execution context of hoisted loads wrt. error domains If we build the domains for error blocks and later remove them we lose the information that they are not executed. Thus, in the SCoP it looks like the control will always reach the statement S: for (i = 0 ... N) if (*valid == 0) doSth(&ptr); S: A[i] = *ptr; Consequently, we would have assumed "ptr" to be always accessed and preloaded it unconditionally. However, only if "*valid != 0" we would execute the optimized version of the SCoP. Nevertheless, we would have hoisted and accessed "ptr"regardless of "*valid". This changes the semantic of the program as the value of "*valid" can cause a change of "ptr" and control if it is executed or not. To fix this problem we adjust the execution context of hoisted loads wrt. error domains. To this end we introduce an ErrorDomainCtxMap that maps each basic block to the error context under which it might be executed. Thus, to the context under which it is executed but an error block would have been executed to. To fill this map one traversal of the blocks in the SCoP suffices. During this traversal we do also "remove" error statements and those that are only reachable via error statements. This was previously done by the removeErrorBlockDomains function which is therefor not needed anymore. This fixes bug PR26683 and thereby several SPEC miscompiles. Differential Revision: http://reviews.llvm.org/D18822 llvm-svn: 265778 --- polly/include/polly/ScopInfo.h | 25 ++++++-- polly/lib/Analysis/ScopInfo.cpp | 115 +++++++++++++++++++++++----------- polly/test/ScopInfo/error-blocks-2.ll | 72 +++++++++++++++++++++ 3 files changed, 171 insertions(+), 41 deletions(-) create mode 100644 polly/test/ScopInfo/error-blocks-2.ll diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h index 4b75141..ea582f5 100644 --- a/polly/include/polly/ScopInfo.h +++ b/polly/include/polly/ScopInfo.h @@ -1317,6 +1317,14 @@ private: /// @brief A map from basic blocks to their domains. DenseMap DomainMap; + /// @brief A map from basic blocks to the error context reaching them. + /// + /// The error context describes all parameter configurations under which + /// the block would be executed but the control would pass an error block. + /// This information is not contained in the domain and only implicit in the + /// AssumedContext/InvalidContext. + DenseMap ErrorDomainCtxMap; + /// Constraints on parameters. isl_set *Context; @@ -1494,14 +1502,20 @@ private: void propagateDomainConstraints(Region *R, ScopDetection &SD, DominatorTree &DT, LoopInfo &LI); - /// @brief Remove domains of error blocks/regions (and blocks dominated by - /// them). + /// @brief Propagate of error block domain contexts through @p R. /// + /// This method will propagate error block domain contexts through @p R + /// and thereby fill the ErrorDomainCtxMap map. Additionally, the domains + /// of error statements and those only reachable via error statements will + /// be replaced by an empty set. Later those will be removed completely from + /// the SCoP. + /// + /// @param R The currently traversed region. /// @param SD The ScopDetection analysis for the current function. /// @param DT The DominatorTree for the current function. /// @param LI The LoopInfo for the current function. - void removeErrorBlockDomains(ScopDetection &SD, DominatorTree &DT, - LoopInfo &LI); + void propagateErrorConstraints(Region *R, ScopDetection &SD, + DominatorTree &DT, LoopInfo &LI); /// @brief Compute the domain for each basic block in @p R. /// @@ -1537,6 +1551,9 @@ private: /// @see isIgnored() void simplifySCoP(bool RemoveIgnoredStmts, DominatorTree &DT, LoopInfo &LI); + /// @brief Return the error context under which @p Stmt is reached. + __isl_give isl_set *getErrorCtxReachingStmt(ScopStmt &Stmt); + /// @brief Create equivalence classes for required invariant accesses. /// /// These classes will consolidate multiple required invariant loads from the diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp index a56318b..9c81b43 100644 --- a/polly/lib/Analysis/ScopInfo.cpp +++ b/polly/lib/Analysis/ScopInfo.cpp @@ -2118,39 +2118,6 @@ isl_set *Scop::getDomainConditions(BasicBlock *BB) { return isl_set_copy(DomainMap[BB]); } -void Scop::removeErrorBlockDomains(ScopDetection &SD, DominatorTree &DT, - LoopInfo &LI) { - auto removeDomains = [this, &DT](BasicBlock *Start) { - auto *BBNode = DT.getNode(Start); - for (auto *ErrorChild : depth_first(BBNode)) { - auto *ErrorChildBlock = ErrorChild->getBlock(); - auto *CurrentDomain = DomainMap[ErrorChildBlock]; - auto *Empty = isl_set_empty(isl_set_get_space(CurrentDomain)); - DomainMap[ErrorChildBlock] = Empty; - isl_set_free(CurrentDomain); - } - }; - - SmallVector Todo = {&R}; - - while (!Todo.empty()) { - auto *SubRegion = Todo.back(); - Todo.pop_back(); - - if (!SD.isNonAffineSubRegion(SubRegion, &getRegion())) { - for (auto &Child : *SubRegion) - Todo.push_back(Child.get()); - continue; - } - if (containsErrorBlock(SubRegion->getNode(), getRegion(), LI, DT)) - removeDomains(SubRegion->getEntry()); - } - - for (auto *BB : R.blocks()) - if (isErrorBlock(*BB, R, LI, DT)) - removeDomains(BB); -} - bool Scop::buildDomains(Region *R, ScopDetection &SD, DominatorTree &DT, LoopInfo &LI) { @@ -2178,12 +2145,17 @@ bool Scop::buildDomains(Region *R, ScopDetection &SD, DominatorTree &DT, // Error blocks and blocks dominated by them have been assumed to never be // executed. Representing them in the Scop does not add any value. In fact, // it is likely to cause issues during construction of the ScopStmts. The - // contents of error blocks have not been verfied to be expressible and + // contents of error blocks have not been verified to be expressible and // will cause problems when building up a ScopStmt for them. // Furthermore, basic blocks dominated by error blocks may reference // instructions in the error block which, if the error block is not modeled, - // can themselves not be constructed properly. - removeErrorBlockDomains(SD, DT, LI); + // can themselves not be constructed properly. To this end we will replace + // the domains of error blocks and those only reachable via error blocks + // with an empty set. Additionally, we will record for each block under which + // parameter combination it would be reached via an error block in the + // ErrorDomainCtxMap map. This information is needed during load hoisting. + propagateErrorConstraints(R, SD, DT, LI); + return true; } @@ -2246,6 +2218,65 @@ static __isl_give isl_set *adjustDomainDimensions(Scop &S, return Dom; } +void Scop::propagateErrorConstraints(Region *R, ScopDetection &SD, + DominatorTree &DT, LoopInfo &LI) { + + ReversePostOrderTraversal RTraversal(R); + for (auto *RN : RTraversal) { + + // Recurse for affine subregions but go on for basic blocks and non-affine + // subregions. + if (RN->isSubRegion()) { + Region *SubRegion = RN->getNodeAs(); + if (!SD.isNonAffineSubRegion(SubRegion, &getRegion())) { + propagateErrorConstraints(SubRegion, SD, DT, LI); + continue; + } + } + + bool ContainsErrorBlock = containsErrorBlock(RN, getRegion(), LI, DT); + BasicBlock *BB = getRegionNodeBasicBlock(RN); + isl_set *&Domain = DomainMap[BB]; + assert(Domain && "Cannot propagate a nullptr"); + + auto *&ErrorCtx = ErrorDomainCtxMap[BB]; + auto *DomainCtx = isl_set_params(isl_set_copy(Domain)); + bool IsErrorBlock = ContainsErrorBlock || + (ErrorCtx && isl_set_is_subset(DomainCtx, ErrorCtx)); + + if (IsErrorBlock) { + ErrorCtx = ErrorCtx ? isl_set_union(ErrorCtx, DomainCtx) : DomainCtx; + auto *EmptyDom = isl_set_empty(isl_set_get_space(Domain)); + isl_set_free(Domain); + Domain = EmptyDom; + } else { + isl_set_free(DomainCtx); + } + + if (!ErrorCtx) + continue; + + auto *TI = BB->getTerminator(); + unsigned NumSuccs = RN->isSubRegion() ? 1 : TI->getNumSuccessors(); + for (unsigned u = 0; u < NumSuccs; u++) { + auto *SuccBB = getRegionNodeSuccessor(RN, TI, u); + auto *&SuccErrorCtx = ErrorDomainCtxMap[SuccBB]; + auto *CurErrorCtx = isl_set_copy(ErrorCtx); + SuccErrorCtx = + SuccErrorCtx ? isl_set_union(SuccErrorCtx, CurErrorCtx) : CurErrorCtx; + SuccErrorCtx = isl_set_coalesce(SuccErrorCtx); + + // Check if the maximal number of domain conjuncts was reached. + // In case this happens we will bail. + if (isl_set_n_basic_set(SuccErrorCtx) < MaxConjunctsInDomain) + continue; + + invalidate(COMPLEXITY, TI->getDebugLoc()); + return; + } + } +} + void Scop::propagateDomainConstraintsToRegionExit( BasicBlock *BB, Loop *BBLoop, SmallPtrSetImpl &FinishedExitBlocks, ScopDetection &SD, @@ -2932,6 +2963,8 @@ Scop::~Scop() { for (auto It : DomainMap) isl_set_free(It.second); + for (auto It : ErrorDomainCtxMap) + isl_set_free(It.second); // Free the alias groups for (MinMaxVectorPairTy &MinMaxAccessPair : MinMaxAliasGroups) { @@ -3044,10 +3077,18 @@ const InvariantEquivClassTy *Scop::lookupInvariantEquivClass(Value *Val) const { return nullptr; } +isl_set *Scop::getErrorCtxReachingStmt(ScopStmt &Stmt) { + auto *BB = Stmt.getEntryBlock(); + return isl_set_copy(ErrorDomainCtxMap.lookup(BB)); +} + void Scop::addInvariantLoads(ScopStmt &Stmt, MemoryAccessList &InvMAs) { - // Get the context under which the statement is executed. + // Get the context under which the statement is executed but remove the error + // context under which this statement is reached. isl_set *DomainCtx = isl_set_params(Stmt.getDomain()); + if (auto *ErrorCtx = getErrorCtxReachingStmt(Stmt)) + DomainCtx = isl_set_subtract(DomainCtx, ErrorCtx); DomainCtx = isl_set_remove_redundancies(DomainCtx); DomainCtx = isl_set_detect_equalities(DomainCtx); DomainCtx = isl_set_coalesce(DomainCtx); diff --git a/polly/test/ScopInfo/error-blocks-2.ll b/polly/test/ScopInfo/error-blocks-2.ll new file mode 100644 index 0000000..c27ba63 --- /dev/null +++ b/polly/test/ScopInfo/error-blocks-2.ll @@ -0,0 +1,72 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; +; CHECK: Invariant Accesses: { +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [N, valid_val] -> { Stmt_for_body[i0] -> MemRef_valid[0] }; +; CHECK-NEXT: Execution Context: [N, valid_val] -> { : N > 0 } +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [N, valid_val] -> { Stmt_S[i0] -> MemRef_ptr_addr[0] }; +; CHECK-NEXT: Execution Context: [N, valid_val] -> { : N > 0 and (valid_val < 0 or valid_val > 0) } +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [N, valid_val] -> { Stmt_S[i0] -> MemRef_tmp2[0] }; +; CHECK-NEXT: Execution Context: [N, valid_val] -> { : N > 0 and (valid_val < 0 or valid_val > 0) } +; CHECK-NEXT: } +; CHECK-NEXT: Context: +; CHECK-NEXT: [N, valid_val] -> { : -2147483648 <= N <= 2147483647 and -2147483648 <= valid_val <= 2147483647 } +; CHECK-NEXT: Assumed Context: +; CHECK-NEXT: [N, valid_val] -> { : } +; CHECK-NEXT: Invalid Context: +; CHECK-NEXT: [N, valid_val] -> { : valid_val = 0 and N > 0 } +; +; CHECK: Statements { +; CHECK-NEXT: Stmt_S +; CHECK-NEXT: Domain := +; CHECK-NEXT: [N, valid_val] -> { Stmt_S[i0] : 0 <= i0 < N }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: [N, valid_val] -> { Stmt_S[i0] -> [i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [N, valid_val] -> { Stmt_S[i0] -> MemRef_A[i0] }; +; CHECK-NEXT: } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32 %N, i32* noalias %valid, i32* noalias %ptr, i32* noalias %A) { +entry: + %ptr.addr = alloca i32*, align 8 + store i32* %ptr, i32** %ptr.addr, align 8 + %tmp = sext i32 %N to i64 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %cmp = icmp slt i64 %indvars.iv, %tmp + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %valid_val = load i32, i32* %valid, align 4 + %cmp1 = icmp eq i32 %valid_val, 0 + br i1 %cmp1, label %if.then, label %if.end + +if.then: ; preds = %for.body + call void @doSth(i32** nonnull %ptr.addr) + br label %if.end + +if.end: ; preds = %if.then, %for.body + br label %S + +S: ; preds = %if.end + %tmp2 = load i32*, i32** %ptr.addr, align 8 + %tmp3 = load i32, i32* %tmp2, align 4 + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + store i32 %tmp3, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %S + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +declare void @doSth(i32**) -- 2.7.4