From: Johannes Doerfert Date: Thu, 26 Jun 2014 18:38:08 +0000 (+0000) Subject: Hybrid dependency analysis X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=ea23b1d561553984c82daec0aec6fbc2da489cc3;p=platform%2Fupstream%2Fllvm.git Hybrid dependency analysis This dependency analysis will keep track of memory accesses if they might be part of a reduction. If not, the dependences are tracked on a statement level. The main reason to do this is to reduce the compile time while being able to distinguish the effects of reduction and non-reduction accesses. + Adjusted two test cases llvm-svn: 211794 --- diff --git a/polly/include/polly/Dependences.h b/polly/include/polly/Dependences.h index e82431d..5be1ba7 100755 --- a/polly/include/polly/Dependences.h +++ b/polly/include/polly/Dependences.h @@ -119,7 +119,8 @@ private: /// @brief Collect information about the SCoP. void collectInfo(Scop &S, isl_union_map **Read, isl_union_map **Write, - isl_union_map **MayWrite, isl_union_map **Schedule); + isl_union_map **MayWrite, isl_union_map **AccessSchedule, + isl_union_map **StmtSchedule); /// @brief Calculate and add at the privatization dependences void addPrivatizationDependences(); diff --git a/polly/lib/Analysis/Dependences.cpp b/polly/lib/Analysis/Dependences.cpp index 59a166a..bcdcb93 100644 --- a/polly/lib/Analysis/Dependences.cpp +++ b/polly/lib/Analysis/Dependences.cpp @@ -69,12 +69,20 @@ Dependences::Dependences() : ScopPass(ID) { RAW = WAR = WAW = nullptr; } void Dependences::collectInfo(Scop &S, isl_union_map **Read, isl_union_map **Write, isl_union_map **MayWrite, - isl_union_map **Schedule) { + isl_union_map **AccessSchedule, + isl_union_map **StmtSchedule) { isl_space *Space = S.getParamSpace(); *Read = isl_union_map_empty(isl_space_copy(Space)); *Write = isl_union_map_empty(isl_space_copy(Space)); *MayWrite = isl_union_map_empty(isl_space_copy(Space)); - *Schedule = isl_union_map_empty(Space); + *AccessSchedule = 
isl_union_map_empty(isl_space_copy(Space)); + *StmtSchedule = isl_union_map_empty(Space); + + SmallPtrSet ReductionBaseValues; + for (ScopStmt *Stmt : S) + for (MemoryAccess *MA : *Stmt) + if (MA->isReductionLike()) + ReductionBaseValues.insert(MA->getBaseAddr()); for (ScopStmt *Stmt : S) { for (MemoryAccess *MA : *Stmt) { @@ -83,12 +91,38 @@ void Dependences::collectInfo(Scop &S, isl_union_map **Read, accdom = isl_map_intersect_domain(accdom, domcp); + if (ReductionBaseValues.count(MA->getBaseAddr())) { + // Wrap the access domain and adjust the scattering accordingly. + // + // An access domain like + // Stmt[i0, i1] -> MemAcc_A[i0 + i1] + // will be transformed into + // [Stmt[i0, i1] -> MemAcc_A[i0 + i1]] -> MemAcc_A[i0 + i1] + // + // The original scattering looks like + // Stmt[i0, i1] -> [0, i0, 2, i1, 0] + // but as we transformed the access domain we need the scattering + // to match the new access domains, thus we need + // [Stmt[i0, i1] -> MemAcc_A[i0 + i1]] -> [0, i0, 2, i1, 0] + accdom = isl_map_range_map(accdom); + + isl_map *stmt_scatter = Stmt->getScattering(); + isl_set *scatter_dom = isl_map_domain(isl_map_copy(accdom)); + isl_set *scatter_ran = isl_map_range(stmt_scatter); + isl_map *scatter = + isl_map_from_domain_and_range(scatter_dom, scatter_ran); + for (unsigned u = 0, e = Stmt->getNumIterators(); u != e; u++) + scatter = + isl_map_equate(scatter, isl_dim_out, 2 * u + 1, isl_dim_in, u); + *AccessSchedule = isl_union_map_add_map(*AccessSchedule, scatter); + } + if (MA->isRead()) *Read = isl_union_map_add_map(*Read, accdom); else *Write = isl_union_map_add_map(*Write, accdom); } - *Schedule = isl_union_map_add_map(*Schedule, Stmt->getScattering()); + *StmtSchedule = isl_union_map_add_map(*StmtSchedule, Stmt->getScattering()); } } @@ -159,11 +193,15 @@ void Dependences::addPrivatizationDependences() { } void Dependences::calculateDependences(Scop &S) { - isl_union_map *Read, *Write, *MayWrite, *Schedule; + isl_union_map *Read, *Write, 
*MayWrite, *AccessSchedule, *StmtSchedule, + *Schedule; DEBUG(dbgs() << "Scop: \n" << S << "\n"); - collectInfo(S, &Read, &Write, &MayWrite, &Schedule); + collectInfo(S, &Read, &Write, &MayWrite, &AccessSchedule, &StmtSchedule); + + Schedule = + isl_union_map_union(AccessSchedule, isl_union_map_copy(StmtSchedule)); Read = isl_union_map_coalesce(Read); Write = isl_union_map_coalesce(Write); @@ -235,6 +273,33 @@ void Dependences::calculateDependences(Scop &S) { isl_ctx_reset_operations(S.getIslCtx()); isl_ctx_set_max_operations(S.getIslCtx(), MaxOpsOld); + isl_union_map *STMT_RAW, *STMT_WAW, *STMT_WAR; + STMT_RAW = isl_union_map_intersect_domain( + isl_union_map_copy(RAW), + isl_union_map_domain(isl_union_map_copy(StmtSchedule))); + STMT_WAW = isl_union_map_intersect_domain( + isl_union_map_copy(WAW), + isl_union_map_domain(isl_union_map_copy(StmtSchedule))); + STMT_WAR = isl_union_map_intersect_domain(isl_union_map_copy(WAR), + isl_union_map_domain(StmtSchedule)); + DEBUG(dbgs() << "Wrapped Dependences:\n"; printScop(dbgs()); dbgs() << "\n"); + + RAW = isl_union_map_zip(RAW); + WAW = isl_union_map_zip(WAW); + WAR = isl_union_map_zip(WAR); + + DEBUG(dbgs() << "Zipped Dependences:\n"; printScop(dbgs()); dbgs() << "\n"); + + RAW = isl_union_map_union(isl_union_set_unwrap(isl_union_map_domain(RAW)), + STMT_RAW); + WAW = isl_union_map_union(isl_union_set_unwrap(isl_union_map_domain(WAW)), + STMT_WAW); + WAR = isl_union_map_union(isl_union_set_unwrap(isl_union_map_domain(WAR)), + STMT_WAR); + + DEBUG(dbgs() << "Unwrapped Dependences:\n"; printScop(dbgs()); + dbgs() << "\n"); + // To handle reduction dependences we proceed as follows: // 1) Aggregate all possible reduction dependences, namely all self // dependences on reduction like statements. 
diff --git a/polly/test/Dependences/reduction_simple_iv.ll b/polly/test/Dependences/reduction_simple_iv.ll index 4b4a392..1dd9d51 100644 --- a/polly/test/Dependences/reduction_simple_iv.ll +++ b/polly/test/Dependences/reduction_simple_iv.ll @@ -9,7 +9,6 @@ ; CHECK: Reduction dependences: ; CHECK: { Stmt_for_cond[i0] -> Stmt_for_cond[1 + i0] : i0 <= 99 and i0 >= 0 } ; -; ; void f(int* sum) { ; for (int i = 0; i <= 100; i++) ; sum += i * 3; diff --git a/polly/test/Dependences/reduction_simple_iv_debug_wrapped_dependences.ll b/polly/test/Dependences/reduction_simple_iv_debug_wrapped_dependences.ll new file mode 100644 index 0000000..ae46134 --- /dev/null +++ b/polly/test/Dependences/reduction_simple_iv_debug_wrapped_dependences.ll @@ -0,0 +1,64 @@ +; RUN: opt %loadPolly -polly-dependences -analyze -debug-only=polly-dependence 2>&1 < %s | FileCheck %s +; +; REQUIRES: asserts +; +; CHECK: Read: { [Stmt_for_cond[i0] -> MemRef_sum[0]] -> MemRef_sum[0] : i0 >= 0 and i0 <= 100 } +; CHECK: Write: { [Stmt_for_cond[i0] -> MemRef_sum[0]] -> MemRef_sum[0] : i0 >= 0 and i0 <= 100 } +; CHECK: Schedule: { Stmt_for_cond[i0] -> scattering[0, i0, 0]; [Stmt_for_cond[i0] -> MemRef_sum[0]] -> scattering[0, i0, 0] : i0 <= 100 and i0 >= 0 } +; CHECK: Wrapped Dependences: +; CHECK: RAW dependences: +; CHECK: { [Stmt_for_cond[i0] -> MemRef_sum[0]] -> [Stmt_for_cond[1 + i0] -> MemRef_sum[0]] : i0 >= 0 and i0 <= 99 } +; CHECK: WAR dependences: +; CHECK: { } +; CHECK: WAW dependences: +; CHECK: { [Stmt_for_cond[i0] -> MemRef_sum[0]] -> [Stmt_for_cond[1 + i0] -> MemRef_sum[0]] : i0 >= 0 and i0 <= 99 } +; CHECK: Reduction dependences: +; CHECK: n/a +; CHECK: Zipped Dependences: +; CHECK: RAW dependences: +; CHECK: { [Stmt_for_cond[i0] -> Stmt_for_cond[1 + i0]] -> [MemRef_sum[0] -> MemRef_sum[0]] : i0 >= 0 and i0 <= 99 } +; CHECK: WAR dependences: +; CHECK: { } +; CHECK: WAW dependences: +; CHECK: { [Stmt_for_cond[i0] -> Stmt_for_cond[1 + i0]] -> [MemRef_sum[0] -> MemRef_sum[0]] : i0 >= 0 and i0 
<= 99 } +; CHECK: Reduction dependences: +; CHECK: n/a +; CHECK: Unwrapped Dependences: +; CHECK: RAW dependences: +; CHECK: { Stmt_for_cond[i0] -> Stmt_for_cond[1 + i0] : i0 >= 0 and i0 <= 99 } +; CHECK: WAR dependences: +; CHECK: { } +; CHECK: WAW dependences: +; CHECK: { Stmt_for_cond[i0] -> Stmt_for_cond[1 + i0] : i0 >= 0 and i0 <= 99 } +; CHECK: Reduction dependences: +; CHECK: n/a +; +; void f(int* sum) { +; for (int i = 0; i <= 100; i++) +; sum += i * 3; +; } +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @f(i32* %sum) { +entry: + br label %entry.split1 + +entry.split1: ; preds = %entry + br label %entry.split + +entry.split: ; preds = %entry.split1 + br label %for.cond + +for.cond: ; preds = %for.cond, %entry.split + %i1.0 = phi i32 [ 0, %entry.split ], [ %inc, %for.cond ] + %sum.reload = load i32* %sum + %mul = mul nsw i32 %i1.0, 3 + %add = add nsw i32 %sum.reload, %mul + %inc = add nsw i32 %i1.0, 1 + store i32 %add, i32* %sum + %cmp = icmp slt i32 %i1.0, 100 + br i1 %cmp, label %for.cond, label %for.end + +for.end: ; preds = %for.cond + ret void +}