From f1906138b4cb4562b5489623f52f49dc6bef28e7 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Fri, 20 Jun 2014 16:37:11 +0000 Subject: [PATCH] Model statement wise reduction dependences + Collect reduction dependences + Introduced TYPE_RED in Dependences.h which can be used to obtain the reduction dependences + Used TYPE_RED to prevent parallelization while we do not have a privatizing code generation + Relax the dependences for non-parallel code generation + Add privatization dependences to ensure correctness + 12 Test cases to check for reduction and privatization dependences llvm-svn: 211369 --- polly/include/polly/Dependences.h | 17 ++- polly/lib/Analysis/Dependences.cpp | 124 ++++++++++++++++++++- polly/lib/CodeGen/IslAst.cpp | 4 +- polly/lib/Transform/DeadCodeElimination.cpp | 3 +- .../Dependences/reduction_multiple_reductions.ll | 58 ++++++++++ .../Dependences/reduction_multiple_reductions_2.ll | 110 ++++++++++++++++++ .../reduction_only_reduction_like_access.ll | 44 ++++++++ .../reduction_only_reduction_like_access_2.ll | 56 ++++++++++ .../Dependences/reduction_privatization_deps.ll | 115 +++++++++++++++++++ .../Dependences/reduction_privatization_deps_2.ll | 86 ++++++++++++++ .../Dependences/reduction_privatization_deps_3.ll | 89 +++++++++++++++ .../Dependences/reduction_privatization_deps_4.ll | 84 ++++++++++++++ .../Dependences/reduction_privatization_deps_5.ll | 88 +++++++++++++++ polly/test/Dependences/reduction_simple_iv.ll | 41 +++++++ .../reduction_simple_privatization_deps_2.ll | 79 +++++++++++++ ...uction_simple_privatization_deps_w_parameter.ll | 59 ++++++++++ 16 files changed, 1051 insertions(+), 6 deletions(-) create mode 100644 polly/test/Dependences/reduction_multiple_reductions.ll create mode 100644 polly/test/Dependences/reduction_multiple_reductions_2.ll create mode 100644 polly/test/Dependences/reduction_only_reduction_like_access.ll create mode 100644 polly/test/Dependences/reduction_only_reduction_like_access_2.ll create mode 100644 polly/test/Dependences/reduction_privatization_deps.ll create mode 100644 polly/test/Dependences/reduction_privatization_deps_2.ll create mode 100644 polly/test/Dependences/reduction_privatization_deps_3.ll create mode 100644 polly/test/Dependences/reduction_privatization_deps_4.ll create mode 100644 polly/test/Dependences/reduction_privatization_deps_5.ll create mode 100644 polly/test/Dependences/reduction_simple_iv.ll create mode 100644 polly/test/Dependences/reduction_simple_privatization_deps_2.ll create mode 100644 polly/test/Dependences/reduction_simple_privatization_deps_w_parameter.ll diff --git a/polly/include/polly/Dependences.h b/polly/include/polly/Dependences.h index 9a704a8..e82431d 100755 --- a/polly/include/polly/Dependences.h +++ b/polly/include/polly/Dependences.h @@ -46,6 +46,12 @@ public: static char ID; /// @brief The type of the dependences. + /// + /// Reduction dependences are not included in TYPE_ALL dependences because we + /// can ignore them during the scheduling. This is the case since the order in + /// which the reduction statements are executed does not matter. However, if + /// they are executed in parallel we need to take additional measures (e.g., + /// privatization) to ensure a correct result. enum Type { // Write after read TYPE_WAR = 0x1, @@ -56,7 +62,10 @@ public: // Write after write TYPE_WAW = 0x4, - // All dependences + // Reduction dependences + TYPE_RED = 0x8, + + // All (validity) dependences TYPE_ALL = (TYPE_WAR | TYPE_RAW | TYPE_WAW) }; @@ -105,10 +114,16 @@ private: isl_union_map *WAR; isl_union_map *WAW; + /// @brief The map of reduction dependences + isl_union_map *RED = nullptr; + /// @brief Collect information about the SCoP. void collectInfo(Scop &S, isl_union_map **Read, isl_union_map **Write, isl_union_map **MayWrite, isl_union_map **Schedule); + /// @brief Calculate and add at the privatization dependences + void addPrivatizationDependences(); + // @brief Calculate the dependences for a certain SCoP. void calculateDependences(Scop &S); }; diff --git a/polly/lib/Analysis/Dependences.cpp b/polly/lib/Analysis/Dependences.cpp index dc64b7e..95c344a 100644 --- a/polly/lib/Analysis/Dependences.cpp +++ b/polly/lib/Analysis/Dependences.cpp @@ -92,6 +92,72 @@ void Dependences::collectInfo(Scop &S, isl_union_map **Read, } } +/// @brief Compute the privatization dependences for a given dependency @p Map +/// +/// Privatization dependences are widened original dependences which originate +/// or end in a reduction access. To compute them we apply the transitive close +/// of the reduction dependences (which maps each iteration of a reduction +/// statement to all following ones) on the RAW/WAR/WAW dependences. The +/// dependences which start or end at a reduction statement will be extended to +/// depend on all following reduction statement iterations as well. +/// Note: "Following" here means according to the reduction dependences. +/// +/// For the input: +/// +/// S0: *sum = 0; +/// for (int i = 0; i < 1024; i++) +/// S1: *sum += i; +/// S2: *sum = *sum * 3; +/// +/// we have the following dependences before we add privatization dependences: +/// +/// RAW: +/// { S0[] -> S1[0]; S1[1023] -> S2[] } +/// WAR: +/// { } +/// WAW: +/// { S0[] -> S1[0]; S1[1024] -> S2[] } +/// RED: +/// { S1[i0] -> S1[1 + i0] : i0 >= 0 and i0 <= 1022 } +/// +/// and afterwards: +/// +/// RAW: +/// { S0[] -> S1[i0] : i0 >= 0 and i0 <= 1023; +/// S1[i0] -> S2[] : i0 >= 0 and i0 <= 1023} +/// WAR: +/// { } +/// WAW: +/// { S0[] -> S1[i0] : i0 >= 0 and i0 <= 1023; +/// S1[i0] -> S2[] : i0 >= 0 and i0 <= 1023} +/// RED: +/// { S1[i0] -> S1[1 + i0] : i0 >= 0 and i0 <= 1022 } +void Dependences::addPrivatizationDependences() { + isl_union_map *PrivRAW, *PrivWAW, *PrivWAR, *TransClosure; + + // The transitive closure might be over approximated but we only use it to + // compute the privatization dependences. Thus, overapproximation will lead + // "only" to more conservative privatization dependences. + // FIXME: Take precautions to ensure only forward dependences are created. + TransClosure = isl_union_map_transitive_closure(isl_union_map_copy(RED), 0); + + isl_union_map **Maps[] = {&RAW, &WAW, &WAR}; + isl_union_map **PrivMaps[] = {&PrivRAW, &PrivWAW, &PrivWAR}; + for (unsigned u = 0; u < 3; u++) { + isl_union_map **Map = Maps[u], **PrivMap = PrivMaps[u]; + + *PrivMap = isl_union_map_apply_range(isl_union_map_copy(*Map), + isl_union_map_copy(TransClosure)); + *PrivMap = isl_union_map_union( + *PrivMap, isl_union_map_apply_range(isl_union_map_copy(TransClosure), + isl_union_map_copy(*Map))); + + *Map = isl_union_map_union(*Map, *PrivMap); + } + + isl_union_map_free(TransClosure); +} + void Dependences::calculateDependences(Scop &S) { isl_union_map *Read, *Write, *MayWrite, *Schedule; @@ -114,7 +180,7 @@ void Dependences::calculateDependences(Scop &S) { // The pointers below will be set by the subsequent calls to // isl_union_map_compute_flow. - RAW = WAW = WAR = nullptr; + RAW = WAW = WAR = RED = nullptr; if (OptAnalysisType == VALUE_BASED_ANALYSIS) { isl_union_map_compute_flow( @@ -169,6 +235,50 @@ void Dependences::calculateDependences(Scop &S) { isl_ctx_reset_operations(S.getIslCtx()); isl_ctx_set_max_operations(S.getIslCtx(), MaxOpsOld); + // To handle reduction dependences we proceed as follows: + // 1) Aggregate all possible reduction dependences, namely all self + // dependences on reduction like statements. + // 2) Intersect them with the actual RAW & WAW dependences to the get the + // actual reduction dependences. This will ensure the load/store memory + // addresses were __identical__ in the two iterations of the statement. + // 3) Relax the original RAW and WAW dependences by substracting the actual + // reduction dependences. Binary reductions (sum += A[i]) cause both, and + // the same, RAW and WAW dependences. + // 4) Add the privatization dependences which are widened versions of + // already present dependences. They model the effect of manual + // privatization at the outermost possible place (namely after the last + // write and before the first access to a reduction location). + + // Step 1) + RED = isl_union_map_empty(isl_union_map_get_space(RAW)); + for (auto *Stmt : S) { + if (!Stmt->isReductionLike()) + continue; + isl_set *Domain = Stmt->getDomain(); + isl_map *Identity = + isl_map_from_domain_and_range(isl_set_copy(Domain), Domain); + RED = isl_union_map_add_map(RED, Identity); + } + + // Step 2) + RED = isl_union_map_intersect(RED, isl_union_map_copy(RAW)); + RED = isl_union_map_intersect(RED, isl_union_map_copy(WAW)); + + if (!isl_union_map_is_empty(RED)) { + + // Step 3) + RAW = isl_union_map_subtract(RAW, isl_union_map_copy(RED)); + WAW = isl_union_map_subtract(WAW, isl_union_map_copy(RED)); + + // Step 4) + addPrivatizationDependences(); + } + + RAW = isl_union_map_coalesce(RAW); + WAW = isl_union_map_coalesce(WAW); + WAR = isl_union_map_coalesce(WAR); + RED = isl_union_map_coalesce(RED); + DEBUG(printScop(dbgs())); } @@ -256,7 +366,9 @@ bool Dependences::isParallelDimension(__isl_take isl_set *ScheduleSubset, return false; } - Deps = getDependences(TYPE_ALL); + // FIXME: We can remove ignore reduction dependences in case we privatize the + // memory locations the reduction statements reduce into. + Deps = getDependences(TYPE_ALL | TYPE_RED); if (isl_union_map_is_empty(Deps)) { isl_union_map_free(Deps); @@ -310,14 +422,17 @@ void Dependences::printScop(raw_ostream &OS) const { printDependencyMap(OS, WAR); OS << "\tWAW dependences:\n\t\t"; printDependencyMap(OS, WAW); + OS << "\tReduction dependences:\n\t\t"; + printDependencyMap(OS, RED); } void Dependences::releaseMemory() { isl_union_map_free(RAW); isl_union_map_free(WAR); isl_union_map_free(WAW); + isl_union_map_free(RED); - RAW = WAR = WAW = nullptr; + RED = RAW = WAR = WAW = nullptr; } isl_union_map *Dependences::getDependences(int Kinds) { @@ -334,6 +449,9 @@ isl_union_map *Dependences::getDependences(int Kinds) { if (Kinds & TYPE_WAW) Deps = isl_union_map_union(Deps, isl_union_map_copy(WAW)); + if (Kinds & TYPE_RED) + Deps = isl_union_map_union(Deps, isl_union_map_copy(RED)); + Deps = isl_union_map_coalesce(Deps); Deps = isl_union_map_detect_equalities(Deps); return Deps; diff --git a/polly/lib/CodeGen/IslAst.cpp b/polly/lib/CodeGen/IslAst.cpp index 0e2ddcc..57814e4 100644 --- a/polly/lib/CodeGen/IslAst.cpp +++ b/polly/lib/CodeGen/IslAst.cpp @@ -166,7 +166,9 @@ static bool astScheduleDimIsParallel(__isl_keep isl_ast_build *Build, Dimension = isl_space_dim(ScheduleSpace, isl_dim_out) - 1; - Deps = D->getDependences(Dependences::TYPE_ALL); + // FIXME: We can remove ignore reduction dependences in case we privatize the + // memory locations the reduction statements reduce into. + Deps = D->getDependences(Dependences::TYPE_ALL | Dependences::TYPE_RED); Deps = isl_union_map_apply_range(Deps, isl_union_map_copy(Schedule)); Deps = isl_union_map_apply_domain(Deps, Schedule); diff --git a/polly/lib/Transform/DeadCodeElimination.cpp b/polly/lib/Transform/DeadCodeElimination.cpp index 14c6044..c43ec33 100644 --- a/polly/lib/Transform/DeadCodeElimination.cpp +++ b/polly/lib/Transform/DeadCodeElimination.cpp @@ -100,7 +100,8 @@ bool DeadCodeElim::eliminateDeadCode(Scop &S, int PreciseSteps) { return false; isl_union_set *Live = this->getLastWrites(S.getWrites(), S.getSchedule()); - isl_union_map *Dep = D->getDependences(Dependences::TYPE_RAW); + isl_union_map *Dep = + D->getDependences(Dependences::TYPE_RAW | Dependences::TYPE_RED); Dep = isl_union_map_reverse(Dep); if (PreciseSteps == -1) diff --git a/polly/test/Dependences/reduction_multiple_reductions.ll b/polly/test/Dependences/reduction_multiple_reductions.ll new file mode 100644 index 0000000..68bb81a --- /dev/null +++ b/polly/test/Dependences/reduction_multiple_reductions.ll @@ -0,0 +1,58 @@ +; RUN: opt %loadPolly -basicaa -polly-dependences -analyze < %s | FileCheck %s +; +; Verify we do not have dependences between the if and the else clause +; +; CHECK: RAW dependences: +; CHECK: { } +; CHECK: WAR dependences: +; CHECK: { } +; CHECK: WAW dependences: +; CHECK: { } +; CHECK: Reduction dependences: +; CHECK: { Stmt_if_then[i0] -> Stmt_if_then[1 + i0] : i0 <= 510 and i0 >= 0; Stmt_if_else[i0] -> Stmt_if_else[1 + i0] : i0 <= 1022 and i0 >= 512 } +; +; void f(int *restrict sum, int *restrict prod) { +; for (int i = 0; i < 1024; i++) +; if (i < 512) +; *sum += i; +; else +; *prod *= i; +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @f(i32* noalias %sum, i32* noalias %prod) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %i.0, 1024 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %cmp1 = icmp slt i32 %i.0, 512 + br i1 %cmp1, label %if.then, label %if.else + +if.then: ; preds = %for.body + %tmp = load i32* %sum, align 4 + %add = add nsw i32 %tmp, %i.0 + store i32 %add, i32* %sum, align 4 + br label %if.end + +if.else: ; preds = %for.body + %tmp1 = load i32* %prod, align 4 + %mul = mul nsw i32 %tmp1, %i.0 + store i32 %mul, i32* %prod, align 4 + br label %if.end + +if.end: ; preds = %if.else, %if.then + br label %for.inc + +for.inc: ; preds = %if.end + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} diff --git a/polly/test/Dependences/reduction_multiple_reductions_2.ll b/polly/test/Dependences/reduction_multiple_reductions_2.ll new file mode 100644 index 0000000..6d1053b --- /dev/null +++ b/polly/test/Dependences/reduction_multiple_reductions_2.ll @@ -0,0 +1,110 @@ +; RUN: opt %loadPolly -basicaa -polly-dependences -analyze < %s | FileCheck %s +; +; CHECK: RAW dependences: +; CHECK-DAG: Stmt_S2[i0, i1] -> Stmt_S3[i0] : i0 <= 1023 and i0 >= 0 and i1 <= 1023 and i1 >= 0 +; CHECK-DAG: Stmt_S3[i0] -> Stmt_S0[1 + i0] : i0 >= 0 and i0 <= 1022 +; CHECK-DAG: Stmt_S0[i0] -> Stmt_S1[i0, o1] : i0 <= 1023 and i0 >= 0 and o1 <= 1023 and o1 >= 0 +; These are the important RAW dependences, as they need to originate/end in only one iteration: +; CHECK-DAG: Stmt_S1[i0, 1023] -> Stmt_S2[i0, o1] : i0 <= 1023 and i0 >= 0 and o1 <= 1023 and o1 >= 0 +; CHECK-DAG: Stmt_S1[i0, i1] -> Stmt_S2[i0, 0] : i0 <= 1023 and i0 >= 0 and i1 <= 1022 and i1 >= 0 +; CHECK: WAR dependences: +; CHECK: { } +; CHECK: WAW dependences: +; CHECK-DAG: Stmt_S2[i0, i1] -> Stmt_S3[i0] : i0 <= 1023 and i0 >= 0 and i1 <= 1023 and i1 >= 0 +; CHECK-DAG: Stmt_S3[i0] -> Stmt_S0[1 + i0] : i0 >= 0 and i0 <= 1022 +; CHECK-DAG: Stmt_S0[i0] -> Stmt_S1[i0, o1] : i0 <= 1023 and i0 >= 0 and o1 <= 1023 and o1 >= 0 +; These are the important WAW dependences, as they need to originate/end in only one iteration: +; CHECK-DAG: Stmt_S1[i0, 1023] -> Stmt_S2[i0, o1] : i0 <= 1023 and i0 >= 0 and o1 <= 1023 and o1 >= 0 +; CHECK-DAG: Stmt_S1[i0, i1] -> Stmt_S2[i0, 0] : i0 <= 1023 and i0 >= 0 and i1 <= 1022 and i1 >= 0 +; CHECK: Reduction dependences: +; CHECK-DAG: Stmt_S1[i0, i1] -> Stmt_S1[i0, 1 + i1] : i0 <= 1023 and i0 >= 0 and i1 <= 1022 and i1 >= 0 +; CHECK-DAG: Stmt_S2[i0, i1] -> Stmt_S2[i0, 1 + i1] : i0 <= 1023 and i0 >= 0 and i1 <= 1022 and i1 >= 0 +; +; void f(int *restrict red) { +; for (int j = 0; j < 1024; j++) { +; S0: *red = 42 + *red * 5; +; for (int i = 0; i < 1024; i++) +; S1: *red *= i; +; for (int i = 0; i < 1024; i++) +; S2: *red += i; +; S3: *red = 42 + *red * 7; +; } +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @f(i32* noalias %red) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc15, %entry + %j.0 = phi i32 [ 0, %entry ], [ %inc16, %for.inc15 ] + %exitcond2 = icmp ne i32 %j.0, 1024 + br i1 %exitcond2, label %for.body, label %for.end17 + +for.body: ; preds = %for.cond + br label %S0 + +S0: ; preds = %for.body + %tmp = load i32* %red, align 4 + %mul = mul nsw i32 %tmp, 5 + %add = add nsw i32 %mul, 42 + store i32 %add, i32* %red, align 4 + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %S0 + %i.0 = phi i32 [ 0, %S0 ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %i.0, 1024 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + br label %S1 + +S1: ; preds = %for.body3 + %tmp3 = load i32* %red, align 4 + %mul4 = mul nsw i32 %tmp3, %i.0 + store i32 %mul4, i32* %red, align 4 + br label %for.inc + +for.inc: ; preds = %S1 + %inc = add nsw i32 %i.0, 1 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.cond6 + +for.cond6: ; preds = %for.inc10, %for.end + %i5.0 = phi i32 [ 0, %for.end ], [ %inc11, %for.inc10 ] + %exitcond1 = icmp ne i32 %i5.0, 1024 + br i1 %exitcond1, label %for.body8, label %for.end12 + +for.body8: ; preds = %for.cond6 + br label %S2 + +S2: ; preds = %for.body8 + %tmp4 = load i32* %red, align 4 + %add9 = add nsw i32 %tmp4, %i5.0 + store i32 %add9, i32* %red, align 4 + br label %for.inc10 + +for.inc10: ; preds = %S2 + %inc11 = add nsw i32 %i5.0, 1 + br label %for.cond6 + +for.end12: ; preds = %for.cond6 + br label %S3 + +S3: ; preds = %for.end12 + %tmp5 = load i32* %red, align 4 + %mul13 = mul nsw i32 %tmp5, 7 + %add14 = add nsw i32 %mul13, 42 + store i32 %add14, i32* %red, align 4 + br label %for.inc15 + +for.inc15: ; preds = %S3 + %inc16 = add nsw i32 %j.0, 1 + br label %for.cond + +for.end17: ; preds = %for.cond + ret void +} diff --git a/polly/test/Dependences/reduction_only_reduction_like_access.ll b/polly/test/Dependences/reduction_only_reduction_like_access.ll new file mode 100644 index 0000000..c12949a --- /dev/null +++ b/polly/test/Dependences/reduction_only_reduction_like_access.ll @@ -0,0 +1,44 @@ +; RUN: opt %loadPolly -polly-dependences -analyze < %s | FileCheck %s +; +; FIXME: Change the comment once we allow different pointers +; The statement is "almost" reduction like but should not yield any reduction dependences +; +; We are limited to binary reductions at the moment and this is not one. +; There are never at least two iterations which read __and__ write to the same +; location, thus we won't find the RAW and WAW dependences of a reduction, +; thus we should not find Reduction dependences. +; +; CHECK: Reduction dependences: +; CHECK: { } +; +; void f(int *sum) { +; for (int i = 0; i < 100; i++) +; sum[i] = sum[99-i] + i; +; } +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @f(i32* %sum) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %i.0, 100 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %sub = sub nsw i32 99, %i.0 + %arrayidx = getelementptr inbounds i32* %sum, i32 %sub + %tmp = load i32* %arrayidx, align 4 + %add = add nsw i32 %tmp, %i.0 + %arrayidx1 = getelementptr inbounds i32* %sum, i32 %i.0 + store i32 %add, i32* %arrayidx1, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} diff --git a/polly/test/Dependences/reduction_only_reduction_like_access_2.ll b/polly/test/Dependences/reduction_only_reduction_like_access_2.ll new file mode 100644 index 0000000..59fe711 --- /dev/null +++ b/polly/test/Dependences/reduction_only_reduction_like_access_2.ll @@ -0,0 +1,56 @@ +; RUN: opt %loadPolly -polly-dependences -analyze < %s | FileCheck %s +; +; FIXME: This is only true as long as we restrict ourselfs to the exact same +; access pointer for the load and store. +; The statement is "almost" reduction like but should not yield any reduction dependences +; +; CHECK: Reduction dependences: +; CHECK: { } +; +; void f(int *sum) { +; for (int i = 0; i < 1024; i++) +; for (int j = 0; j < 512; j++) +; sum[i + j] = sum[i] + 3; +; } +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @f(i32* %sum) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc6, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc7, %for.inc6 ] + %exitcond1 = icmp ne i32 %i.0, 1024 + br i1 %exitcond1, label %for.body, label %for.end8 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %j.0, 512 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %arrayidx = getelementptr inbounds i32* %sum, i32 %i.0 + %tmp = load i32* %arrayidx, align 4 + %add = add nsw i32 %tmp, 3 + %add4 = add nsw i32 %i.0, %j.0 + %arrayidx5 = getelementptr inbounds i32* %sum, i32 %add4 + store i32 %add, i32* %arrayidx5, align 4 + br label %for.inc + +for.inc: ; preds = %for.body3 + %inc = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.inc6 + +for.inc6: ; preds = %for.end + %inc7 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end8: ; preds = %for.cond + ret void +} diff --git a/polly/test/Dependences/reduction_privatization_deps.ll b/polly/test/Dependences/reduction_privatization_deps.ll new file mode 100644 index 0000000..e733a3d --- /dev/null +++ b/polly/test/Dependences/reduction_privatization_deps.ll @@ -0,0 +1,115 @@ +; RUN: opt %loadPolly -basicaa -polly-dependences -analyze < %s | FileCheck %s +; +; CHECK: RAW dependences: +; CHECK-DAG: Stmt_S0[i0] -> Stmt_S1[o0, i0 - o0] : i0 <= 1023 and o0 >= 0 and o0 <= i0 +; CHECK-DAG: Stmt_S1[i0, i1] -> Stmt_S2[-1 + i0 + i1] : i0 >= 0 and i1 >= 1 and i0 <= 1022 and i1 <= 1024 - i0 and i1 <= 1023 +; CHECK-DAG: Stmt_S1[1023, i1] -> Stmt_S2[1022 + i1] : i1 <= 1 and i1 >= 0 +; CHECK-DAG: Stmt_S1[i0, 0] -> Stmt_S2[-1 + i0] : i0 <= 1022 and i0 >= 1 +; CHECK: WAR dependences: +; CHECK-DAG: Stmt_S2[i0] -> Stmt_S2[1 + i0] : i0 <= 1022 and i0 >= 0 +; CHECK-DAG: Stmt_S1[i0, i1] -> Stmt_S2[i0 + i1] : i0 >= 0 and i1 <= 1023 - i0 and i1 >= 1 +; CHECK-DAG: Stmt_S1[i0, 0] -> Stmt_S2[i0] : i0 <= 1023 and i0 >= 1 +; CHECK: WAW dependences: +; CHECK-DAG: Stmt_S0[i0] -> Stmt_S1[o0, i0 - o0] : i0 <= 1023 and o0 >= 0 and o0 <= i0 +; CHECK-DAG: Stmt_S1[0, 0] -> Stmt_S2[0] +; CHECK: Reduction dependences: +; CHECK-DAG: Stmt_S1[i0, i1] -> Stmt_S1[1 + i0, -1 + i1] : i0 <= 1022 and i0 >= 0 and i1 <= 1023 and i1 >= 1 +; +; void f(int *sum) { +; for (int i = 0; i < 1024; i++) +; S0: sum[i] = 0; +; for (int i = 0; i < 1024; i++) +; for (int j = 0; j < 1024; j++) +; S1: sum[i + j] += i; +; for (int i = 0; i < 1024; i++) +; S2: sum[i] = sum[i + 1] * 3; +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @f(i32* %sum) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %exitcond3 = icmp ne i32 %i.0, 1024 + br i1 %exitcond3, label %for.body, label %for.end + +for.body: ; preds = %for.cond + br label %S0 + +S0: ; preds = %for.body + %arrayidx = getelementptr inbounds i32* %sum, i32 %i.0 + store i32 0, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %S0 + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + br label %for.cond2 + +for.cond2: ; preds = %for.inc13, %for.end + %i1.0 = phi i32 [ 0, %for.end ], [ %inc14, %for.inc13 ] + %exitcond2 = icmp ne i32 %i1.0, 1024 + br i1 %exitcond2, label %for.body4, label %for.end15 + +for.body4: ; preds = %for.cond2 + br label %for.cond5 + +for.cond5: ; preds = %for.inc10, %for.body4 + %j.0 = phi i32 [ 0, %for.body4 ], [ %inc11, %for.inc10 ] + %exitcond1 = icmp ne i32 %j.0, 1024 + br i1 %exitcond1, label %for.body7, label %for.end12 + +for.body7: ; preds = %for.cond5 + br label %S1 + +S1: ; preds = %for.body7 + %add = add nsw i32 %i1.0, %j.0 + %arrayidx8 = getelementptr inbounds i32* %sum, i32 %add + %tmp = load i32* %arrayidx8, align 4 + %add9 = add nsw i32 %tmp, %i1.0 + store i32 %add9, i32* %arrayidx8, align 4 + br label %for.inc10 + +for.inc10: ; preds = %S1 + %inc11 = add nsw i32 %j.0, 1 + br label %for.cond5 + +for.end12: ; preds = %for.cond5 + br label %for.inc13 + +for.inc13: ; preds = %for.end12 + %inc14 = add nsw i32 %i1.0, 1 + br label %for.cond2 + +for.end15: ; preds = %for.cond2 + br label %for.cond17 + +for.cond17: ; preds = %for.inc23, %for.end15 + %i16.0 = phi i32 [ 0, %for.end15 ], [ %inc24, %for.inc23 ] + %exitcond = icmp ne i32 %i16.0, 1024 + br i1 %exitcond, label %for.body19, label %for.end25 + +for.body19: ; preds = %for.cond17 + br label %S2 + +S2: ; preds = %for.body19 + %add20 = add nsw i32 %i16.0, 1 + %arrayidx21 = getelementptr inbounds i32* %sum, i32 %add20 + %tmp4 = load i32* %arrayidx21, align 4 + %mul = mul nsw i32 %tmp4, 3 + %arrayidx22 = getelementptr inbounds i32* %sum, i32 %i16.0 + store i32 %mul, i32* %arrayidx22, align 4 + br label %for.inc23 + +for.inc23: ; preds = %S2 + %inc24 = add nsw i32 %i16.0, 1 + br label %for.cond17 + +for.end25: ; preds = %for.cond17 + ret void +} diff --git a/polly/test/Dependences/reduction_privatization_deps_2.ll b/polly/test/Dependences/reduction_privatization_deps_2.ll new file mode 100644 index 0000000..979b0ea --- /dev/null +++ b/polly/test/Dependences/reduction_privatization_deps_2.ll @@ -0,0 +1,86 @@ +; RUN: opt %loadPolly -polly-dependences -analyze < %s | FileCheck %s +; +; We have privatization dependences from a textually later statement to a +; textually earlier one, but the dependences still go forward in time. +; +; CHECK: RAW dependences: +; CHECK-DAG: Stmt_S3[i0] -> Stmt_S2[1 + i0, o1] : i0 <= 97 and i0 >= 0 and o1 <= 99 and o1 >= 0 +; CHECK-DAG: Stmt_S1[i0] -> Stmt_S3[i0] : i0 >= 0 and i0 <= 98 +; CHECK: WAR dependences: +; CHECK: { } +; CHECK: WAW dependences: +; CHECK-DAG: Stmt_S3[i0] -> Stmt_S2[1 + i0, o1] : i0 <= 97 and i0 >= 0 and o1 <= 99 and o1 >= 0 +; CHECK-DAG: Stmt_S1[i0] -> Stmt_S3[i0] : i0 >= 0 and i0 <= 98 +; CHECK: Reduction dependences: +; CHECK: { Stmt_S2[i0, i1] -> Stmt_S2[i0, 1 + i1] : i0 <= 98 and i0 >= 0 and i1 <= 98 and i1 >= 0 } +; +; void f(int *sum) { +; int i, j; +; for (i = 0; i < 99; i++) { +; S1: sum[i + 1] += 42; +; for (j = 0; j < 100; j++) +; S2: sum[i] += i * j; +; S3: sum[i + 1] += 7; +; } +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @f(i32* %sum) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc10, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc11, %for.inc10 ] + %exitcond1 = icmp ne i32 %i.0, 99 + br i1 %exitcond1, label %for.body, label %for.end12 + +for.body: ; preds = %for.cond + br label %S1 + +S1: ; preds = %for.body + %add = add nsw i32 %i.0, 1 + %arrayidx = getelementptr inbounds i32* %sum, i32 %add + %tmp = load i32* %arrayidx, align 4 + %add1 = add nsw i32 %tmp, 42 + store i32 %add1, i32* %arrayidx, align 4 + br label %for.cond2 + +for.cond2: ; preds = %for.inc, %S1 + %j.0 = phi i32 [ 0, %S1 ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %j.0, 100 + br i1 %exitcond, label %for.body4, label %for.end + +for.body4: ; preds = %for.cond2 + br label %S2 + +S2: ; preds = %for.body4 + %mul = mul nsw i32 %i.0, %j.0 + %arrayidx5 = getelementptr inbounds i32* %sum, i32 %i.0 + %tmp2 = load i32* %arrayidx5, align 4 + %add6 = add nsw i32 %tmp2, %mul + store i32 %add6, i32* %arrayidx5, align 4 + br label %for.inc + +for.inc: ; preds = %S2 + %inc = add nsw i32 %j.0, 1 + br label %for.cond2 + +for.end: ; preds = %for.cond2 + br label %S3 + +S3: ; preds = %for.end + %add7 = add nsw i32 %i.0, 1 + %arrayidx8 = getelementptr inbounds i32* %sum, i32 %add7 + %tmp3 = load i32* %arrayidx8, align 4 + %add9 = add nsw i32 %tmp3, 7 + store i32 %add9, i32* %arrayidx8, align 4 + br label %for.inc10 + +for.inc10: ; preds = %S3 + %inc11 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end12: ; preds = %for.cond + ret void +} diff --git a/polly/test/Dependences/reduction_privatization_deps_3.ll b/polly/test/Dependences/reduction_privatization_deps_3.ll new file mode 100644 index 0000000..83449be --- /dev/null +++ b/polly/test/Dependences/reduction_privatization_deps_3.ll @@ -0,0 +1,89 @@ +; RUN: opt %loadPolly -polly-dependences -polly-codegen-scev -analyze < %s | FileCheck %s +; +; CHECK: RAW dependences: +; CHECK-DAG: Stmt_S2[0, 0] -> Stmt_S3[1] +; CHECK-DAG: Stmt_S2[i0, 1 - i0] -> Stmt_S3[i0] : i0 <= 1 and i0 >= 0 +; CHECK-DAG: Stmt_S3[i0] -> Stmt_S2[o0, 1 - i0] : i0 <= 1 and i0 >= 0 and o0 >= 1 + i0 and o0 <= 98 +; CHECK-DAG: Stmt_S1[i0] -> Stmt_S3[2 + i0] : i0 >= 0 and i0 <= 96 +; CHECK: WAR dependences: +; CHECK: { } +; CHECK: WAW dependences: +; CHECK-DAG: Stmt_S2[0, 0] -> Stmt_S3[1] +; CHECK-DAG: Stmt_S2[i0, 1 - i0] -> Stmt_S3[i0] : i0 <= 1 and i0 >= 0 +; CHECK-DAG: Stmt_S3[i0] -> Stmt_S2[o0, 1 - i0] : i0 <= 1 and i0 >= 0 and o0 >= 1 + i0 and o0 <= 98 +; CHECK-DAG: Stmt_S1[i0] -> Stmt_S3[2 + i0] : i0 >= 0 and i0 <= 96 +; CHECK: Reduction dependences: +; CHECK-DAG: Stmt_S2[i0, i1] -> Stmt_S2[1 + i0, i1] : i0 <= 97 and i0 >= 0 and i1 <= 98 - i0 and i1 >= 0 and i1 >= 2 - i0 +; CHECK-DAG: Stmt_S2[0, 0] -> Stmt_S2[1, 0] +; +; void f(int *sum) { +; int i, j; +; for (i = 0; i < 99; i++) { +; S1: sum[i + 1] += 42; +; for (j = i; j < 100; j++) +; S2: sum[i - j] += i * j; +; S3: sum[i - 1] += 7; +; } +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @f(i32* %sum) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc10, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc11, %for.inc10 ] + %exitcond1 = icmp ne i32 %i.0, 99 + br i1 %exitcond1, label %for.body, label %for.end12 + +for.body: ; preds = %for.cond + br label %S1 + +S1: ; preds = %for.body + %add = add nsw i32 %i.0, 1 + %arrayidx = getelementptr inbounds i32* %sum, i32 %add + %tmp = load i32* %arrayidx, align 4 + %add1 = add nsw i32 %tmp, 42 + store i32 %add1, i32* %arrayidx, align 4 + br label %for.cond2 + +for.cond2: ; preds = %for.inc, %S1 + %j.0 = phi i32 [ %i.0, %S1 ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %j.0, 100 + br i1 %exitcond, label %for.body4, label %for.end + +for.body4: ; preds = %for.cond2 + br label %S2 + +S2: ; preds = %for.body4 + %mul = mul nsw i32 %i.0, %j.0 + %sub = sub nsw i32 %i.0, %j.0 + %arrayidx5 = getelementptr inbounds i32* %sum, i32 %sub + %tmp2 = load i32* %arrayidx5, align 4 + %add6 = add nsw i32 %tmp2, %mul + store i32 %add6, i32* %arrayidx5, align 4 + br label %for.inc + +for.inc: ; preds = %S2 + %inc = add nsw i32 %j.0, 1 + br label %for.cond2 + +for.end: ; preds = %for.cond2 + br label %S3 + +S3: ; preds = %for.end + %sub7 = add nsw i32 %i.0, -1 + %arrayidx8 = getelementptr inbounds i32* %sum, i32 %sub7 + %tmp3 = load i32* %arrayidx8, align 4 + %add9 = add nsw i32 %tmp3, 7 + store i32 %add9, i32* %arrayidx8, align 4 + br label %for.inc10 + +for.inc10: ; preds = %S3 + %inc11 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end12: ; preds = %for.cond + ret void +} diff --git a/polly/test/Dependences/reduction_privatization_deps_4.ll b/polly/test/Dependences/reduction_privatization_deps_4.ll new file mode 100644 index 0000000..5f44815 --- /dev/null +++ b/polly/test/Dependences/reduction_privatization_deps_4.ll @@ -0,0 +1,84 @@ +; RUN: opt %loadPolly -polly-dependences -analyze < %s | FileCheck %s +; +; CHECK: RAW dependences: +; CHECK-DAG: Stmt_S2[i0, i1] -> Stmt_S1[i1] : i0 >= 0 and i1 >= 1 + i0 and i1 <= 98 +; CHECK-DAG: Stmt_S1[i0] -> Stmt_S2[i0, i0] : i0 >= 0 and i0 <= 98 +; CHECK-DAG: Stmt_S2[i0, i0] -> Stmt_S3[i0] : i0 >= 0 and i0 <= 98 +; CHECK-DAG: Stmt_S3[i0] -> Stmt_S2[o0, i0] : i0 >= 0 and o0 >= 1 + i0 and o0 <= 98 +; CHECK: WAR dependences: +; CHECK-DAG: { } +; CHECK: WAW dependences: +; CHECK-DAG: Stmt_S2[i0, i1] -> Stmt_S1[i1] : i0 >= 0 and i1 >= 1 + i0 and i1 <= 98 +; CHECK-DAG: Stmt_S1[i0] -> Stmt_S2[i0, i0] : i0 >= 0 and i0 <= 98 +; CHECK-DAG: Stmt_S2[i0, i0] -> Stmt_S3[i0] : i0 >= 0 and i0 <= 98 +; CHECK-DAG: Stmt_S3[i0] -> Stmt_S2[o0, i0] : i0 >= 0 and o0 >= 1 + i0 and o0 <= 98 +; CHECK: Reduction dependences: +; CHECK-DAG: { Stmt_S2[i0, i1] -> Stmt_S2[1 + i0, i1] : (i0 <= 97 and i1 >= 0 and i1 <= -1 + i0) or (i0 >= 0 and i1 >= 2 + i0 and i1 <= 99) } +; +; void f(int *sum) { +; for (int i = 0; i < 99; i++) { +; S1: sum[i] += 42; +; for (int j = 0; j < 100; j++) +; S2: sum[j] += i * j; +; S3: sum[i] += 7; +; } +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @f(i32* %sum) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc8, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc9, %for.inc8 ] + %exitcond1 = icmp ne i32 %i.0, 99 + br i1 %exitcond1, label %for.body, label %for.end10 + +for.body: ; preds = %for.cond + br label %S1 + +S1: ; preds = %for.body + %arrayidx = getelementptr inbounds i32* %sum, i32 %i.0 + %tmp = load i32* %arrayidx, align 4 + %add = add nsw i32 %tmp, 42 + store i32 %add, i32* %arrayidx, align 4 + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %S1 + %j.0 = phi i32 [ 0, %S1 ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %j.0, 100 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + br label %S2 + +S2: ; preds = %for.body3 + %mul = mul nsw i32 %i.0, %j.0 + %arrayidx4 = getelementptr inbounds i32* %sum, i32 %j.0 + %tmp2 = load i32* %arrayidx4, align 4 + %add5 = add nsw i32 %tmp2, %mul + store i32 %add5, i32* %arrayidx4, align 4 + br label %for.inc + +for.inc: ; preds = %S2 + %inc = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %S3 + +S3: ; preds = %for.end + %arrayidx6 = getelementptr inbounds i32* %sum, i32 %i.0 + %tmp3 = load i32* %arrayidx6, align 4 + %add7 = add nsw i32 %tmp3, 7 + store i32 %add7, i32* %arrayidx6, align 4 + br label %for.inc8 + +for.inc8: ; preds = %S3 + %inc9 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end10: ; preds = %for.cond + ret void +} diff --git a/polly/test/Dependences/reduction_privatization_deps_5.ll b/polly/test/Dependences/reduction_privatization_deps_5.ll new file mode 100644 index 0000000..b6b4dbd --- /dev/null +++ b/polly/test/Dependences/reduction_privatization_deps_5.ll @@ -0,0 +1,88 @@ +; RUN: opt %loadPolly -polly-dependences -analyze < %s | FileCheck %s +; +; CHECK: RAW dependences: +; CHECK-DAG: Stmt_S2[i0, 0] -> Stmt_S1[1 + i0, 0] : i0 >= 0 and i0 <= 97 +; CHECK-DAG: Stmt_S1[i0, 0] -> Stmt_S2[i0, 0] : i0 >= 0 and i0 <= 98 +; CHECK: WAR dependences: +; CHECK-DAG: { } +; CHECK: WAW dependences: +; CHECK-DAG: Stmt_S2[i0, 0] -> Stmt_S1[1 + i0, 0] : i0 >= 0 and i0 <= 97 +; CHECK-DAG: Stmt_S1[i0, 0] -> Stmt_S2[i0, 0] : i0 >= 0 and i0 <= 98 +; CHECK: Reduction dependences: +; CHECK-DAG: { Stmt_S2[i0, i1] -> Stmt_S2[1 + i0, i1] : i0 <= 97 and i0 >= 0 and i1 <= 99 and i1 >= 1 } +; +; void f(int *sum) { +; for (int i = 0; i < 99; i++) { +; for (int j = 0; j < 1; j++) +; S1: sum[j] += 42; +; for (int j = 0; j < 100; j++) +; S2: sum[j] += i * j; +; } +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @f(i32* %sum) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc12, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc13, %for.inc12 ] + %exitcond2 = icmp ne i32 %i.0, 99 + br i1 %exitcond2, label %for.body, label %for.end14 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %j.0, 1 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + br label %S1 + +S1: ; preds = %for.body3 + %arrayidx = getelementptr inbounds i32* %sum, i32 %j.0 + %tmp = load i32* %arrayidx, align 4 + %add = add nsw i32 %tmp, 42 + store i32 %add, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %S1 + %inc = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.cond4 + +for.cond4: ; preds = %for.inc9, %for.end + %j.1 = phi i32 [ 0, %for.end ], [ %inc10, %for.inc9 ] + %exitcond1 = icmp ne i32 %j.1, 100 + br i1 %exitcond1, label %for.body6, label %for.end11 + +for.body6: ; preds = %for.cond4 + br label %S2 + +S2: ; preds = %for.body6 + %mul = mul nsw i32 %i.0, %j.1 + %arrayidx7 = getelementptr inbounds i32* %sum, i32 %j.1 + %tmp3 = load i32* %arrayidx7, align 4 + %add8 = add nsw i32 %tmp3, %mul + store i32 %add8, i32* %arrayidx7, align 4 + br label %for.inc9 + +for.inc9: ; preds = %S2 + %inc10 = add nsw i32 %j.1, 1 + br label %for.cond4 + +for.end11: ; preds = %for.cond4 + br label %for.inc12 + +for.inc12: ; preds = %for.end11 + %inc13 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end14: ; preds = %for.cond + ret void +} diff --git a/polly/test/Dependences/reduction_simple_iv.ll b/polly/test/Dependences/reduction_simple_iv.ll new file mode 100644 index 0000000..4b4a392 --- /dev/null +++ b/polly/test/Dependences/reduction_simple_iv.ll @@ -0,0 +1,41 @@ +; RUN: opt %loadPolly -polly-dependences -analyze < %s | FileCheck %s +; +; CHECK: RAW dependences: +; CHECK: { } +; CHECK: WAR dependences: +; CHECK: { } +; CHECK: WAW dependences: +; CHECK: { } +; CHECK: Reduction dependences: +; CHECK: { Stmt_for_cond[i0] -> Stmt_for_cond[1 + i0] : i0 <= 99 and i0 >= 0 } +; +; +; void f(int* sum) { +; for (int i = 0; i <= 100; i++) +; sum += i * 3; +; } +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @f(i32* %sum) { +entry: + br label %entry.split1 + +entry.split1: ; preds = %entry + br label %entry.split + +entry.split: ; preds = %entry.split1 + br label %for.cond + +for.cond: ; preds = %for.cond, %entry.split + %i1.0 = phi i32 [ 0, %entry.split ], [ %inc, %for.cond ] + %sum.reload = load i32* %sum + %mul = mul nsw i32 %i1.0, 3 + %add = add nsw i32 %sum.reload, %mul + %inc = add nsw i32 %i1.0, 1 + store i32 %add, i32* %sum + %cmp = icmp slt i32 %i1.0, 100 + br i1 %cmp, label %for.cond, label %for.end + +for.end: ; preds = %for.cond + ret void +} diff --git a/polly/test/Dependences/reduction_simple_privatization_deps_2.ll b/polly/test/Dependences/reduction_simple_privatization_deps_2.ll new file mode 100644 index 0000000..46b32c0 --- /dev/null +++ b/polly/test/Dependences/reduction_simple_privatization_deps_2.ll @@ -0,0 +1,79 @@ +; RUN: opt %loadPolly -polly-dependences -analyze < %s | FileCheck %s +; +; CHECK: RAW dependences: +; CHECK-DAG: Stmt_S1[i0, i1] -> Stmt_S2[i0] : i0 <= 99 and i0 >= 0 and i1 <= 99 and i1 >= 0 +; CHECK-DAG: Stmt_S0[i0] -> Stmt_S1[i0, o1] : i0 <= 99 and i0 >= 0 and o1 <= 99 and o1 >= 0 +; CHECK-DAG: Stmt_S2[i0] -> Stmt_S0[1 + i0] : i0 >= 0 and i0 <= 98 +; CHECK: WAR dependences: +; CHECK-DAG: { } +; CHECK: WAW dependences: +; CHECK-DAG: Stmt_S1[i0, i1] -> Stmt_S2[i0] : i0 <= 99 and i0 >= 0 and i1 <= 99 and i1 >= 0 +; CHECK-DAG: Stmt_S0[i0] -> Stmt_S1[i0, o1] : i0 <= 99 and i0 >= 0 and o1 <= 99 and o1 >= 0 +; CHECK-DAG: Stmt_S2[i0] -> Stmt_S0[1 + i0] : i0 >= 0 and i0 <= 98 +; CHECK: Reduction dependences: +; CHECK-DAG: Stmt_S1[i0, i1] -> Stmt_S1[i0, 1 + i1] : i0 <= 99 and i0 >= 0 and i1 <= 98 and i1 >= 0 +; +; void f(int *sum) { +; for (int i = 0; i < 100; i++) { +; S0: *sum *= 42; +; for (int j = 0; j < 100; j++) +; S1: *sum += i * j; +; S2: *sum *= 7; +; } +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @f(i32* %sum) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc6, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc7, %for.inc6 ] + %exitcond1 = icmp ne i32 %i.0, 100 + br i1 %exitcond1, label %for.body, label %for.end8 + +for.body: ; preds = %for.cond + br label %S0 + +S0: ; preds = %for.body + %tmp = load i32* %sum, align 4 + %mul = mul nsw i32 %tmp, 42 + store i32 %mul, i32* %sum, align 4 + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %S0 + %j.0 = phi i32 [ 0, %S0 ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %j.0, 100 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + br label %S1 + +S1: ; preds = %for.body3 + %mul4 = mul nsw i32 %i.0, %j.0 + %tmp2 = load i32* %sum, align 4 + %add = add nsw i32 %tmp2, %mul4 + store i32 %add, i32* %sum, align 4 + br label %for.inc + +for.inc: ; preds = %S1 + %inc = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %S2 + +S2: ; preds = %for.end + %tmp3 = load i32* %sum, align 4 + %mul5 = mul nsw i32 %tmp3, 7 + store i32 %mul5, i32* %sum, align 4 + br label %for.inc6 + +for.inc6: ; preds = %S2 + %inc7 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end8: ; preds = %for.cond + ret void +} diff --git a/polly/test/Dependences/reduction_simple_privatization_deps_w_parameter.ll b/polly/test/Dependences/reduction_simple_privatization_deps_w_parameter.ll new file mode 100644 index 0000000..f223619 --- /dev/null +++ b/polly/test/Dependences/reduction_simple_privatization_deps_w_parameter.ll @@ -0,0 +1,59 @@ +; RUN: opt %loadPolly -polly-dependences -analyze < %s | FileCheck %s +; +; CHECK: RAW dependences: +; CHECK-DAG: Stmt_S0[] -> Stmt_S1[o0] : N >= 11 and o0 <= 1023 and o0 >= 0 +; CHECK-DAG: Stmt_S1[i0] -> Stmt_S2[] : N >= 11 and i0 <= 1023 and i0 >= 0 +; CHECK: WAR dependences: +; CHECK: [N] -> { } +; CHECK: WAW dependences: +; CHECK-DAG: Stmt_S0[] -> Stmt_S1[o0] : N >= 11 and o0 <= 1023 and o0 >= 0 +; CHECK-DAG: Stmt_S1[i0] -> Stmt_S2[] : N >= 11 and i0 <= 1023 and i0 >= 0 +; CHECK: Reduction dependences: +; CHECK: Stmt_S1[i0] -> Stmt_S1[1 + i0] : N >= 11 and i0 <= 1022 and i0 >= 0 +; +; void f(int *sum, int N) { +; if (N >= 10) { +; S0: *sum = 0; +; for (int i = 0; i < 1024; i++) +; S1: *sum += i; +; S2: *sum = *sum * 3; +; } +; } +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @f(i32* %sum, i32 %N) { +entry: + br label %entry.1 + +entry.1: + %excond = icmp sgt i32 %N, 10 + br i1 %excond, label %S0, label %f.end + +S0: + store i32 0, i32* %sum, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %S0 + %i.0 = phi i32 [ 0, %S0 ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %i.0, 1024 + br i1 %exitcond, label %S1, label %S2 + +S1: ; preds = %for.cond + %tmp = load i32* %sum, align 4 + %add = add nsw i32 %tmp, %i.0 + store i32 %add, i32* %sum, align 4 + br label %for.inc + +for.inc: ; preds = %S1 + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +S2: ; preds = %for.cond + %tmp1 = load i32* %sum, align 4 + %mul = mul nsw i32 %tmp1, 3 + store i32 %mul, i32* %sum, align 4 + br label %f.end + +f.end: + ret void +} -- 2.7.4