/// Updated access relation read from JSCOP file.
isl_map *newAccessRelation;
+ void assumeNoOutOfBound(const IRAccess &Access);
+
public:
// @brief Create a memory access from an access in LLVM-IR.
//
isl_map *getAccessRelation() const;
+ /// @brief Return the space in which the access relation lives.
+ __isl_give isl_space *getAccessRelationSpace() const;
+
/// @brief Get an isl string representing this access function.
std::string getAccessRelationStr() const;
/// @brief Add the bounds of the parameters to the context.
void addParameterBounds();
+ /// @brief Simplify the assumed context.
+ void simplifyAssumedContext();
+
/// Build the Scop and Statement with precalculated scop information.
void buildScop(TempScop &TempScop, const Region &CurRegion,
// Loops in Scop containing CurRegion
/// @return The assumed context of this Scop.
__isl_give isl_set *getAssumedContext() const;
+ /// @brief Add assumptions to assumed context.
+ ///
+ /// The assumptions added will be assumed to hold during the execution of the
+ /// scop. However, as they are generally not statically provable, at code
+ /// generation time run-time checks will be generated that ensure the
+ /// assumptions hold.
+ ///
+ /// WARNING: We currently exploit in simplifyAssumedContext the knowledge
+ /// that assumptions do not change the set of statement instances
+ /// executed.
+ ///
+ /// @param Set A set describing relations between parameters that are assumed
+ /// to hold.
+ void addAssumption(__isl_take isl_set *Set);
+
/// @brief Get an isl string representing the context.
std::string getContextStr() const;
+ /// @brief Get an isl string representing the assumed context.
+ std::string getAssumedContextStr() const;
+
/// @name Statements Iterators
///
/// These iterators iterate over all statements of this Scop.
return stringFromIslObj(AccessRelation);
}
+__isl_give isl_space *MemoryAccess::getAccessRelationSpace() const {
+  // Per the __isl_give annotation the caller receives ownership of the
+  // returned space object.
+  isl_space *RelationSpace = isl_map_get_space(AccessRelation);
+  return RelationSpace;
+}
+
isl_map *MemoryAccess::getNewAccessRelation() const {
return isl_map_copy(newAccessRelation);
}
isl_basic_set_universe(Space));
}
+// Formalize no out-of-bound access assumption
+//
+// When delinearizing array accesses we optimistically assume that the
+// delinearized accesses do not access out of bound locations (the subscript
+// expression of each array evaluates for each statement instance that is
+// executed to a value that is non-negative and strictly smaller than the
+// size of the corresponding dimension). The only exception is the outermost
+// dimension for which we do not assume any upper bound. At this point we
+// formalize this assumption to ensure that at code generation time the relevant
+// run-time checks can be generated.
+//
+// To find the set of constraints necessary to avoid out of bound accesses, we
+// first build the set of data locations that are not within array bounds. We
+// then apply the reverse access relation to obtain the set of iterations that
+// may contain invalid accesses and reduce this set of iterations to the ones
+// that are actually executed by intersecting them with the domain of the
+// statement. If we now project out all loop dimensions, we obtain a set of
+// parameters that may cause statement instances to be executed that may
+// possibly yield out of bound memory accesses. The complement of these
+// constraints is the set of constraints that needs to be assumed to ensure such
+// statement instances are never executed.
+void MemoryAccess::assumeNoOutOfBound(const IRAccess &Access) {
+ isl_space *Space = isl_space_range(getAccessRelationSpace()); // range of the access relation, i.e. the accessed array locations
+ isl_set *Outside = isl_set_empty(isl_space_copy(Space)); // accumulates the out-of-bound locations of all dimensions
+ for (int i = 0, Size = Access.Subscripts.size(); i < Size; ++i) {
+ isl_local_space *LS = isl_local_space_from_space(isl_space_copy(Space));
+ isl_pw_aff *Var =
+ isl_pw_aff_var_on_domain(isl_local_space_copy(LS), isl_dim_set, i); // value of the i-th array subscript
+ isl_pw_aff *Zero = isl_pw_aff_zero_on_domain(LS);
+
+ isl_set *DimOutside;
+
+ if (i == 0) { // outermost dimension: only the lower bound is assumed
+ DimOutside = isl_pw_aff_lt_set(Var, Zero);
+ } else { // inner dimensions: subscript < 0 or subscript >= dimension size
+ DimOutside = isl_pw_aff_lt_set(isl_pw_aff_copy(Var), Zero); // Var is copied because it is used again for the upper bound below
+ isl_pw_aff *SizeE =
+ SCEVAffinator::getPwAff(Statement, Access.Sizes[i - 1]); // Sizes holds no entry for the outermost dimension, hence i - 1
+
+ SizeE = isl_pw_aff_drop_dims(SizeE, isl_dim_in, 0,
+ Statement->getNumIterators()); // remove the statement's iterator dimensions from the input ...
+ SizeE = isl_pw_aff_add_dims(SizeE, isl_dim_in,
+ isl_space_dim(Space, isl_dim_set)); // ... and add one input dimension per array dimension instead
+ SizeE = isl_pw_aff_set_tuple_id(
+ SizeE, isl_dim_in, isl_space_get_tuple_id(Space, isl_dim_set)); // name the input tuple like the array space so SizeE can be compared with Var
+
+ DimOutside = isl_set_union(DimOutside, isl_pw_aff_le_set(SizeE, Var)); // add locations with size <= subscript
+ }
+
+ Outside = isl_set_union(Outside, DimOutside);
+ }
+
+ Outside = isl_set_apply(Outside, isl_map_reverse(getAccessRelation())); // iterations that access an out-of-bound location
+ Outside = isl_set_intersect(Outside, Statement->getDomain()); // restrict to iterations of the statement's domain
+ Outside = isl_set_params(Outside); // parameter values for which such an iteration exists
+ Outside = isl_set_complement(Outside); // parameter values without any out-of-bound access
+ Statement->getParent()->addAssumption(Outside);
+ isl_space_free(Space);
+}
+
MemoryAccess::MemoryAccess(const IRAccess &Access, const Instruction *AccInst,
ScopStmt *Statement)
: Statement(Statement), Inst(AccInst), newAccessRelation(nullptr) {
isl_space_free(Space);
AccessRelation = isl_map_set_tuple_name(AccessRelation, isl_dim_out,
getBaseName().c_str());
+ assumeNoOutOfBound(Access);
}
void MemoryAccess::realignParams() {
Stmt->realignParams();
}
+void Scop::simplifyAssumedContext() {
+  // The iteration domains constrain the parameters: taken together, their
+  // parameter constraints describe the parameter values for which at least
+  // one statement instance of the scop is executed. The assumed context only
+  // needs to be meaningful for those values, so it is simplified under the
+  // premise that they hold. Whenever no statement instance is executed, the
+  // assumptions taken about the executed code are irrelevant and may be
+  // altered freely.
+  //
+  // WARNING: This reasoning is only sound as long as the assumptions never
+  //          shrink the set of executed statement instances. If they did,
+  //          the iteration domains could claim that nothing runs for some
+  //          parameter values although the original program performs work
+  //          there; rewriting the run-time conditions (and thereby possibly
+  //          the run-time check) could then keep certain scops from being
+  //          executed.
+  //
+  // Example: delinearizing
+  //
+  //   for (long i = 0; i < 100; i++)
+  //     for (long j = 0; j < m; j++)
+  //       A[i+p][j] = 1.0;
+  //
+  // requires m <= 0 or (m >= 1 and p >= 0), since any other parameter values
+  // would lead to out of bound accesses. Statement instances are only
+  // executed for m >= 1, hence assuming p >= 0 is sufficient.
+  isl_set *ExecutedParams = isl_union_set_params(getDomains());
+  AssumedContext = isl_set_gist_params(AssumedContext, ExecutedParams);
+}
+
Scop::Scop(TempScop &tempScop, LoopInfo &LI, ScalarEvolution &ScalarEvolution,
isl_ctx *Context)
: SE(&ScalarEvolution), R(tempScop.getMaxRegion()),
realignParams();
addParameterBounds();
+ simplifyAssumedContext();
assert(NestLoops.empty() && "NestLoops not empty at top level!");
}
}
std::string Scop::getContextStr() const { return stringFromIslObj(Context); }
+std::string Scop::getAssumedContextStr() const {
+  // Render the assumed context with the same helper that getContextStr()
+  // uses for the context.
+  std::string AssumedStr = stringFromIslObj(AssumedContext);
+  return AssumedStr;
+}
std::string Scop::getNameStr() const {
std::string ExitName, EntryName;
return isl_set_copy(AssumedContext);
}
+void Scop::addAssumption(__isl_take isl_set *Set) {
+  // Tighten the assumed context by the new assumption. Set is handed over to
+  // the intersection (see the __isl_take annotation), so the caller must not
+  // use it afterwards.
+  isl_set *Tightened = isl_set_intersect(AssumedContext, Set);
+  AssumedContext = Tightened;
+}
+
void Scop::printContext(raw_ostream &OS) const {
OS << "Context:\n";
OS.indent(4) << getContextStr() << "\n";
+ OS.indent(4) << "Assumed Context:\n";
+ if (!AssumedContext) {
+ OS.indent(4) << "n/a\n\n";
+ return;
+ }
+
+ OS.indent(4) << getAssumedContextStr() << "\n";
+
for (const SCEV *Parameter : Parameters) {
int Dim = ParameterIds.find(Parameter)->second;
OS.indent(4) << "p" << Dim << ": " << *Parameter << "\n";
PwZero = isl_pw_aff_intersect_domain(
PwZero, isl_set_complement(S->getAssumedContext()));
- isl_pw_aff *Cond = isl_pw_aff_union_max(PwZero, PwOne);
+ isl_pw_aff *Cond = isl_pw_aff_union_max(PwOne, PwZero);
RunCondition = isl_ast_build_expr_from_pw_aff(Context, Cond);
}
IslNodeBuilder NodeBuilder(Builder, Annotator, this);
+ Builder.SetInsertPoint(StartBlock->getSinglePredecessor()->begin());
+ NodeBuilder.addParameters(S.getContext());
// Build condition that evaluates at run-time if all assumptions taken
// for the scop hold. If we detect some assumptions do not hold, the
// original code is executed.
BasicBlock *PrevBB = StartBlock->getUniquePredecessor();
BranchInst *Branch = dyn_cast<BranchInst>(PrevBB->getTerminator());
Branch->setCondition(V);
+ Builder.SetInsertPoint(StartBlock->begin());
- NodeBuilder.addParameters(S.getContext());
NodeBuilder.create(Ast);
return true;
}
--- /dev/null
+; RUN: opt %loadPolly -polly-ast -analyze -polly-delinearize < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; void foo(long n, long m, long o, double A[n][m], long p, long q) {
+;
+; if (o > 0)
+; for (long i = 0; i < n; i++)
+; for (long j = 0; j < m; j++)
+; A[i+p][j+q] = 1.0;
+; else
+; for (long i = 0; i < n; i++)
+; for (long j = 0; j < m; j++)
+; A[i+p][j+q-100] = 1.0;
+; }
+
+; This test case is meant to verify that the run-time condition generated
+; for the delinearization is simplified such that conditions that would not
+; cause any code to be executed are not generated.
+
+; CHECK: if ((q == 100 && o <= 0 && p >= 0) || (q == 0 && o >= 1 && p >= 0) ? 1 : 0)
+
+; CHECK: if (o >= 1) {
+; CHECK: for (int c1 = 0; c1 < n; c1 += 1)
+; CHECK: for (int c3 = 0; c3 < m; c3 += 1)
+; CHECK: Stmt_for_j(c1, c3);
+; CHECK: } else
+; CHECK: for (int c1 = 0; c1 < n; c1 += 1)
+; CHECK: for (int c3 = 0; c3 < m; c3 += 1)
+; CHECK: Stmt_for_j_1(c1, c3);
+
+; CHECK: else
+; CHECK: { /* original code */ }
+
+define void @foo(i64 %n, i64 %m, i64 %o, double* %A, i64 %p, i64 %q) {
+entry:
+ br label %cond
+
+cond:
+ %cmp = icmp sgt i64 %o, 0  ; o > 0 selects the first loop nest, otherwise the second one runs
+ br i1 %cmp, label %for.i, label %for.i.1
+
+for.i:  ; first loop nest, outer loop over i
+ %i = phi i64 [ 0, %cond ], [ %i.inc, %for.i.inc ]
+ br label %for.j
+
+for.j:  ; first loop nest, inner loop over j
+ %j = phi i64 [ 0, %for.i ], [ %j.inc, %for.j.inc ]
+ %offset0 = add nsw i64 %i, %p  ; i + p
+ %subscript0 = mul i64 %offset0, %m  ; (i + p) * m
+ %offset1 = add nsw i64 %j, %q  ; j + q
+ %subscript1 = add i64 %offset1, %subscript0  ; linearized index (i + p) * m + (j + q)
+ %idx = getelementptr inbounds double* %A, i64 %subscript1
+ store double 1.0, double* %idx
+ br label %for.j.inc
+
+for.j.inc:
+ %j.inc = add nsw i64 %j, 1
+ %j.exitcond = icmp eq i64 %j.inc, %m  ; leave the inner loop once j + 1 == m
+ br i1 %j.exitcond, label %for.i.inc, label %for.j
+
+for.i.inc:
+ %i.inc = add nsw i64 %i, 1
+ %i.exitcond = icmp eq i64 %i.inc, %n  ; leave the outer loop once i + 1 == n
+ br i1 %i.exitcond, label %end, label %for.i
+
+for.i.1:  ; second loop nest, outer loop over i
+ %i.1 = phi i64 [ 0, %cond ], [ %i.inc.1, %for.i.inc.1 ]
+ br label %for.j.1
+
+for.j.1:  ; second loop nest, inner loop over j
+ %j.1 = phi i64 [ 0, %for.i.1 ], [ %j.inc.1, %for.j.inc.1 ]
+ %offset0.1 = add nsw i64 %i.1, %p  ; i + p
+ %subscript0.1 = mul i64 %offset0.1, %m  ; (i + p) * m
+ %offset1.1 = add nsw i64 %j.1, %q  ; j + q
+ %subscript1.1 = add i64 %offset1.1, %subscript0.1  ; (i + p) * m + (j + q)
+ %subscript1.2 = sub i64 %subscript1.1, 100  ; same index shifted by -100
+ %idx.1 = getelementptr inbounds double* %A, i64 %subscript1.2
+ store double 1.0, double* %idx.1
+ br label %for.j.inc.1
+
+for.j.inc.1:
+ %j.inc.1 = add nsw i64 %j.1, 1
+ %j.exitcond.1 = icmp eq i64 %j.inc.1, %m  ; leave the inner loop once j + 1 == m
+ br i1 %j.exitcond.1, label %for.i.inc.1, label %for.j.1
+
+for.i.inc.1:
+ %i.inc.1 = add nsw i64 %i.1, 1
+ %i.exitcond.1 = icmp eq i64 %i.inc.1, %n  ; leave the outer loop once i + 1 == n
+ br i1 %i.exitcond.1, label %end, label %for.i.1
+
+end:
+ ret void
+}
--- /dev/null
+; RUN: opt %loadPolly -polly-codegen-isl -S -polly-delinearize < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Derived from the following code:
+;
+; void foo(long n, long m, double A[n][m]) {
+; for (long i = 0; i < 100; i++)
+; for (long j = 0; j < 150; j++)
+; A[i][j] = 1.0;
+; }
+
+; CHECK: polly.split_new_and_old:
+; CHECK: %0 = icmp sge i64 %m, 150
+; CHECK: %1 = select i1 %0, i64 1, i64 0
+; CHECK: %2 = icmp ne i64 0, %1
+; CHECK: br i1 %2, label %polly.start, label %for.i
+
+define void @foo(i64 %n, i64 %m, double* %A) {
+entry:
+ br label %for.i
+
+for.i:  ; outer loop over i
+ %i = phi i64 [ 0, %entry ], [ %i.inc, %for.i.inc ]
+ %tmp = mul nsw i64 %i, %m  ; i * m, offset of row i
+ br label %for.j
+
+for.j:  ; inner loop over j; this block is also its own latch
+ %j = phi i64 [ 0, %for.i ], [ %j.inc, %for.j ]
+ %vlaarrayidx.sum = add i64 %j, %tmp  ; linearized index i * m + j
+ %arrayidx = getelementptr inbounds double* %A, i64 %vlaarrayidx.sum
+ store double 1.0, double* %arrayidx
+ %j.inc = add nsw i64 %j, 1
+ %j.exitcond = icmp eq i64 %j.inc, 150  ; constant inner trip count of 150
+ br i1 %j.exitcond, label %for.i.inc, label %for.j
+
+for.i.inc:
+ %i.inc = add nsw i64 %i, 1
+ %i.exitcond = icmp eq i64 %i.inc, 100  ; constant outer trip count of 100
+ br i1 %i.exitcond, label %end, label %for.i
+
+end:
+ ret void
+}
--- /dev/null
+; RUN: opt %loadPolly -polly-codegen-isl -S -polly-delinearize < %s | FileCheck %s
+; RUN: opt %loadPolly -polly-codegen-isl -S -polly-delinearize -polly-codegen-scev < %s | FileCheck %s
+
+; CHECK: %1 = zext i32 %n to i64
+; CHECK: %2 = icmp sge i64 %1, 1
+; CHECK: %3 = select i1 %2, i64 1, i64 0
+; CHECK: %4 = icmp ne i64 0, %3
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @init_array(i32 %n, double* %data) {
+entry:
+ %0 = zext i32 %n to i64  ; 64-bit zero-extended copy of n, used in the index computation below
+ br label %for.body4
+
+for.body4: ; preds = %for.body4, %entry
+ %indvar1 = phi i64 [ %indvar.next2, %for.body4 ], [ 0, %entry ]
+ %.moved.to.for.body4 = mul i64 %0, %indvar1  ; zext(n) * indvar1
+ %1 = add i64 %.moved.to.for.body4, 0
+ %arrayidx7 = getelementptr double* %data, i64 %1
+ store double undef, double* %arrayidx7, align 8
+ %indvar.next2 = add i64 %indvar1, 1
+ br i1 false, label %for.body4, label %for.end10  ; constant-false condition: the back edge is never taken
+
+for.end10: ; preds = %for.body4
+ ret void
+}
; A[i][i] = 1.0;
; }
+
+; CHECK: Assumed Context:
+; CHECK: [n] -> { : }
+
; CHECK: p0: %n
; CHECK-NOT: p1
--- /dev/null
+; RUN: opt %loadPolly -polly-scops -analyze -polly-delinearize < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Derived from the following code:
+;
+; void foo(long n, long m, long p, double A[n][m]) {
+; for (long i = 0; i < 100; i++)
+; for (long j = 0; j < m; j++)
+; A[i+p][j] = 1.0;
+; }
+
+; CHECK: Assumed Context:
+; CHECK: [m, p] -> { : p >= 0 }
+; CHECK: p0: %m
+; CHECK: p1: %p
+; CHECK: Statements {
+; CHECK: Stmt_for_j
+; CHECK: Domain :=
+; CHECK: [m, p] -> { Stmt_for_j[i0, i1] : i0 >= 0 and i0 <= 99 and i1 >= 0 and i1 <= -1 + m };
+; CHECK: Scattering :=
+; CHECK: [m, p] -> { Stmt_for_j[i0, i1] -> scattering[0, i0, 0, i1, 0] };
+; CHECK: MustWriteAccess := [Reduction Type: NONE]
+; CHECK: [m, p] -> { Stmt_for_j[i0, i1] -> MemRef_A[p + i0, i1] };
+; CHECK: }
+
+define void @foo(i64 %n, i64 %m, i64 %p, double* %A) {
+entry:
+ br label %for.i
+
+for.i:  ; outer loop over i
+ %i = phi i64 [ 0, %entry ], [ %i.inc, %for.i.inc ]
+ %add = add nsw i64 %i, %p  ; i + p
+ %tmp = mul nsw i64 %add, %m  ; (i + p) * m, offset of row i + p
+ br label %for.j
+
+for.j:  ; inner loop over j; this block is also its own latch
+ %j = phi i64 [ 0, %for.i ], [ %j.inc, %for.j ]
+ %vlaarrayidx.sum = add i64 %j, %tmp  ; linearized index (i + p) * m + j
+ %arrayidx = getelementptr inbounds double* %A, i64 %vlaarrayidx.sum
+ store double 1.0, double* %arrayidx
+ %j.inc = add nsw i64 %j, 1
+ %j.exitcond = icmp eq i64 %j.inc, %m  ; leave the inner loop once j + 1 == m
+ br i1 %j.exitcond, label %for.i.inc, label %for.j
+
+for.i.inc:
+ %i.inc = add nsw i64 %i, 1
+ %i.exitcond = icmp eq i64 %i.inc, 100  ; constant outer trip count of 100
+ br i1 %i.exitcond, label %end, label %for.i
+
+end:
+ ret void
+}
--- /dev/null
+; RUN: opt %loadPolly -polly-scops -analyze -polly-delinearize < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Derived from the following code:
+;
+; void foo(long n, long m, double A[n][m]) {
+; for (long i = 0; i < 100; i++)
+; for (long j = 0; j < 150; j++)
+; A[i][j] = 1.0;
+; }
+; CHECK: Assumed Context:
+; CHECK: [m] -> { : m >= 150 }
+; CHECK: p0: %m
+; CHECK: Statements {
+; CHECK: Stmt_for_j
+; CHECK: Domain :=
+; CHECK: [m] -> { Stmt_for_j[i0, i1] : i0 >= 0 and i0 <= 99 and i1 >= 0 and i1 <= 149 };
+; CHECK: Scattering :=
+; CHECK: [m] -> { Stmt_for_j[i0, i1] -> scattering[0, i0, 0, i1, 0] };
+; CHECK: MustWriteAccess := [Reduction Type: NONE]
+; CHECK: [m] -> { Stmt_for_j[i0, i1] -> MemRef_A[i0, i1] };
+
+define void @foo(i64 %n, i64 %m, double* %A) {
+entry:
+ br label %for.i
+
+for.i:  ; outer loop over i
+ %i = phi i64 [ 0, %entry ], [ %i.inc, %for.i.inc ]
+ %tmp = mul nsw i64 %i, %m  ; i * m, offset of row i
+ br label %for.j
+
+for.j:  ; inner loop over j; this block is also its own latch
+ %j = phi i64 [ 0, %for.i ], [ %j.inc, %for.j ]
+ %vlaarrayidx.sum = add i64 %j, %tmp  ; linearized index i * m + j
+ %arrayidx = getelementptr inbounds double* %A, i64 %vlaarrayidx.sum
+ store double 1.0, double* %arrayidx
+ %j.inc = add nsw i64 %j, 1
+ %j.exitcond = icmp eq i64 %j.inc, 150  ; constant inner trip count of 150
+ br i1 %j.exitcond, label %for.i.inc, label %for.j
+
+for.i.inc:
+ %i.inc = add nsw i64 %i, 1
+ %i.exitcond = icmp eq i64 %i.inc, 100  ; constant outer trip count of 100
+ br i1 %i.exitcond, label %end, label %for.i
+
+end:
+ ret void
+}
--- /dev/null
+; RUN: opt %loadPolly -polly-scops -analyze -polly-delinearize < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; void foo(long n, long m, long o, double A[n][m][o]) {
+;
+; for (long i = 0; i < 100; i++)
+; for (long j = 0; j < 150; j++)
+; for (long k = 0; k < 200; k++)
+; A[i][j][k] = 1.0;
+; }
+
+; CHECK: Assumed Context:
+; CHECK: [m, o] -> { : m >= 150 and o >= 200 }
+; CHECK: p0: %m
+; CHECK: p1: %o
+; CHECK: Statements {
+; CHECK: Stmt_for_k
+; CHECK: Domain :=
+; CHECK: [m, o] -> { Stmt_for_k[i0, i1, i2] : i0 >= 0 and i0 <= 99 and i1 >= 0 and i1 <= 149 and i2 >= 0 and i2 <= 199 };
+; CHECK: Scattering :=
+; CHECK: [m, o] -> { Stmt_for_k[i0, i1, i2] -> scattering[0, i0, 0, i1, 0, i2, 0] };
+; CHECK: MustWriteAccess := [Reduction Type: NONE]
+; CHECK: [m, o] -> { Stmt_for_k[i0, i1, i2] -> MemRef_A[i0, i1, i2] };
+
+define void @foo(i64 %n, i64 %m, i64 %o, double* %A) {
+entry:
+ br label %for.i
+
+for.i:  ; outermost loop over i
+ %i = phi i64 [ 0, %entry ], [ %i.inc, %for.i.inc ]
+ br label %for.j
+
+for.j:  ; middle loop over j
+ %j = phi i64 [ 0, %for.i ], [ %j.inc, %for.j.inc ]
+ br label %for.k
+
+for.k:  ; innermost loop over k, contains the store
+ %k = phi i64 [ 0, %for.j ], [ %k.inc, %for.k.inc ]
+ %subscript0 = mul i64 %i, %m  ; i * m
+ %subscript1 = add i64 %j, %subscript0  ; i * m + j
+ %subscript2 = mul i64 %subscript1, %o  ; (i * m + j) * o
+ %subscript = add i64 %subscript2, %k  ; linearized index (i * m + j) * o + k
+ %idx = getelementptr inbounds double* %A, i64 %subscript
+ store double 1.0, double* %idx
+ br label %for.k.inc
+
+for.k.inc:
+ %k.inc = add nsw i64 %k, 1
+ %k.exitcond = icmp eq i64 %k.inc, 200  ; constant innermost trip count of 200
+ br i1 %k.exitcond, label %for.j.inc, label %for.k
+
+for.j.inc:
+ %j.inc = add nsw i64 %j, 1
+ %j.exitcond = icmp eq i64 %j.inc, 150  ; constant middle trip count of 150
+ br i1 %j.exitcond, label %for.i.inc, label %for.j
+
+for.i.inc:
+ %i.inc = add nsw i64 %i, 1
+ %i.exitcond = icmp eq i64 %i.inc, 100  ; constant outermost trip count of 100
+ br i1 %i.exitcond, label %end, label %for.i
+
+end:
+ ret void
+}
; A[i+3][j-4][k+7] = 1.0;
; }
+; CHECK: Assumed Context:
+; CHECK: { : }
+
; CHECK: p0: %n
; CHECK: p1: %m
; CHECK: p2: %o
; Access function:
; {{{((8 * ((((%m * %p) + %q) * %o) + %r)) + %A),+,(8 * %m * %o)}<%for.i>,+,
; (8 * %o)}<%for.j>,+,8}<%for.k>
+
+; CHECK: Assumed Context:
+; CHECK: [n, m, o, p, q, r] -> { : q = 0 and r = 0 and p >= 0 }
;
; CHECK: p0: %n
; CHECK: p1: %m
; A[i+3][j-4][k+7] = 1.0;
; }
;
+; CHECK: Assumed Context:
+; CHECK: { : }
; CHECK: p0: %n
; CHECK: p1: %m
; CHECK: p2: %o
; }
; }
;
+; CHECK: Assumed Context:
+; CHECK: { : }
; CHECK: p0: %n
; CHECK: p1: %m
; CHECK: p2: %o
; A[i][j] = 1.0;
; }
+; CHECK: Assumed Context:
+; CHECK: [n, m] -> { : }
; CHECK: p0: %n
; CHECK: p1: %m
; CHECK-NOT: p3
; A[i][j][k] = 1.0;
; }
+; CHECK: Assumed Context:
+; CHECK: [n, m, o] -> { : }
; CHECK: p0: %n
; CHECK: p1: %m
; CHECK: p2: %o
; A[i][j][k] = 1.0;
; }
+; We currently fail to get the relation between the 32 and 64 bit versions of
+; m and o, such that we generate unnecessary run-time checks. This is not a
+; correctness issue, but could be improved.
+
+; CHECK: Assumed Context:
+; CHECK: [n, m, o, p_3, p_4] -> { : p_4 >= o and p_3 >= m }
; CHECK: p0: %n
; CHECK: p1: %m
; CHECK: p2: %o
-; CHECK-NOT: p3
+; CHECK: p3: (zext i32 %m to i64)
+; CHECK: p4: (zext i32 %o to i64)
+; CHECK-NOT: p5
; CHECK: Domain
-; CHECK: [n, m, o] -> { Stmt_for_k[i0, i1, i2] : i0 >= 0 and i0 <= -1 + n and i1 >= 0 and i1 <= -1 + m and i2 >= 0 and i2 <= -1 + o };
+; CHECK: [n, m, o, p_3, p_4] -> { Stmt_for_k[i0, i1, i2] : i0 >= 0 and i0 <= -1 + n and i1 >= 0 and i1 <= -1 + m and i2 >= 0 and i2 <= -1 + o };
; CHECK: Scattering
-; CHECK: [n, m, o] -> { Stmt_for_k[i0, i1, i2] -> scattering[0, i0, 0, i1, 0, i2, 0] };
+; CHECK: [n, m, o, p_3, p_4] -> { Stmt_for_k[i0, i1, i2] -> scattering[0, i0, 0, i1, 0, i2, 0] };
; CHECK: WriteAccess
-; CHECK: [n, m, o] -> { Stmt_for_k[i0, i1, i2] -> MemRef_A[i0, i1, i2] };
+; CHECK: [n, m, o, p_3, p_4] -> { Stmt_for_k[i0, i1, i2] -> MemRef_A[i0, i1, i2] };
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; void foo(long n, long m, long o, double A[n][m][o]) {
;
; for (long i = 0; i < n; i++)
-; for (long j = 0; j < m; j++)
-; for (long k = 0; k < o; k++)
-; A[i][k][j] = 1.0;
+; for (long k = 0; k < o; k++)
+; for (long j = 0; j < m; j++)
+; A[i][j][k] = 1.0;
; }
+; CHECK: Assumed Context:
+; CHECK: { : }
; CHECK: p0: %n
-; CHECK: p1: %m
-; CHECK: p2: %o
+; CHECK: p1: %o
+; CHECK: p2: %m
; CHECK-NOT: p3
;
; CHECK: Domain
-; CHECK: [n, m, o] -> { Stmt_for_k[i0, i1, i2] : i0 >= 0 and i0 <= -1 + n and i1 >= 0 and i1 <= -1 + m and i2 >= 0 and i2 <= -1 + o };
+; CHECK: [n, o, m] -> { Stmt_for_j[i0, i1, i2] : i0 >= 0 and i0 <= -1 + n and i1 >= 0 and i1 <= -1 + o and i2 >= 0 and i2 <= -1 + m };
; CHECK: Scattering
-; CHECK: [n, m, o] -> { Stmt_for_k[i0, i1, i2] -> scattering[0, i0, 0, i1, 0, i2, 0] };
+; CHECK: [n, o, m] -> { Stmt_for_j[i0, i1, i2] -> scattering[0, i0, 0, i1, 0, i2, 0] };
; CHECK: WriteAccess
-; CHECK: [n, m, o] -> { Stmt_for_k[i0, i1, i2] -> MemRef_A[i0, i2, i1] };
+; CHECK: [n, o, m] -> { Stmt_for_j[i0, i1, i2] -> MemRef_A[i0, i2, i1] };
define void @foo(i64 %n, i64 %m, i64 %o, double* %A) {
entry:
for.i:
%i = phi i64 [ 0, %entry ], [ %i.inc, %for.i.inc ]
- br label %for.j
-
-for.j:
- %j = phi i64 [ 0, %for.i ], [ %j.inc, %for.j.inc ]
br label %for.k
for.k:
- %k = phi i64 [ 0, %for.j ], [ %k.inc, %for.k.inc ]
+ %k = phi i64 [ 0, %for.i ], [ %k.inc, %for.k.inc ]
+ br label %for.j
+
+for.j:
+ %j = phi i64 [ 0, %for.k ], [ %j.inc, %for.j.inc ]
%subscript0 = mul i64 %i, %m
- %subscript1 = add i64 %k, %subscript0
+ %subscript1 = add i64 %j, %subscript0
%subscript2 = mul i64 %subscript1, %o
- %subscript = add i64 %subscript2, %j
+ %subscript = add i64 %subscript2, %k
%idx = getelementptr inbounds double* %A, i64 %subscript
store double 1.0, double* %idx
- br label %for.k.inc
-
-for.k.inc:
- %k.inc = add nsw i64 %k, 1
- %k.exitcond = icmp eq i64 %k.inc, %o
- br i1 %k.exitcond, label %for.j.inc, label %for.k
+ br label %for.j.inc
for.j.inc:
%j.inc = add nsw i64 %j, 1
%j.exitcond = icmp eq i64 %j.inc, %m
- br i1 %j.exitcond, label %for.i.inc, label %for.j
+ br i1 %j.exitcond, label %for.k.inc, label %for.j
+
+for.k.inc:
+ %k.inc = add nsw i64 %k, 1
+ %k.exitcond = icmp eq i64 %k.inc, %o
+ br i1 %k.exitcond, label %for.i.inc, label %for.k
for.i.inc:
%i.inc = add nsw i64 %i, 1
ret void
}
+; CHECK: Assumed Context:
+; CHECK: { : }
+
; CHECK: Stmt_bb
; CHECK: Domain :=
; CHECK: [N] -> { Stmt_bb[i0] : i0 >= 0 and i0 <= -1 + N };