#include "polly/ScopPass.h"
#include "polly/Support/GICHelper.h"
#include "polly/Support/ISLOStream.h"
+#include "polly/Support/ISLTools.h"
#include "polly/Support/VirtualInstruction.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Debug.h"
STATISTIC(InBetweenStore, "Number of Load-Store pairs NOT removed because "
"there is another store between them");
STATISTIC(TotalOverwritesRemoved, "Number of removed overwritten writes");
+STATISTIC(TotalWritesCoalesced, "Number of writes coalesced with another");
STATISTIC(TotalRedundantWritesRemoved,
"Number of writes of same value removed in any SCoP");
STATISTIC(TotalEmptyPartialAccessesRemoved,
/// Number of writes that are overwritten anyway.
int OverwritesRemoved = 0;
+ /// Number of combined writes.
+ int WritesCoalesced = 0;
+
/// Number of redundant writes removed from this SCoP.
int RedundantWritesRemoved = 0;
/// Return whether at least one simplification has been applied.
bool isModified() const {
- return OverwritesRemoved > 0 || RedundantWritesRemoved > 0 ||
- EmptyPartialAccessesRemoved > 0 || DeadAccessesRemoved > 0 ||
- DeadInstructionsRemoved > 0 || StmtsRemoved > 0;
+ return OverwritesRemoved > 0 || WritesCoalesced > 0 ||
+ RedundantWritesRemoved > 0 || EmptyPartialAccessesRemoved > 0 ||
+ DeadAccessesRemoved > 0 || DeadInstructionsRemoved > 0 ||
+ StmtsRemoved > 0;
}
MemoryAccess *getReadAccessForValue(ScopStmt *Stmt, llvm::Value *Val) {
}
}
+ /// Combine writes that write the same value if possible.
+ ///
+ /// This function is able to combine:
+ /// - Partial writes with disjoint domain.
+ /// - Writes that write to the same array element.
+ ///
+ /// In all cases, both writes must write the same values.
+ void coalesceWrites() {
+ for (auto &Stmt : *S) {
+ isl::set Domain =
+ give(Stmt.getDomain()).intersect_params(give(S->getContext()));
+
+ // We let isl do the lookup for the same-value condition. For this, we
+ // wrap llvm::Value into an isl::set such that isl can do the lookup in
+ // its hashtable implementation. llvm::Values are only compared within a
+ // ScopStmt, so the map can be local to this scope. TODO: Refactor with
+ // ZoneAlgorithm::makeValueSet()
+ SmallDenseMap<Value *, isl::set> ValueSets;
+ auto makeValueSet = [&ValueSets, this](Value *V) -> isl::set {
+ assert(V);
+ isl::set &Result = ValueSets[V];
+ if (Result.is_null()) {
+ isl_ctx *Ctx = S->getIslCtx();
+ std::string Name =
+ getIslCompatibleName("Val", V, ValueSets.size() - 1,
+ std::string(), UseInstructionNames);
+ isl::id Id = give(isl_id_alloc(Ctx, Name.c_str(), V));
+ Result = isl::set::universe(
+ isl::space(Ctx, 0, 0).set_tuple_id(isl::dim::set, Id));
+ }
+ return Result;
+ };
+
+ // List of all eligible (for coalescing) writes of the future.
+ // { [Domain[] -> Element[]] -> [Value[] -> MemoryAccess[]] }
+ isl::union_map FutureWrites =
+ isl::union_map::empty(give(S->getParamSpace()));
+
+ // Iterate over accesses from the last to the first.
+ SmallVector<MemoryAccess *, 32> Accesses(getAccessesInOrder(Stmt));
+ for (MemoryAccess *MA : reverse(Accesses)) {
+ // In region statements, the explicit accesses can be in blocks that can
+ // be executed in any order. We therefore process only the implicit
+ // writes and stop after that.
+ if (Stmt.isRegionStmt() && isExplicitAccess(MA))
+ break;
+
+ // { Domain[] -> Element[] }
+ isl::map AccRel =
+ MA->getLatestAccessRelation().intersect_domain(Domain);
+
+ // { [Domain[] -> Element[]] }
+ isl::set AccRelWrapped = AccRel.wrap();
+
+ // { Value[] }
+ isl::set ValSet;
+
+ if (MA->isMustWrite() && (MA->isOriginalScalarKind() ||
+ isa<StoreInst>(MA->getAccessInstruction()))) {
+ // Normally, tryGetValueStored() should be used to determine which
+ // element is written, but it can return nullptr; For PHI accesses,
+ // getAccessValue() returns the PHI instead of the PHI's incoming
+ // value. In this case, where we only compare values of a single
+ // statement, this is fine, because within a statement, a PHI in a
+ // successor block has always the same value as the incoming write. We
+ // still preferably use the incoming value directly so we also catch
+ // direct uses of that.
+ Value *StoredVal = MA->tryGetValueStored();
+ if (!StoredVal)
+ StoredVal = MA->getAccessValue();
+ ValSet = makeValueSet(StoredVal);
+
+ // { Domain[] }
+ isl::set AccDomain = AccRel.domain();
+
+ // Parts of the statement's domain that is not written by this access.
+ isl::set UndefDomain = Domain.subtract(AccDomain);
+
+ // { Element[] }
+ isl::set ElementUniverse =
+ isl::set::universe(AccRel.get_space().range());
+
+ // { Domain[] -> Element[] }
+ isl::map UndefAnything =
+ isl::map::from_domain_and_range(UndefDomain, ElementUniverse);
+
+ // We are looking a compatible write access. The other write can
+ // access these elements...
+ isl::map AllowedAccesses = AccRel.unite(UndefAnything);
+
+ // ... and must write the same value.
+ // { [Domain[] -> Element[]] -> Value[] }
+ isl::map Filter =
+ isl::map::from_domain_and_range(AllowedAccesses.wrap(), ValSet);
+
+ // Lookup future write that fulfills these conditions.
+ // { [[Domain[] -> Element[]] -> Value[]] -> MemoryAccess[] }
+ isl::union_map Filtered =
+ FutureWrites.uncurry().intersect_domain(Filter.wrap());
+
+ // Iterate through the candidates.
+ Filtered.foreach_map([&, this](isl::map Map) -> isl::stat {
+ MemoryAccess *OtherMA = (MemoryAccess *)Map.get_space()
+ .get_tuple_id(isl::dim::out)
+ .get_user();
+
+ isl::map OtherAccRel =
+ OtherMA->getLatestAccessRelation().intersect_domain(Domain);
+
+ // The filter only guaranteed that some of OtherMA's accessed
+ // elements are allowed. Verify that it only accesses allowed
+ // elements. Otherwise, continue with the next candidate.
+ if (!OtherAccRel.is_subset(AllowedAccesses).is_true())
+ return isl::stat::ok;
+
+ // The combined access relation.
+ // { Domain[] -> Element[] }
+ isl::map NewAccRel = AccRel.unite(OtherAccRel);
+ simplify(NewAccRel);
+
+ // Carry out the coalescing.
+ Stmt.removeSingleMemoryAccess(MA);
+ OtherMA->setNewAccessRelation(NewAccRel.copy());
+
+ // We removed MA, OtherMA takes its role.
+ MA = OtherMA;
+
+ TotalWritesCoalesced++;
+ WritesCoalesced++;
+
+ // Don't look for more candidates.
+ return isl::stat::error;
+ });
+ }
+
+ // Two writes cannot be coalesced if there is another access (to some of
+ // the written elements) between them. Remove all visited write accesses
+ // from the list of eligible writes. Don't just remove the accessed
+ // elements, but any MemoryAccess that touches any of the invalidated
+ // elements.
+ // { MemoryAccess[] }
+ isl::union_set TouchedAccesses =
+ FutureWrites.intersect_domain(AccRelWrapped)
+ .range()
+ .unwrap()
+ .range();
+ FutureWrites =
+ FutureWrites.uncurry().subtract_range(TouchedAccesses).curry();
+
+ if (MA->isMustWrite() && !ValSet.is_null()) {
+ // { MemoryAccess[] }
+ auto AccSet =
+ isl::set::universe(isl::space(S->getIslCtx(), 0, 0)
+ .set_tuple_id(isl::dim::set, MA->getId()));
+
+ // { Val[] -> MemoryAccess[] }
+ isl::map ValAccSet = isl::map::from_domain_and_range(ValSet, AccSet);
+
+ // { [Domain[] -> Element[]] -> [Value[] -> MemoryAccess[]] }
+ isl::map AccRelValAcc =
+ isl::map::from_domain_and_range(AccRelWrapped, ValAccSet.wrap());
+ FutureWrites = FutureWrites.add_map(AccRelValAcc);
+ }
+ }
+ }
+ }
+
/// Remove writes that just write the same value already stored in the
/// element.
void removeRedundantWrites() {
OS.indent(Indent) << "Statistics {\n";
OS.indent(Indent + 4) << "Overwrites removed: " << OverwritesRemoved
<< '\n';
+ OS.indent(Indent + 4) << "Partial writes coalesced: " << WritesCoalesced
+ << "\n";
OS.indent(Indent + 4) << "Redundant writes removed: "
<< RedundantWritesRemoved << "\n";
OS.indent(Indent + 4) << "Accesses with empty domains removed: "
DEBUG(dbgs() << "Removing overwrites...\n");
removeOverwrites();
+ DEBUG(dbgs() << "Coalesce partial writes...\n");
+ coalesceWrites();
+
DEBUG(dbgs() << "Removing redundant writes...\n");
removeRedundantWrites();
S = nullptr;
OverwritesRemoved = 0;
+ WritesCoalesced = 0;
RedundantWritesRemoved = 0;
EmptyPartialAccessesRemoved = 0;
DeadAccessesRemoved = 0;
--- /dev/null
+; RUN: opt %loadPolly -polly-import-jscop -polly-import-jscop-postfix=transformed -polly-simplify -analyze < %s | FileCheck -match-full-lines %s
+;
+; Combine 3 partial accesses into one.
+;
+; for (int j = 0; j < n; j += 1) {
+; A[0] = 42.0;
+; A[0] = 42.0;
+; A[0] = 42.0;
+; }
+;
+define void @coalesce_3partials(i32 %n, double* noalias nonnull %A) {
+entry:
+ br label %for
+
+for:
+ %j = phi i32 [0, %entry], [%j.inc, %inc]
+ %j.cmp = icmp slt i32 %j, %n
+ br i1 %j.cmp, label %body, label %exit
+
+ body:
+ store double 42.0, double* %A
+ store double 42.0, double* %A
+ store double 42.0, double* %A
+ br label %inc
+
+inc:
+ %j.inc = add nuw nsw i32 %j, 1
+ br label %for
+
+exit:
+ br label %return
+
+return:
+ ret void
+}
+
+
+; CHECK: Statistics {
+; CHECK: Overwrites removed: 0
+; CHECK: Partial writes coalesced: 2
+; CHECK: }
+
+; CHECK: After accesses {
+; CHECK-NEXT: Stmt_body
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_body[i0] -> MemRef_A[0] };
+; CHECK-NEXT: new: [n] -> { Stmt_body[i0] -> MemRef_A[0] : n <= 2147483647 };
+; CHECK-NEXT: }
--- /dev/null
+{
+ "arrays" : [
+ {
+ "name" : "MemRef_A",
+ "sizes" : [ "*" ],
+ "type" : "double"
+ }
+ ],
+ "context" : "[n] -> { : -2147483648 <= n <= 2147483647 }",
+ "name" : "%for---%return",
+ "statements" : [
+ {
+ "accesses" : [
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }"
+ },
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }"
+ },
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }"
+ }
+ ],
+ "domain" : "[n] -> { Stmt_body[i0] : 0 <= i0 < n }",
+ "name" : "Stmt_body",
+ "schedule" : "[n] -> { Stmt_body[i0] -> [i0] }"
+ }
+ ]
+}
--- /dev/null
+{
+ "arrays" : [
+ {
+ "name" : "MemRef_A",
+ "sizes" : [ "*" ],
+ "type" : "double"
+ }
+ ],
+ "context" : "[n] -> { : -2147483648 <= n <= 2147483647 }",
+ "name" : "%for---%return",
+ "statements" : [
+ {
+ "accesses" : [
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] : i0 < 8 }"
+ },
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] : 8 <= i0 < 16 }"
+ },
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] : 16 <= i0 }"
+ }
+ ],
+ "domain" : "[n] -> { Stmt_body[i0] : 0 <= i0 < n }",
+ "name" : "Stmt_body",
+ "schedule" : "[n] -> { Stmt_body[i0] -> [i0] }"
+ }
+ ]
+}
--- /dev/null
+; RUN: opt %loadPolly -polly-import-jscop -polly-import-jscop-postfix=transformed -polly-simplify -analyze < %s | FileCheck -match-full-lines %s
+;
+; Combine four partial stores into two.
+; The stores write to the same array, but never the same element.
+;
+; for (int j = 0; j < n; j += 1) {
+; A[0] = 21.0;
+; A[1] = 42.0;
+; A[0] = 21.0;
+; A[1] = 42.0;
+; }
+;
+define void @coalesce_disjointelements(i32 %n, double* noalias nonnull %A) {
+entry:
+ br label %for
+
+for:
+ %j = phi i32 [0, %entry], [%j.inc, %inc]
+ %j.cmp = icmp slt i32 %j, %n
+ br i1 %j.cmp, label %body, label %exit
+
+ body:
+ %A_0 = getelementptr inbounds double, double* %A, i32 0
+ %A_1 = getelementptr inbounds double, double* %A, i32 1
+ store double 21.0, double* %A_0
+ store double 42.0, double* %A_1
+ store double 21.0, double* %A_0
+ store double 42.0, double* %A_1
+ br label %inc
+
+inc:
+ %j.inc = add nuw nsw i32 %j, 1
+ br label %for
+
+exit:
+ br label %return
+
+return:
+ ret void
+}
+
+
+; CHECK: Statistics {
+; CHECK: Overwrites removed: 0
+; CHECK: Partial writes coalesced: 2
+; CHECK: }
+
+; CHECK: After accesses {
+; CHECK-NEXT: Stmt_body
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_body[i0] -> MemRef_A[0] };
+; CHECK-NEXT: new: [n] -> { Stmt_body[i0] -> MemRef_A[0] : n <= 2147483647 };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_body[i0] -> MemRef_A[1] };
+; CHECK-NEXT: new: [n] -> { Stmt_body[i0] -> MemRef_A[1] : n <= 2147483647 };
+; CHECK-NEXT: }
--- /dev/null
+{
+ "arrays" : [
+ {
+ "name" : "MemRef_A",
+ "sizes" : [ "*" ],
+ "type" : "double"
+ }
+ ],
+ "context" : "[n] -> { : -2147483648 <= n <= 2147483647 }",
+ "name" : "%for---%return",
+ "statements" : [
+ {
+ "accesses" : [
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }"
+ },
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[1] }"
+ },
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }"
+ },
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[1] }"
+ }
+ ],
+ "domain" : "[n] -> { Stmt_body[i0] : 0 <= i0 < n }",
+ "name" : "Stmt_body",
+ "schedule" : "[n] -> { Stmt_body[i0] -> [i0] }"
+ }
+ ]
+}
--- /dev/null
+{
+ "arrays" : [
+ {
+ "name" : "MemRef_A",
+ "sizes" : [ "*" ],
+ "type" : "double"
+ }
+ ],
+ "context" : "[n] -> { : -2147483648 <= n <= 2147483647 }",
+ "name" : "%for---%return",
+ "statements" : [
+ {
+ "accesses" : [
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] : i0 > 16 }"
+ },
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[1] : i0 > 16 }"
+ },
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] : i0 <= 16 }"
+ },
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[1] : i0 <= 16 }"
+ }
+ ],
+ "domain" : "[n] -> { Stmt_body[i0] : 0 <= i0 < n }",
+ "name" : "Stmt_body",
+ "schedule" : "[n] -> { Stmt_body[i0] -> [i0] }"
+ }
+ ]
+}
--- /dev/null
+; RUN: opt %loadPolly -polly-import-jscop -polly-import-jscop-postfix=transformed -polly-simplify -analyze < %s | FileCheck -match-full-lines %s
+;
+; Combine two partial stores (with overlapping domains) into one.
+;
+; for (int j = 0; j < n; j += 1) {
+; A[0] = 42.0;
+; A[0] = 42.0;
+; }
+;
+define void @coalesce_overlapping(i32 %n, double* noalias nonnull %A) {
+entry:
+ br label %for
+
+for:
+ %j = phi i32 [0, %entry], [%j.inc, %inc]
+ %j.cmp = icmp slt i32 %j, %n
+ br i1 %j.cmp, label %body, label %exit
+
+ body:
+ store double 42.0, double* %A
+ store double 42.0, double* %A
+ br label %inc
+
+inc:
+ %j.inc = add nuw nsw i32 %j, 1
+ br label %for
+
+exit:
+ br label %return
+
+return:
+ ret void
+}
+
+
+; CHECK: Statistics {
+; CHECK: Overwrites removed: 0
+; CHECK: Partial writes coalesced: 1
+; CHECK: }
+
+; CHECK: After accesses {
+; CHECK-NEXT: Stmt_body
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_body[i0] -> MemRef_A[0] };
+; CHECK-NEXT: new: [n] -> { Stmt_body[i0] -> MemRef_A[0] : n <= 2147483647 };
+; CHECK-NEXT: }
--- /dev/null
+{
+ "arrays" : [
+ {
+ "name" : "MemRef_A",
+ "sizes" : [ "*" ],
+ "type" : "double"
+ }
+ ],
+ "context" : "[n] -> { : -2147483648 <= n <= 2147483647 }",
+ "name" : "%for---%return",
+ "statements" : [
+ {
+ "accesses" : [
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }"
+ },
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }"
+ }
+ ],
+ "domain" : "[n] -> { Stmt_body[i0] : 0 <= i0 < n }",
+ "name" : "Stmt_body",
+ "schedule" : "[n] -> { Stmt_body[i0] -> [i0] }"
+ }
+ ]
+}
--- /dev/null
+{
+ "arrays" : [
+ {
+ "name" : "MemRef_A",
+ "sizes" : [ "*" ],
+ "type" : "double"
+ }
+ ],
+ "context" : "[n] -> { : -2147483648 <= n <= 2147483647 }",
+ "name" : "%for---%return",
+ "statements" : [
+ {
+ "accesses" : [
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] : i0 < 18 }"
+ },
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] : i0 >= 8}"
+ }
+ ],
+ "domain" : "[n] -> { Stmt_body[i0] : 0 <= i0 < n }",
+ "name" : "Stmt_body",
+ "schedule" : "[n] -> { Stmt_body[i0] -> [i0] }"
+ }
+ ]
+}
--- /dev/null
+; RUN: opt %loadPolly -polly-import-jscop -polly-import-jscop-postfix=transformed -polly-simplify -analyze < %s | FileCheck -match-full-lines %s
+;
+; Combine two partial stores (with disjoint domains) into one.
+;
+; for (int j = 0; j < n; j += 1) {
+; A[0] = 42.0;
+; A[0] = 42.0;
+; }
+;
+define void @coalesce_partial(i32 %n, double* noalias nonnull %A) {
+entry:
+ br label %for
+
+for:
+ %j = phi i32 [0, %entry], [%j.inc, %inc]
+ %j.cmp = icmp slt i32 %j, %n
+ br i1 %j.cmp, label %body, label %exit
+
+ body:
+ store double 42.0, double* %A
+ store double 42.0, double* %A
+ br label %inc
+
+inc:
+ %j.inc = add nuw nsw i32 %j, 1
+ br label %for
+
+exit:
+ br label %return
+
+return:
+ ret void
+}
+
+
+; CHECK: Statistics {
+; CHECK: Overwrites removed: 0
+; CHECK: Partial writes coalesced: 1
+; CHECK: }
+
+; CHECK: After accesses {
+; CHECK-NEXT: Stmt_body
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_body[i0] -> MemRef_A[0] };
+; CHECK-NEXT: new: [n] -> { Stmt_body[i0] -> MemRef_A[0] : n <= 2147483647 };
+; CHECK-NEXT: }
--- /dev/null
+{
+ "arrays" : [
+ {
+ "name" : "MemRef_A",
+ "sizes" : [ "*" ],
+ "type" : "double"
+ }
+ ],
+ "context" : "[n] -> { : -2147483648 <= n <= 2147483647 }",
+ "name" : "%for---%return",
+ "statements" : [
+ {
+ "accesses" : [
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }"
+ },
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }"
+ }
+ ],
+ "domain" : "[n] -> { Stmt_body[i0] : 0 <= i0 < n }",
+ "name" : "Stmt_body",
+ "schedule" : "[n] -> { Stmt_body[i0] -> [i0] }"
+ }
+ ]
+}
--- /dev/null
+{
+ "arrays" : [
+ {
+ "name" : "MemRef_A",
+ "sizes" : [ "*" ],
+ "type" : "double"
+ }
+ ],
+ "context" : "[n] -> { : -2147483648 <= n <= 2147483647 }",
+ "name" : "%for---%return",
+ "statements" : [
+ {
+ "accesses" : [
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] : i0 < 16 }"
+ },
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] : i0 >= 16 }"
+ }
+ ],
+ "domain" : "[n] -> { Stmt_body[i0] : 0 <= i0 < n }",
+ "name" : "Stmt_body",
+ "schedule" : "[n] -> { Stmt_body[i0] -> [i0] }"
+ }
+ ]
+}
; }
; CHECK: After accesses {
-; CHECK-NEXT: Stmt_bb10
-; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1]
-; CHECK-NEXT: { Stmt_bb10[i0, i1, i2] -> MemRef_tmp_0__phi[] };
-; CHECK-NEXT: new: { Stmt_bb10[i0, i1, i2] -> MemRef_C[i0, i1] };
-; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
-; CHECK-NEXT: { Stmt_bb10[i0, i1, i2] -> MemRef_tmp_0[] };
-; CHECK-NEXT: new: { Stmt_bb10[i0, i1, i2] -> MemRef_C[i0, i1] };
-; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
-; CHECK-NEXT: { Stmt_bb10[i0, i1, i2] -> MemRef_tmp_0_lcssa__phi[] };
-; CHECK-NEXT: new: { Stmt_bb10[i0, i1, 1024] -> MemRef_C[i0, i1] };
; CHECK-NEXT: Stmt_bb13
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: { Stmt_bb13[i0, i1, i2] -> MemRef_tmp_0__phi[] };
--- /dev/null
+; RUN: opt %loadPolly -polly-import-jscop -polly-import-jscop-postfix=transformed -polly-simplify -analyze < %s | FileCheck -match-full-lines %s
+;
+; Do not combine stores that write different values.
+;
+; for (int j = 0; j < n; j += 1) {
+; A[0] = 21.0;
+; A[0] = 42.0;
+; }
+;
+define void @nocoalesce_differentvalues(i32 %n, double* noalias nonnull %A) {
+entry:
+ br label %for
+
+for:
+ %j = phi i32 [0, %entry], [%j.inc, %inc]
+ %j.cmp = icmp slt i32 %j, %n
+ br i1 %j.cmp, label %body, label %exit
+
+ body:
+ store double 21.0, double* %A
+ store double 42.0, double* %A
+ br label %inc
+
+inc:
+ %j.inc = add nuw nsw i32 %j, 1
+ br label %for
+
+exit:
+ br label %return
+
+return:
+ ret void
+}
+
+
+; CHECK: Statistics {
+; CHECK: Overwrites removed: 0
+; CHECK: Partial writes coalesced: 0
+; CHECK: }
+
+; CHECK: SCoP could not be simplified
--- /dev/null
+{
+ "arrays" : [
+ {
+ "name" : "MemRef_A",
+ "sizes" : [ "*" ],
+ "type" : "double"
+ }
+ ],
+ "context" : "[n] -> { : -2147483648 <= n <= 2147483647 }",
+ "name" : "%for---%return",
+ "statements" : [
+ {
+ "accesses" : [
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }"
+ },
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }"
+ }
+ ],
+ "domain" : "[n] -> { Stmt_body[i0] : 0 <= i0 < n }",
+ "name" : "Stmt_body",
+ "schedule" : "[n] -> { Stmt_body[i0] -> [i0] }"
+ }
+ ]
+}
--- /dev/null
+{
+ "arrays" : [
+ {
+ "name" : "MemRef_A",
+ "sizes" : [ "*" ],
+ "type" : "double"
+ }
+ ],
+ "context" : "[n] -> { : -2147483648 <= n <= 2147483647 }",
+ "name" : "%for---%return",
+ "statements" : [
+ {
+ "accesses" : [
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] : i0 >= 16}"
+ },
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] : i0 < 16}"
+ }
+ ],
+ "domain" : "[n] -> { Stmt_body[i0] : 0 <= i0 < n }",
+ "name" : "Stmt_body",
+ "schedule" : "[n] -> { Stmt_body[i0] -> [i0] }"
+ }
+ ]
+}
--- /dev/null
+; RUN: opt %loadPolly -polly-import-jscop -polly-import-jscop-postfix=transformed -polly-simplify -analyze < %s | FileCheck -match-full-lines %s
+;
+; Do not combine stores that do not write to different elements in the
+; same instance.
+;
+; for (int j = 0; j < n; j += 1) {
+; A[0] = 21.0;
+; A[0] = 42.0;
+; }
+;
+define void @nocoalesce_elementmismatch(i32 %n, double* noalias nonnull %A) {
+entry:
+ br label %for
+
+for:
+ %j = phi i32 [0, %entry], [%j.inc, %inc]
+ %j.cmp = icmp slt i32 %j, %n
+ br i1 %j.cmp, label %body, label %exit
+
+ body:
+ %A_0 = getelementptr inbounds double, double* %A, i32 0
+ %A_1 = getelementptr inbounds double, double* %A, i32 1
+ store double 42.0, double* %A_0
+ store double 42.0, double* %A_1
+ br label %inc
+
+inc:
+ %j.inc = add nuw nsw i32 %j, 1
+ br label %for
+
+exit:
+ br label %return
+
+return:
+ ret void
+}
+
+
+; CHECK: SCoP could not be simplified
--- /dev/null
+{
+ "arrays" : [
+ {
+ "name" : "MemRef_A",
+ "sizes" : [ "*" ],
+ "type" : "double"
+ }
+ ],
+ "context" : "[n] -> { : -2147483648 <= n <= 2147483647 }",
+ "name" : "%for---%return",
+ "statements" : [
+ {
+ "accesses" : [
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }"
+ },
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[1] }"
+ }
+ ],
+ "domain" : "[n] -> { Stmt_body[i0] : 0 <= i0 < n }",
+ "name" : "Stmt_body",
+ "schedule" : "[n] -> { Stmt_body[i0] -> [i0] }"
+ }
+ ]
+}
--- /dev/null
+{
+ "arrays" : [
+ {
+ "name" : "MemRef_A",
+ "sizes" : [ "*" ],
+ "type" : "double"
+ }
+ ],
+ "context" : "[n] -> { : -2147483648 <= n <= 2147483647 }",
+ "name" : "%for---%return",
+ "statements" : [
+ {
+ "accesses" : [
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] : i0 >= 16 }"
+ },
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[1] : i0 <= 16}"
+ }
+ ],
+ "domain" : "[n] -> { Stmt_body[i0] : 0 <= i0 < n }",
+ "name" : "Stmt_body",
+ "schedule" : "[n] -> { Stmt_body[i0] -> [i0] }"
+ }
+ ]
+}
--- /dev/null
+; RUN: opt %loadPolly -polly-import-jscop -polly-import-jscop-postfix=transformed -polly-simplify -analyze < %s | FileCheck -match-full-lines %s
+;
+; Do not combine stores if there is a read between them.
+; Note: The read between is unused, so will be removed by markAndSweep.
+; However, searches for coalesces takes place before.
+;
+; for (int j = 0; j < n; j += 1) {
+; A[0] = 42.0;
+; tmp = A[0];
+; A[0] = 42.0;
+; }
+;
+define void @nocoalesce_readbetween(i32 %n, double* noalias nonnull %A) {
+entry:
+ br label %for
+
+for:
+ %j = phi i32 [0, %entry], [%j.inc, %inc]
+ %j.cmp = icmp slt i32 %j, %n
+ br i1 %j.cmp, label %body, label %exit
+
+ body:
+ store double 42.0, double* %A
+ %tmp = load double, double* %A
+ store double 42.0, double* %A
+ br label %inc
+
+inc:
+ %j.inc = add nuw nsw i32 %j, 1
+ br label %for
+
+exit:
+ br label %return
+
+return:
+ ret void
+}
+
+
+; CHECK: Statistics {
+; CHECK: Overwrites removed: 0
+; CHECK: Partial writes coalesced: 0
+; CHECK: }
+
+; CHECK: After accesses {
+; CHECK-NEXT: Stmt_body
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_body[i0] -> MemRef_A[0] };
+; CHECK-NEXT: new: [n] -> { Stmt_body[i0] -> MemRef_A[0] : i0 >= 17 };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_body[i0] -> MemRef_A[0] };
+; CHECK-NEXT: new: [n] -> { Stmt_body[i0] -> MemRef_A[0] : i0 <= 16 };
+; CHECK-NEXT: }
--- /dev/null
+{
+ "arrays" : [
+ {
+ "name" : "MemRef_A",
+ "sizes" : [ "*" ],
+ "type" : "double"
+ }
+ ],
+ "context" : "[n] -> { : -2147483648 <= n <= 2147483647 }",
+ "name" : "%for---%return",
+ "statements" : [
+ {
+ "accesses" : [
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }"
+ },
+ {
+ "kind" : "read",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }"
+ },
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }"
+ }
+ ],
+ "domain" : "[n] -> { Stmt_body[i0] : 0 <= i0 < n }",
+ "name" : "Stmt_body",
+ "schedule" : "[n] -> { Stmt_body[i0] -> [i0] }"
+ }
+ ]
+}
--- /dev/null
+{
+ "arrays" : [
+ {
+ "name" : "MemRef_A",
+ "sizes" : [ "*" ],
+ "type" : "double"
+ }
+ ],
+ "context" : "[n] -> { : -2147483648 <= n <= 2147483647 }",
+ "name" : "%for---%return",
+ "statements" : [
+ {
+ "accesses" : [
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] : i0 > 16 }"
+ },
+ {
+ "kind" : "read",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }"
+ },
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] : i0 <= 16 }"
+ }
+ ],
+ "domain" : "[n] -> { Stmt_body[i0] : 0 <= i0 < n }",
+ "name" : "Stmt_body",
+ "schedule" : "[n] -> { Stmt_body[i0] -> [i0] }"
+ }
+ ]
+}
--- /dev/null
+; RUN: opt %loadPolly -polly-import-jscop -polly-import-jscop-postfix=transformed -polly-simplify -analyze < %s | FileCheck -match-full-lines %s
+;
+; Do not combine stores if there is a write between them.
+;
+; for (int j = 0; j < n; j += 1) {
+; A[0] = 42.0;
+; A[0] = 21.0;
+; A[0] = 42.0;
+; }
+;
+define void @nocoalesce_writebetween(i32 %n, double* noalias nonnull %A) {
+entry:
+ br label %for
+
+for:
+ %j = phi i32 [0, %entry], [%j.inc, %inc]
+ %j.cmp = icmp slt i32 %j, %n
+ br i1 %j.cmp, label %body, label %exit
+
+ body:
+ store double 42.0, double* %A
+ store double 21.0, double* %A
+ store double 42.0, double* %A
+ br label %inc
+
+inc:
+ %j.inc = add nuw nsw i32 %j, 1
+ br label %for
+
+exit:
+ br label %return
+
+return:
+ ret void
+}
+
+
+; CHECK: SCoP could not be simplified
--- /dev/null
+{
+ "arrays" : [
+ {
+ "name" : "MemRef_A",
+ "sizes" : [ "*" ],
+ "type" : "double"
+ }
+ ],
+ "context" : "[n] -> { : -2147483648 <= n <= 2147483647 }",
+ "name" : "%for---%return",
+ "statements" : [
+ {
+ "accesses" : [
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }"
+ },
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }"
+ },
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }"
+ }
+ ],
+ "domain" : "[n] -> { Stmt_body[i0] : 0 <= i0 < n }",
+ "name" : "Stmt_body",
+ "schedule" : "[n] -> { Stmt_body[i0] -> [i0] }"
+ }
+ ]
+}
--- /dev/null
+{
+ "arrays" : [
+ {
+ "name" : "MemRef_A",
+ "sizes" : [ "*" ],
+ "type" : "double"
+ }
+ ],
+ "context" : "[n] -> { : -2147483648 <= n <= 2147483647 }",
+ "name" : "%for---%return",
+ "statements" : [
+ {
+ "accesses" : [
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] : i0 < 16 }"
+ },
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] : 8 <= i0 < 24}"
+ },
+ {
+ "kind" : "write",
+ "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] : i0 >= 16 }"
+ }
+ ],
+ "domain" : "[n] -> { Stmt_body[i0] : 0 <= i0 < n }",
+ "name" : "Stmt_body",
+ "schedule" : "[n] -> { Stmt_body[i0] -> [i0] }"
+ }
+ ]
+}