From 19cd61dc11eee8efcab92076f2ee4986bd432102 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 24 Oct 2017 13:05:24 +0000 Subject: [PATCH] [DeLICM] Do not try to map to multiple array elements. Add check and skip when the store used to determine the target accesses multiple array elements. Only a single array location should for mapping the scalar. Having multiple creates problems when deciding which element to load from. While MemoryAccess::getAddressFunction() should select just one of them, other problems arise in code that assumes that there is just one target element per statement instance. This fixes llvm.org/PR34989 This also reverts r313902 which fixed llvm.org/PR34485 also caused by a non-functional target array element. This patch avoids the situation to occur in the first place. llvm-svn: 316432 --- polly/lib/Transform/DeLICM.cpp | 40 ++++++--- polly/test/DeLICM/noninjective_phi_relation.ll | 115 ------------------------- polly/test/DeLICM/skip_multiaccess.ll | 70 +++++++++++++++ 3 files changed, 96 insertions(+), 129 deletions(-) delete mode 100644 polly/test/DeLICM/noninjective_phi_relation.ll create mode 100644 polly/test/DeLICM/skip_multiaccess.ll diff --git a/polly/lib/Transform/DeLICM.cpp b/polly/lib/Transform/DeLICM.cpp index 68dfac4..83a6bf9 100644 --- a/polly/lib/Transform/DeLICM.cpp +++ b/polly/lib/Transform/DeLICM.cpp @@ -666,19 +666,6 @@ private: /// For each PHI instance we can directly determine which was the incoming /// block, and hence derive which value the PHI has. /// - /// The returned relation generally is injective, meaning that every PHI write - /// has at most one (or zero, if the incoming block's branch does not jump to - /// the PHI's block) PHI Read that reads it. However, due to the SCoP's - /// parameter context it is possible a statement instance that would overwrite - /// the PHI scalar is not in the statement's domain and thus a PHI write - /// appear to be used twice. This is a static property, at runtime the - /// runtime condition should avoid that a configuration is executed. Although - /// incorrect, it should not have any effect in DeLICM. If it passes the - /// conflict check, there are multiple locations, one for each PHI Read it - /// matches, it had to write to. MemoryAccess::getAddressFunction() will - /// select only one of them. That is, statically, not all necessary value are - /// written, but the runtime check guards it from ever being executed. - /// /// @param SAI The ScopArrayInfo representing the PHI's storage. /// /// @return { DomainPHIRead[] -> DomainPHIWrite[] } @@ -715,6 +702,7 @@ private: isl_union_map_from_map(LastPerPHIWrites.take()), isl_union_map_reverse(PHIWriteScatter.take()))); assert(isl_union_map_is_single_valued(Result.keep()) == isl_bool_true); + assert(isl_union_map_is_injective(Result.keep()) == isl_bool_true); return Result; } @@ -1360,7 +1348,31 @@ public: continue; } - isl::union_set TouchedElts = MA->getLatestAccessRelation().range(); + // Check for more than one element acces per statement instance. + // Currently we expect write accesses to be functional, eg. disallow + // + // { Stmt[0] -> [i] : 0 <= i < 2 } + // + // This may occur when some accesses to the element write/read only + // parts of the element, eg. a single byte. Polly then divides each + // element into subelements of the smallest access length, normal access + // then touch multiple of such subelements. It is very common when the + // array is accesses with memset, memcpy or memmove which take i8* + // arguments. + isl::union_map AccRel = MA->getLatestAccessRelation(); + if (!AccRel.is_single_valued().is_true()) { + DEBUG(dbgs() << "Access " << MA + << " is incompatible because it writes multiple " + "elements per instance\n"); + OptimizationRemarkMissed R(DEBUG_TYPE, "NonFunctionalAccRel", + MA->getAccessInstruction()); + R << "skipped possible mapping target because it writes more than " + "one element"; + S->getFunction().getContext().diagnose(R); + continue; + } + + isl::union_set TouchedElts = AccRel.range(); if (!TouchedElts.is_subset(CompatibleElts)) { DEBUG( dbgs() diff --git a/polly/test/DeLICM/noninjective_phi_relation.ll b/polly/test/DeLICM/noninjective_phi_relation.ll deleted file mode 100644 index dfe2d0f..0000000 --- a/polly/test/DeLICM/noninjective_phi_relation.ll +++ /dev/null @@ -1,115 +0,0 @@ -; RUN: opt %loadPolly -polly-delicm -analyze < %s | FileCheck %s -match-full-lines -; -; llvm.org/PR34485 -; This produces a non-injective PHIRead -> PHIWrite map due to an invalid -; paramter assumption. It does not map anything because the result does pass -; the non-conflicting test. It would be a better test if it did to test whether -; correct code is generated. -; -; unsigned a; -; long b, d; -; short c; -; int e, f; -; void fn1() { -; for (;;) { -; for (; f;) -; for (;;) -; ; -; a -= 0 < b; -; for (; f <= 5; f++) { -; short *g = &c; -; *g = a++ ? e *= 4 : c; -; g = &f; -; d = *g; -; } -; } -; } -; -target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" - -@f = common local_unnamed_addr global i32 0, align 4 -@b = common local_unnamed_addr global i32 0, align 4 -@a = common local_unnamed_addr global i32 0, align 4 -@c = common local_unnamed_addr global i16 0, align 2 -@e = common local_unnamed_addr global i32 0, align 4 -@d = common local_unnamed_addr global i32 0, align 4 - -define void @fn1() local_unnamed_addr { -entry: - br label %entry.split - -entry.split: ; preds = %entry - %.pr = load i32, i32* @f, align 4 - %tobool19 = icmp eq i32 %.pr, 0 - br i1 %tobool19, label %for.end.lr.ph, label %for.body - -for.end.lr.ph: ; preds = %entry.split - %0 = load i32, i32* @b, align 4 - %cmp = icmp sgt i32 %0, 0 - %conv = zext i1 %cmp to i32 - %a.promoted20 = load i32, i32* @a, align 4 - br label %for.end - -for.cond.for.body_crit_edge: ; preds = %for.end, %for.end12 - %inc.lcssa2125 = phi i32 [ %inc, %for.end12 ], [ %sub, %for.end ] - store i32 %inc.lcssa2125, i32* @a, align 4 - br label %for.body - -for.body: ; preds = %for.cond.for.body_crit_edge, %entry.split - br label %for.cond2 - -for.cond2: ; preds = %for.cond2, %for.body - br label %for.cond2 - -for.end: ; preds = %for.end.lr.ph, %for.end12 - %inc.lcssa22 = phi i32 [ %a.promoted20, %for.end.lr.ph ], [ %inc, %for.end12 ] - %sub = sub i32 %inc.lcssa22, %conv - %.pr15 = load i32, i32* @f, align 4 - %cmp416 = icmp slt i32 %.pr15, 6 - br i1 %cmp416, label %for.body6.lr.ph, label %for.cond.for.body_crit_edge - -for.body6.lr.ph: ; preds = %for.end - %c.promoted = load i16, i16* @c, align 2 - br label %for.body6 - -for.body6: ; preds = %for.body6.lr.ph, %cond.end - %conv918 = phi i16 [ %c.promoted, %for.body6.lr.ph ], [ %conv9, %cond.end ] - %inc17 = phi i32 [ %sub, %for.body6.lr.ph ], [ %inc, %cond.end ] - %1 = phi i32 [ %.pr15, %for.body6.lr.ph ], [ %inc11, %cond.end ] - %inc = add i32 %inc17, 1 - %tobool7 = icmp eq i32 %inc17, 0 - br i1 %tobool7, label %cond.false, label %cond.true - -cond.true: ; preds = %for.body6 - %2 = load i32, i32* @e, align 4 - %mul = shl nsw i32 %2, 2 - store i32 %mul, i32* @e, align 4 - br label %cond.end - -cond.false: ; preds = %for.body6 - %conv8 = sext i16 %conv918 to i32 - br label %cond.end - -cond.end: ; preds = %cond.false, %cond.true - %cond = phi i32 [ %mul, %cond.true ], [ %conv8, %cond.false ] - %conv9 = trunc i32 %cond to i16 - %3 = load i16, i16* bitcast (i32* @f to i16*), align 4 - %inc11 = add nsw i32 %1, 1 - store i32 %inc11, i32* @f, align 4 - %cmp4 = icmp slt i32 %1, 5 - br i1 %cmp4, label %for.body6, label %for.end12 - -for.end12: ; preds = %cond.end - %conv10 = sext i16 %3 to i32 - store i16 %conv9, i16* @c, align 2 - store i32 %conv10, i32* @d, align 4 - %tobool = icmp eq i32 %inc11, 0 - br i1 %tobool, label %for.end, label %for.cond.for.body_crit_edge -} - - -; CHECK: Statistics { -; CHECK: Compatible overwrites: 1 -; CHECK: } - -; CHECK: No modification has been made diff --git a/polly/test/DeLICM/skip_multiaccess.ll b/polly/test/DeLICM/skip_multiaccess.ll new file mode 100644 index 0000000..92c0b12 --- /dev/null +++ b/polly/test/DeLICM/skip_multiaccess.ll @@ -0,0 +1,70 @@ +; RUN: opt %loadPolly -polly-delicm -pass-remarks-missed=polly-delicm -disable-output < %s 2>&1 | FileCheck %s +; +; llvm.org/PR34485 +; llvm.org/PR34989 +; +; The memset causes the array A to be divided into i8-sized subelements. +; The the regular store then writes multiple of these subelements, which +; we do not support currently. +; +; void func(double *A) { +; memset(A, 0, 4); +; for (int j = 0; j < 2; j += 1) { /* outer */ +; double phi = 0.0; +; for (int i = 0; i < 4; i += 1) /* reduction */ +; phi += 4.2; +; A[j] = phi; +; } +; } + +declare void @llvm.memset.p0f64.i64(double* nocapture, i8, i64, i32, i1) + +define void @func(double* noalias nonnull %A) { +entry: + br label %outer.for + +outer.for: + %j = phi i32 [0, %entry], [%j.inc, %outer.inc] + call void @llvm.memset.p0f64.i64(double* %A, i8 0, i64 4, i32 1, i1 false) + %j.cmp = icmp slt i32 %j, 2 + br i1 %j.cmp, label %reduction.for, label %outer.exit + + + reduction.for: + %i = phi i32 [0, %outer.for], [%i.inc, %reduction.inc] + %phi = phi double [0.0, %outer.for], [%add, %reduction.inc] + %i.cmp = icmp slt i32 %i, 4 + br i1 %i.cmp, label %body, label %reduction.exit + + + + body: + %add = fadd double %phi, 4.2 + br label %reduction.inc + + + + reduction.inc: + %i.inc = add nuw nsw i32 %i, 1 + br label %reduction.for + + reduction.exit: + %A_idx = getelementptr inbounds double, double* %A, i32 %j + store double %phi, double* %A_idx + br label %outer.inc + + + +outer.inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %outer.for + +outer.exit: + br label %return + +return: + ret void +} + + +; CHECK: skipped possible mapping target because it writes more than one element -- 2.7.4