/// Only used in DEBUG build but we don't want NDEBUG-dependent ABI.
unsigned NumSymbolicStrides;
+ /// \brief Checks existence of store to invariant address inside loop.
+ /// If the loop has any store to invariant address, then it returns true,
+ /// else returns false.
+ bool hasStoreToLoopInvariantAddress() const {
+ return StoreToLoopInvariantAddress;
+ }
+
private:
/// \brief Analyze the loop. Substitute symbolic strides using Strides.
void analyzeLoop(const ValueToValueMap &Strides);
/// \brief Cache the result of analyzeLoop.
bool CanVecMem;
+ /// \brief Indicator for storing to uniform addresses.
+ /// If a loop has write to a loop invariant address then it should be true.
+ bool StoreToLoopInvariantAddress;
+
/// \brief The diagnostics report generated for the analysis. E.g. why we
/// couldn't analyze the loop.
Optional<LoopAccessReport> Report;
for (I = Stores.begin(), IE = Stores.end(); I != IE; ++I) {
StoreInst *ST = cast<StoreInst>(*I);
Value* Ptr = ST->getPointerOperand();
-
- if (isUniform(Ptr)) {
- emitAnalysis(
- LoopAccessReport(ST)
- << "write to a loop invariant address could not be vectorized");
- DEBUG(dbgs() << "LAA: We don't allow storing to uniform addresses\n");
- CanVecMem = false;
- return;
- }
-
+ // Check for store to loop invariant address.
+ StoreToLoopInvariantAddress |= isUniform(Ptr);
// If we did *not* see this pointer before, insert it to the read-write
// list. At this phase it is only a 'write' list.
if (Seen.insert(Ptr).second) {
const ValueToValueMap &Strides)
: DepChecker(SE, L), NumComparisons(0), TheLoop(L), SE(SE), DL(DL),
TLI(TLI), AA(AA), DT(DT), NumLoads(0), NumStores(0),
- MaxSafeDepDistBytes(-1U), CanVecMem(false) {
+ MaxSafeDepDistBytes(-1U), CanVecMem(false),
+ StoreToLoopInvariantAddress(false) {
if (canAnalyzeLoop())
analyzeLoop(Strides);
}
OS.indent(Depth) << "Memory dependences are safe with run-time checks\n";
}
+ OS.indent(Depth) << "Store to invariant address was "
+ << (StoreToLoopInvariantAddress ? "" : "not ")
+ << "found in loop.\n";
+
if (Report)
OS.indent(Depth) << "Report: " << Report->str() << "\n";
if (!LAI->canVectorizeMemory())
return false;
+ if (LAI->hasStoreToLoopInvariantAddress()) {
+ emitAnalysis(
+ VectorizationReport()
+ << "write to a loop invariant address could not be vectorized");
+ DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n");
+ return false;
+ }
+
if (LAI->getNumRuntimePointerChecks() >
VectorizerParams::RuntimeMemoryCheckThreshold) {
emitAnalysis(VectorizationReport()
--- /dev/null
+; RUN: opt < %s -loop-accesses -analyze | FileCheck %s
+
+; Test to confirm LAA will find store to invariant address.
+; Inner loop has a store to invariant address.
+;
+; for(; i < itr; i++) {
+; for(; j < itr; j++) {
+; var1[i] = var2[j] + var1[i];
+; }
+; }
+
+; CHECK: Store to invariant address was found in loop.
+; CHECK-NOT: Store to invariant address was not found in loop.
+
+define i32 @foo(i32* nocapture %var1, i32* nocapture readonly %var2, i32 %itr) #0 {
+entry:
+ %cmp20 = icmp eq i32 %itr, 0
+ br i1 %cmp20, label %for.end10, label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %entry, %for.inc8
+ %indvars.iv23 = phi i64 [ %indvars.iv.next24, %for.inc8 ], [ 0, %entry ]
+ %j.022 = phi i32 [ %j.1.lcssa, %for.inc8 ], [ 0, %entry ]
+ %cmp218 = icmp ult i32 %j.022, %itr
+ br i1 %cmp218, label %for.body3.lr.ph, label %for.inc8
+
+for.body3.lr.ph: ; preds = %for.cond1.preheader
+ %arrayidx5 = getelementptr inbounds i32, i32* %var1, i64 %indvars.iv23
+ %0 = zext i32 %j.022 to i64
+ br label %for.body3
+
+for.body3: ; preds = %for.body3, %for.body3.lr.ph
+ %indvars.iv = phi i64 [ %0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
+ %arrayidx = getelementptr inbounds i32, i32* %var2, i64 %indvars.iv
+ %1 = load i32, i32* %arrayidx, align 4
+ %2 = load i32, i32* %arrayidx5, align 4
+ %add = add nsw i32 %2, %1
+ store i32 %add, i32* %arrayidx5, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %itr
+ br i1 %exitcond, label %for.inc8, label %for.body3
+
+for.inc8: ; preds = %for.body3, %for.cond1.preheader
+ %j.1.lcssa = phi i32 [ %j.022, %for.cond1.preheader ], [ %itr, %for.body3 ]
+ %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
+ %lftr.wideiv25 = trunc i64 %indvars.iv.next24 to i32
+ %exitcond26 = icmp eq i32 %lftr.wideiv25, %itr
+ br i1 %exitcond26, label %for.end10, label %for.cond1.preheader
+
+for.end10: ; preds = %for.inc8, %entry
+ ret i32 undef
+}
+
--- /dev/null
+; RUN: opt < %s -loop-accesses -analyze | FileCheck %s
+
+; Test to confirm LAA will not find store to invariant address.
+; Inner loop has no store to invariant address.
+;
+; for(; i < itr; i++) {
+; for(; j < itr; j++) {
+; var2[j] = var2[j] + var1[i];
+; }
+; }
+
+; CHECK: Store to invariant address was not found in loop.
+; CHECK-NOT: Store to invariant address was found in loop.
+
+
+define i32 @foo(i32* nocapture readonly %var1, i32* nocapture %var2, i32 %itr) #0 {
+entry:
+ %cmp20 = icmp eq i32 %itr, 0
+ br i1 %cmp20, label %for.end10, label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %entry, %for.inc8
+ %indvars.iv23 = phi i64 [ %indvars.iv.next24, %for.inc8 ], [ 0, %entry ]
+ %j.022 = phi i32 [ %j.1.lcssa, %for.inc8 ], [ 0, %entry ]
+ %cmp218 = icmp ult i32 %j.022, %itr
+ br i1 %cmp218, label %for.body3.lr.ph, label %for.inc8
+
+for.body3.lr.ph: ; preds = %for.cond1.preheader
+ %arrayidx5 = getelementptr inbounds i32, i32* %var1, i64 %indvars.iv23
+ %0 = zext i32 %j.022 to i64
+ br label %for.body3
+
+for.body3: ; preds = %for.body3, %for.body3.lr.ph
+ %indvars.iv = phi i64 [ %0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
+ %arrayidx = getelementptr inbounds i32, i32* %var2, i64 %indvars.iv
+ %1 = load i32, i32* %arrayidx, align 4
+ %2 = load i32, i32* %arrayidx5, align 4
+ %add = add nsw i32 %2, %1
+ store i32 %add, i32* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %itr
+ br i1 %exitcond, label %for.inc8, label %for.body3
+
+for.inc8: ; preds = %for.body3, %for.cond1.preheader
+ %j.1.lcssa = phi i32 [ %j.022, %for.cond1.preheader ], [ %itr, %for.body3 ]
+ %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
+ %lftr.wideiv25 = trunc i64 %indvars.iv.next24 to i32
+ %exitcond26 = icmp eq i32 %lftr.wideiv25, %itr
+ br i1 %exitcond26, label %for.end10, label %for.cond1.preheader
+
+for.end10: ; preds = %for.inc8, %entry
+ ret i32 undef
+}
+
--- /dev/null
+; RUN: opt < %s -loop-accesses -analyze | FileCheck %s
+
+; Test to confirm LAA will find store to invariant address.
+; Inner loop has a store to invariant address.
+;
+; for(; i < itr; i++) {
+; for(; j < itr; j++) {
+; var1[j] = ++var2[i] + var1[j];
+; }
+; }
+
+; CHECK: Store to invariant address was found in loop.
+
+define void @foo(i32* nocapture %var1, i32* nocapture %var2, i32 %itr) #0 {
+entry:
+ %cmp20 = icmp sgt i32 %itr, 0
+ br i1 %cmp20, label %for.cond1.preheader, label %for.end11
+
+for.cond1.preheader: ; preds = %entry, %for.inc9
+ %indvars.iv23 = phi i64 [ %indvars.iv.next24, %for.inc9 ], [ 0, %entry ]
+ %j.022 = phi i32 [ %j.1.lcssa, %for.inc9 ], [ 0, %entry ]
+ %cmp218 = icmp slt i32 %j.022, %itr
+ br i1 %cmp218, label %for.body3.lr.ph, label %for.inc9
+
+for.body3.lr.ph: ; preds = %for.cond1.preheader
+ %arrayidx = getelementptr inbounds i32, i32* %var2, i64 %indvars.iv23
+ %0 = sext i32 %j.022 to i64
+ br label %for.body3
+
+for.body3: ; preds = %for.body3, %for.body3.lr.ph
+ %indvars.iv = phi i64 [ %0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
+ %1 = load i32, i32* %arrayidx, align 4
+ %inc = add nsw i32 %1, 1
+ store i32 %inc, i32* %arrayidx, align 4
+ %arrayidx5 = getelementptr inbounds i32, i32* %var1, i64 %indvars.iv
+ %2 = load i32, i32* %arrayidx5, align 4
+ %add = add nsw i32 %inc, %2
+ store i32 %add, i32* %arrayidx5, align 4
+ %indvars.iv.next = add nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %itr
+ br i1 %exitcond, label %for.inc9, label %for.body3
+
+for.inc9: ; preds = %for.body3, %for.cond1.preheader
+ %j.1.lcssa = phi i32 [ %j.022, %for.cond1.preheader ], [ %itr, %for.body3 ]
+ %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
+ %lftr.wideiv25 = trunc i64 %indvars.iv.next24 to i32
+ %exitcond26 = icmp eq i32 %lftr.wideiv25, %itr
+ br i1 %exitcond26, label %for.end11, label %for.cond1.preheader
+
+for.end11: ; preds = %for.inc9, %entry
+ ret void
+}