[LoopAccesses] Allow analysis to complete in the presence of uniform stores

author Adam Nemet <anemet@apple.com>

Wed, 8 Apr 2015 17:48:40 +0000 (17:48 +0000)

committer Adam Nemet <anemet@apple.com>

Wed, 8 Apr 2015 17:48:40 +0000 (17:48 +0000)
author Adam Nemet <anemet@apple.com>
Wed, 8 Apr 2015 17:48:40 +0000 (17:48 +0000)
committer Adam Nemet <anemet@apple.com>
Wed, 8 Apr 2015 17:48:40 +0000 (17:48 +0000)
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h

index 19b4009..a4393bb 100644 (file)
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -432,6 +432,13 @@ public:
    /// Only used in DEBUG build but we don't want NDEBUG-dependent ABI.
    unsigned NumSymbolicStrides;
  
+  /// \brief Checks existence of store to invariant address inside loop.
+  /// If the loop has any store to invariant address, then it returns true,
+  /// else returns false.
+  bool hasStoreToLoopInvariantAddress() const {
+    return StoreToLoopInvariantAddress;
+  }
+
  private:
    /// \brief Analyze the loop.  Substitute symbolic strides using Strides.
    void analyzeLoop(const ValueToValueMap &Strides);
@@ -469,6 +476,10 @@ private:
    /// \brief Cache the result of analyzeLoop.
    bool CanVecMem;
  
+  /// \brief Indicator for storing to uniform addresses.
+  /// If a loop has write to a loop invariant address then it should be true.
+  bool StoreToLoopInvariantAddress;
+
    /// \brief The diagnostics report generated for the analysis.  E.g. why we
    /// couldn't analyze the loop.
    Optional<LoopAccessReport> Report;
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp

index ae561d7..2342933 100644 (file)
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1044,16 +1044,8 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
    for (I = Stores.begin(), IE = Stores.end(); I != IE; ++I) {
      StoreInst *ST = cast<StoreInst>(*I);
      Value* Ptr = ST->getPointerOperand();
-
-    if (isUniform(Ptr)) {
-      emitAnalysis(
-          LoopAccessReport(ST)
-          << "write to a loop invariant address could not be vectorized");
-      DEBUG(dbgs() << "LAA: We don't allow storing to uniform addresses\n");
-      CanVecMem = false;
-      return;
-    }
-
+    // Check for store to loop invariant address.
+    StoreToLoopInvariantAddress |= isUniform(Ptr);
      // If we did *not* see this pointer before, insert it to  the read-write
      // list. At this phase it is only a 'write' list.
      if (Seen.insert(Ptr).second) {
@@ -1314,7 +1306,8 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
                                 const ValueToValueMap &Strides)
      : DepChecker(SE, L), NumComparisons(0), TheLoop(L), SE(SE), DL(DL),
        TLI(TLI), AA(AA), DT(DT), NumLoads(0), NumStores(0),
-      MaxSafeDepDistBytes(-1U), CanVecMem(false) {
+      MaxSafeDepDistBytes(-1U), CanVecMem(false),
+      StoreToLoopInvariantAddress(false) {
    if (canAnalyzeLoop())
      analyzeLoop(Strides);
  }
@@ -1327,6 +1320,10 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
        OS.indent(Depth) << "Memory dependences are safe with run-time checks\n";
    }
  
+  OS.indent(Depth) << "Store to invariant address was "
+                   << (StoreToLoopInvariantAddress ? "" : "not ")
+                   << "found in loop.\n";
+
    if (Report)
      OS.indent(Depth) << "Report: " << Report->str() << "\n";
  
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

index 2158856..8986932 100644 (file)
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4009,6 +4009,14 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
    if (!LAI->canVectorizeMemory())
      return false;
  
+  if (LAI->hasStoreToLoopInvariantAddress()) {
+    emitAnalysis(
+        VectorizationReport()
+        << "write to a loop invariant address could not be vectorized");
+    DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n");
+    return false;
+  }
+
    if (LAI->getNumRuntimePointerChecks() >
        VectorizerParams::RuntimeMemoryCheckThreshold) {
      emitAnalysis(VectorizationReport()
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check1.ll b/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check1.ll

new file mode 100644 (file)

index 0000000..8ab8ab2
--- /dev/null
+++ b/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check1.ll
@@ -0,0 +1,53 @@
+; RUN: opt < %s -loop-accesses -analyze | FileCheck %s
+
+; Test to confirm LAA will find store to invariant address.
+; Inner loop has a store to invariant address.
+;
+;  for(; i < itr; i++) {
+;    for(; j < itr; j++) {
+;      var1[i] = var2[j] + var1[i];
+;    }
+;  }
+
+; CHECK: Store to invariant address was found in loop.
+; CHECK-NOT: Store to invariant address was not found in loop.
+
+define i32 @foo(i32* nocapture %var1, i32* nocapture readonly %var2, i32 %itr) #0 {
+entry:
+  %cmp20 = icmp eq i32 %itr, 0
+  br i1 %cmp20, label %for.end10, label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %entry, %for.inc8
+  %indvars.iv23 = phi i64 [ %indvars.iv.next24, %for.inc8 ], [ 0, %entry ]
+  %j.022 = phi i32 [ %j.1.lcssa, %for.inc8 ], [ 0, %entry ]
+  %cmp218 = icmp ult i32 %j.022, %itr
+  br i1 %cmp218, label %for.body3.lr.ph, label %for.inc8
+
+for.body3.lr.ph:                                  ; preds = %for.cond1.preheader
+  %arrayidx5 = getelementptr inbounds i32, i32* %var1, i64 %indvars.iv23
+  %0 = zext i32 %j.022 to i64
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3, %for.body3.lr.ph
+  %indvars.iv = phi i64 [ %0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
+  %arrayidx = getelementptr inbounds i32, i32* %var2, i64 %indvars.iv
+  %1 = load i32, i32* %arrayidx, align 4
+  %2 = load i32, i32* %arrayidx5, align 4
+  %add = add nsw i32 %2, %1
+  store i32 %add, i32* %arrayidx5, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %itr
+  br i1 %exitcond, label %for.inc8, label %for.body3
+
+for.inc8:                                         ; preds = %for.body3, %for.cond1.preheader
+  %j.1.lcssa = phi i32 [ %j.022, %for.cond1.preheader ], [ %itr, %for.body3 ]
+  %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
+  %lftr.wideiv25 = trunc i64 %indvars.iv.next24 to i32
+  %exitcond26 = icmp eq i32 %lftr.wideiv25, %itr
+  br i1 %exitcond26, label %for.end10, label %for.cond1.preheader
+
+for.end10:                                        ; preds = %for.inc8, %entry
+  ret i32 undef
+}
+
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check2.ll b/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check2.ll

new file mode 100644 (file)

index 0000000..4da0906
--- /dev/null
+++ b/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check2.ll
@@ -0,0 +1,54 @@
+; RUN: opt < %s -loop-accesses -analyze  | FileCheck %s
+
+; Test to confirm LAA will not find store to invariant address.
+; Inner loop has no store to invariant address.
+;
+;  for(; i < itr; i++) {
+;    for(; j < itr; j++) {
+;      var2[j] = var2[j] + var1[i];
+;    }
+;  }
+
+; CHECK: Store to invariant address was not found in loop.
+; CHECK-NOT: Store to invariant address was found in loop.
+
+
+define i32 @foo(i32* nocapture readonly %var1, i32* nocapture %var2, i32 %itr) #0 {
+entry:
+  %cmp20 = icmp eq i32 %itr, 0
+  br i1 %cmp20, label %for.end10, label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %entry, %for.inc8
+  %indvars.iv23 = phi i64 [ %indvars.iv.next24, %for.inc8 ], [ 0, %entry ]
+  %j.022 = phi i32 [ %j.1.lcssa, %for.inc8 ], [ 0, %entry ]
+  %cmp218 = icmp ult i32 %j.022, %itr
+  br i1 %cmp218, label %for.body3.lr.ph, label %for.inc8
+
+for.body3.lr.ph:                                  ; preds = %for.cond1.preheader
+  %arrayidx5 = getelementptr inbounds i32, i32* %var1, i64 %indvars.iv23
+  %0 = zext i32 %j.022 to i64
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3, %for.body3.lr.ph
+  %indvars.iv = phi i64 [ %0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
+  %arrayidx = getelementptr inbounds i32, i32* %var2, i64 %indvars.iv
+  %1 = load i32, i32* %arrayidx, align 4
+  %2 = load i32, i32* %arrayidx5, align 4
+  %add = add nsw i32 %2, %1
+  store i32 %add, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %itr
+  br i1 %exitcond, label %for.inc8, label %for.body3
+
+for.inc8:                                         ; preds = %for.body3, %for.cond1.preheader
+  %j.1.lcssa = phi i32 [ %j.022, %for.cond1.preheader ], [ %itr, %for.body3 ]
+  %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
+  %lftr.wideiv25 = trunc i64 %indvars.iv.next24 to i32
+  %exitcond26 = icmp eq i32 %lftr.wideiv25, %itr
+  br i1 %exitcond26, label %for.end10, label %for.cond1.preheader
+
+for.end10:                                        ; preds = %for.inc8, %entry
+  ret i32 undef
+}
+
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check3.ll b/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check3.ll

new file mode 100644 (file)

index 0000000..18315a5
--- /dev/null
+++ b/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check3.ll
@@ -0,0 +1,53 @@
+; RUN: opt < %s -loop-accesses -analyze | FileCheck %s
+
+; Test to confirm LAA will find store to invariant address.
+; Inner loop has a store to invariant address.
+;
+;  for(; i < itr; i++) {
+;    for(; j < itr; j++) {
+;      var1[j] = ++var2[i] + var1[j];
+;    }
+;  }
+
+; CHECK: Store to invariant address was found in loop.
+
+define void @foo(i32* nocapture %var1, i32* nocapture %var2, i32 %itr) #0 {
+entry:
+  %cmp20 = icmp sgt i32 %itr, 0
+  br i1 %cmp20, label %for.cond1.preheader, label %for.end11
+
+for.cond1.preheader:                              ; preds = %entry, %for.inc9
+  %indvars.iv23 = phi i64 [ %indvars.iv.next24, %for.inc9 ], [ 0, %entry ]
+  %j.022 = phi i32 [ %j.1.lcssa, %for.inc9 ], [ 0, %entry ]
+  %cmp218 = icmp slt i32 %j.022, %itr
+  br i1 %cmp218, label %for.body3.lr.ph, label %for.inc9
+
+for.body3.lr.ph:                                  ; preds = %for.cond1.preheader
+  %arrayidx = getelementptr inbounds i32, i32* %var2, i64 %indvars.iv23
+  %0 = sext i32 %j.022 to i64
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3, %for.body3.lr.ph
+  %indvars.iv = phi i64 [ %0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
+  %1 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %1, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %arrayidx5 = getelementptr inbounds i32, i32* %var1, i64 %indvars.iv
+  %2 = load i32, i32* %arrayidx5, align 4
+  %add = add nsw i32 %inc, %2
+  store i32 %add, i32* %arrayidx5, align 4
+  %indvars.iv.next = add nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %itr
+  br i1 %exitcond, label %for.inc9, label %for.body3
+
+for.inc9:                                         ; preds = %for.body3, %for.cond1.preheader
+  %j.1.lcssa = phi i32 [ %j.022, %for.cond1.preheader ], [ %itr, %for.body3 ]
+  %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
+  %lftr.wideiv25 = trunc i64 %indvars.iv.next24 to i32
+  %exitcond26 = icmp eq i32 %lftr.wideiv25, %itr
+  br i1 %exitcond26, label %for.end11, label %for.cond1.preheader
+
+for.end11:                                        ; preds = %for.inc9, %entry
+  ret void
+}
author	Adam Nemet <anemet@apple.com>
	Wed, 8 Apr 2015 17:48:40 +0000 (17:48 +0000)
committer	Adam Nemet <anemet@apple.com>
	Wed, 8 Apr 2015 17:48:40 +0000 (17:48 +0000)
llvm/include/llvm/Analysis/LoopAccessAnalysis.h		patch \| blob \| history
llvm/lib/Analysis/LoopAccessAnalysis.cpp		patch \| blob \| history
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp		patch \| blob \| history
llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check1.ll	[new file with mode: 0644]	patch \| blob
llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check2.ll	[new file with mode: 0644]	patch \| blob
llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check3.ll	[new file with mode: 0644]	patch \| blob