[DA] Delinearization of fixed-size multi-dimensional arrays

author Bardia Mahjour <bmahjour@ca.ibm.com>

Wed, 26 Feb 2020 17:51:58 +0000 (12:51 -0500)

committer Bardia Mahjour <bmahjour@ca.ibm.com>

Thu, 27 Feb 2020 15:29:01 +0000 (10:29 -0500)
author Bardia Mahjour <bmahjour@ca.ibm.com>
Wed, 26 Feb 2020 17:51:58 +0000 (12:51 -0500)
committer Bardia Mahjour <bmahjour@ca.ibm.com>
Thu, 27 Feb 2020 15:29:01 +0000 (10:29 -0500)
diff --git a/llvm/include/llvm/Analysis/DependenceAnalysis.h b/llvm/include/llvm/Analysis/DependenceAnalysis.h

index 0c4002c..ba1153d 100644 (file)
--- a/llvm/include/llvm/Analysis/DependenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/DependenceAnalysis.h
@@ -924,10 +924,28 @@ template <typename T> class ArrayRef;
      void updateDirection(Dependence::DVEntry &Level,
                           const Constraint &CurConstraint) const;
  
+    /// Given a linear access function, tries to recover subscripts
+    /// for each dimension of the array element access.
      bool tryDelinearize(Instruction *Src, Instruction *Dst,
                          SmallVectorImpl<Subscript> &Pair);
  
-  private:
+    /// Tries to delinearize access function for a fixed size multi-dimensional
+    /// array, by deriving subscripts from GEP instructions. Returns true upon
+    /// success and false otherwise.
+    bool tryDelinearizeFixedSize(Instruction *Src, Instruction *Dst,
+                                 const SCEV *SrcAccessFn,
+                                 const SCEV *DstAccessFn,
+                                 SmallVectorImpl<const SCEV *> &SrcSubscripts,
+                                 SmallVectorImpl<const SCEV *> &DstSubscripts);
+
+    /// Tries to delinearize access function for a multi-dimensional array with
+    /// symbolic runtime sizes.
+    /// Returns true upon success and false otherwise.
+    bool tryDelinearizeParametricSize(
+        Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
+        const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
+        SmallVectorImpl<const SCEV *> &DstSubscripts);
+
      /// checkSubscript - Helper function for checkSrcSubscript and
      /// checkDstSubscript to avoid duplicate code
      bool checkSubscript(const SCEV *Expr, const Loop *LoopNest,
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp

index 9b38053..96c7bc7 100644 (file)
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -3264,23 +3264,134 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
    assert(isLoadOrStore(Dst) && "instruction is not load or store");
    Value *SrcPtr = getLoadStorePointerOperand(Src);
    Value *DstPtr = getLoadStorePointerOperand(Dst);
-
    Loop *SrcLoop = LI->getLoopFor(Src->getParent());
    Loop *DstLoop = LI->getLoopFor(Dst->getParent());
+  const SCEV *SrcAccessFn = SE->getSCEVAtScope(SrcPtr, SrcLoop);
+  const SCEV *DstAccessFn = SE->getSCEVAtScope(DstPtr, DstLoop);
+  const SCEVUnknown *SrcBase =
+      dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
+  const SCEVUnknown *DstBase =
+      dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
+
+  if (!SrcBase || !DstBase || SrcBase != DstBase)
+    return false;
  
-  // Below code mimics the code in Delinearization.cpp
-  const SCEV *SrcAccessFn =
-    SE->getSCEVAtScope(SrcPtr, SrcLoop);
-  const SCEV *DstAccessFn =
-    SE->getSCEVAtScope(DstPtr, DstLoop);
+  SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts;
+
+  if (!tryDelinearizeFixedSize(Src, Dst, SrcAccessFn, DstAccessFn,
+                               SrcSubscripts, DstSubscripts) &&
+      !tryDelinearizeParametricSize(Src, Dst, SrcAccessFn, DstAccessFn,
+                                    SrcSubscripts, DstSubscripts))
+    return false;
+
+  int Size = SrcSubscripts.size();
+  LLVM_DEBUG({
+    dbgs() << "\nSrcSubscripts: ";
+    for (int I = 0; I < Size; I++)
+      dbgs() << *SrcSubscripts[I];
+    dbgs() << "\nDstSubscripts: ";
+    for (int I = 0; I < Size; I++)
+      dbgs() << *DstSubscripts[I];
+  });
  
+  // The delinearization transforms a single-subscript MIV dependence test into
+  // a multi-subscript SIV dependence test that is easier to compute. So we
+  // resize Pair to contain as many pairs of subscripts as the delinearization
+  // has found, and then initialize the pairs following the delinearization.
+  Pair.resize(Size);
+  for (int I = 0; I < Size; ++I) {
+    Pair[I].Src = SrcSubscripts[I];
+    Pair[I].Dst = DstSubscripts[I];
+    unifySubscriptType(&Pair[I]);
+  }
+
+  return true;
+}
+
+bool DependenceInfo::tryDelinearizeFixedSize(
+    Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
+    const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
+    SmallVectorImpl<const SCEV *> &DstSubscripts) {
+
+  // In general we cannot safely assume that the subscripts recovered from GEPs
+  // are in the range of values defined for their corresponding array
+  // dimensions. For example some C language usage/interpretation make it
+  // impossible to verify this at compile-time. As such we give up here unless
+  // we can assume that the subscripts do not overlap into neighboring
+  // dimensions and that the number of dimensions matches the number of
+  // subscripts being recovered.
+  if (!DisableDelinearizationChecks)
+    return false;
+
+  Value *SrcPtr = getLoadStorePointerOperand(Src);
+  Value *DstPtr = getLoadStorePointerOperand(Dst);
    const SCEVUnknown *SrcBase =
        dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
    const SCEVUnknown *DstBase =
        dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
+  assert(SrcBase && DstBase && SrcBase == DstBase &&
+         "expected src and dst scev unknowns to be equal");
  
-  if (!SrcBase || !DstBase || SrcBase != DstBase)
+  // Check the simple case where the array dimensions are fixed size.
+  auto *SrcGEP = dyn_cast<GetElementPtrInst>(SrcPtr);
+  auto *DstGEP = dyn_cast<GetElementPtrInst>(DstPtr);
+  if (!SrcGEP || !DstGEP)
+    return false;
+
+  SmallVector<int, 4> SrcSizes, DstSizes;
+  SE->getIndexExpressionsFromGEP(SrcGEP, SrcSubscripts, SrcSizes);
+  SE->getIndexExpressionsFromGEP(DstGEP, DstSubscripts, DstSizes);
+
+  // Check that the two size arrays are non-empty and equal in length and
+  // value.
+  if (SrcSizes.empty() || SrcSubscripts.size() <= 1 ||
+      SrcSizes.size() != DstSizes.size() ||
+      !std::equal(SrcSizes.begin(), SrcSizes.end(), DstSizes.begin())) {
+    SrcSubscripts.clear();
+    DstSubscripts.clear();
      return false;
+  }
+
+  Value *SrcBasePtr = SrcGEP->getOperand(0);
+  Value *DstBasePtr = DstGEP->getOperand(0);
+  while (auto *PCast = dyn_cast<BitCastInst>(SrcBasePtr))
+    SrcBasePtr = PCast->getOperand(0);
+  while (auto *PCast = dyn_cast<BitCastInst>(DstBasePtr))
+    DstBasePtr = PCast->getOperand(0);
+
+  // Check that for identical base pointers we do not miss index offsets
+  // that have been added before this GEP is applied.
+  if (SrcBasePtr == SrcBase->getValue() && DstBasePtr == DstBase->getValue()) {
+    assert(SrcSubscripts.size() == DstSubscripts.size() &&
+           SrcSubscripts.size() == SrcSizes.size() + 1 &&
+           "Expected equal number of entries in the list of sizes and "
+           "subscripts.");
+    LLVM_DEBUG({
+      dbgs() << "Delinearized subscripts of fixed-size array\n"
+             << "SrcGEP:" << *SrcGEP << "\n"
+             << "DstGEP:" << *DstGEP << "\n";
+    });
+    return true;
+  }
+
+  SrcSubscripts.clear();
+  DstSubscripts.clear();
+  return false;
+}
+
+bool DependenceInfo::tryDelinearizeParametricSize(
+    Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
+    const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
+    SmallVectorImpl<const SCEV *> &DstSubscripts) {
+
+  Value *SrcPtr = getLoadStorePointerOperand(Src);
+  Value *DstPtr = getLoadStorePointerOperand(Dst);
+  const SCEVUnknown *SrcBase =
+      dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
+  const SCEVUnknown *DstBase =
+      dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
+  assert(SrcBase && DstBase && SrcBase == DstBase &&
+         "expected src and dst scev unknowns to be equal");
  
    const SCEV *ElementSize = SE->getElementSize(Src);
    if (ElementSize != SE->getElementSize(Dst))
@@ -3304,7 +3415,6 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
    SE->findArrayDimensions(Terms, Sizes, ElementSize);
  
    // Third step: compute the access functions for each subscript.
-  SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts;
    SE->computeAccessFunctions(SrcAR, SrcSubscripts, Sizes);
    SE->computeAccessFunctions(DstAR, DstSubscripts, Sizes);
  
@@ -3313,7 +3423,7 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
        SrcSubscripts.size() != DstSubscripts.size())
      return false;
  
-  int size = SrcSubscripts.size();
+  size_t Size = SrcSubscripts.size();
  
    // Statically check that the array bounds are in-range. The first subscript we
    // don't have a size for and it cannot overflow into another subscript, so is
@@ -3322,40 +3432,20 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
    // FIXME: It may be better to record these sizes and add them as constraints
    // to the dependency checks.
    if (!DisableDelinearizationChecks)
-    for (int i = 1; i < size; ++i) {
-      if (!isKnownNonNegative(SrcSubscripts[i], SrcPtr))
+    for (size_t I = 1; I < Size; ++I) {
+      if (!isKnownNonNegative(SrcSubscripts[I], SrcPtr))
          return false;
  
-      if (!isKnownLessThan(SrcSubscripts[i], Sizes[i - 1]))
+      if (!isKnownLessThan(SrcSubscripts[I], Sizes[I - 1]))
          return false;
  
-      if (!isKnownNonNegative(DstSubscripts[i], DstPtr))
+      if (!isKnownNonNegative(DstSubscripts[I], DstPtr))
          return false;
  
-      if (!isKnownLessThan(DstSubscripts[i], Sizes[i - 1]))
+      if (!isKnownLessThan(DstSubscripts[I], Sizes[I - 1]))
          return false;
      }
  
-  LLVM_DEBUG({
-    dbgs() << "\nSrcSubscripts: ";
-    for (int i = 0; i < size; i++)
-      dbgs() << *SrcSubscripts[i];
-    dbgs() << "\nDstSubscripts: ";
-    for (int i = 0; i < size; i++)
-      dbgs() << *DstSubscripts[i];
-  });
-
-  // The delinearization transforms a single-subscript MIV dependence test into
-  // a multi-subscript SIV dependence test that is easier to compute. So we
-  // resize Pair to contain as many pairs of subscripts as the delinearization
-  // has found, and then initialize the pairs following the delinearization.
-  Pair.resize(size);
-  for (int i = 0; i < size; ++i) {
-    Pair[i].Src = SrcSubscripts[i];
-    Pair[i].Dst = DstSubscripts[i];
-    unifySubscriptType(&Pair[i]);
-  }
-
    return true;
  }
  
diff --git a/llvm/test/Analysis/DependenceAnalysis/PreliminaryNoValidityCheckFixedSize.ll b/llvm/test/Analysis/DependenceAnalysis/PreliminaryNoValidityCheckFixedSize.ll

new file mode 100644 (file)

index 0000000..21bc37d
--- /dev/null
+++ b/llvm/test/Analysis/DependenceAnalysis/PreliminaryNoValidityCheckFixedSize.ll
@@ -0,0 +1,106 @@
+; RUN: opt < %s -disable-output "-passes=print<da>" -aa-pipeline=basic-aa 2>&1 \
+; RUN:   -da-disable-delinearization-checks | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.6.0"
+
+;;  for (long int i = 0; i < n; i++) {
+;;    for (long int j = 0; j < n; j++) {
+;;      for (long int k = 0; k < n; k++) {
+;;        A[i][j][k] = i;
+;;      }
+;;      for (long int k = 0; k < n; k++) {
+;;        *B++ = A[i + 3][j + 2][k + 1];
+
+define void @p2(i64 %n, [100 x [100 x i64]]* %A, i64* %B) nounwind uwtable ssp {
+entry:
+  %cmp10 = icmp sgt i64 %n, 0
+  br i1 %cmp10, label %for.cond1.preheader.preheader, label %for.end26
+
+; CHECK-LABEL: p2
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [-3 -2]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [* * *]!
+
+for.cond1.preheader.preheader:                    ; preds = %entry
+  br label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %for.cond1.preheader.preheader, %for.inc24
+  %B.addr.012 = phi i64* [ %B.addr.1.lcssa, %for.inc24 ], [ %B, %for.cond1.preheader.preheader ]
+  %i.011 = phi i64 [ %inc25, %for.inc24 ], [ 0, %for.cond1.preheader.preheader ]
+  %cmp26 = icmp sgt i64 %n, 0
+  br i1 %cmp26, label %for.cond4.preheader.preheader, label %for.inc24
+
+for.cond4.preheader.preheader:                    ; preds = %for.cond1.preheader
+  br label %for.cond4.preheader
+
+for.cond4.preheader:                              ; preds = %for.cond4.preheader.preheader, %for.inc21
+  %B.addr.18 = phi i64* [ %B.addr.2.lcssa, %for.inc21 ], [ %B.addr.012, %for.cond4.preheader.preheader ]
+  %j.07 = phi i64 [ %inc22, %for.inc21 ], [ 0, %for.cond4.preheader.preheader ]
+  %cmp51 = icmp sgt i64 %n, 0
+  br i1 %cmp51, label %for.body6.preheader, label %for.cond10.loopexit
+
+for.body6.preheader:                              ; preds = %for.cond4.preheader
+  br label %for.body6
+
+for.body6:                                        ; preds = %for.body6.preheader, %for.body6
+  %k.02 = phi i64 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ]
+  %arrayidx8 = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* %A, i64 %i.011, i64 %j.07, i64 %k.02
+  store i64 %i.011, i64* %arrayidx8, align 8
+  %inc = add nsw i64 %k.02, 1
+  %exitcond13 = icmp ne i64 %inc, %n
+  br i1 %exitcond13, label %for.body6, label %for.cond10.loopexit.loopexit
+
+for.cond10.loopexit.loopexit:                     ; preds = %for.body6
+  br label %for.cond10.loopexit
+
+for.cond10.loopexit:                              ; preds = %for.cond10.loopexit.loopexit, %for.cond4.preheader
+  %cmp113 = icmp sgt i64 %n, 0
+  br i1 %cmp113, label %for.body12.preheader, label %for.inc21
+
+for.body12.preheader:                             ; preds = %for.cond10.loopexit
+  br label %for.body12
+
+for.body12:                                       ; preds = %for.body12.preheader, %for.body12
+  %k9.05 = phi i64 [ %inc19, %for.body12 ], [ 0, %for.body12.preheader ]
+  %B.addr.24 = phi i64* [ %incdec.ptr, %for.body12 ], [ %B.addr.18, %for.body12.preheader ]
+  %add = add nsw i64 %k9.05, 1
+  %add13 = add nsw i64 %j.07, 2
+  %add14 = add nsw i64 %i.011, 3
+  %arrayidx17 = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* %A, i64 %add14, i64 %add13, i64 %add
+  %0 = load i64, i64* %arrayidx17, align 8
+  %incdec.ptr = getelementptr inbounds i64, i64* %B.addr.24, i64 1
+  store i64 %0, i64* %B.addr.24, align 8
+  %inc19 = add nsw i64 %k9.05, 1
+  %exitcond = icmp ne i64 %inc19, %n
+  br i1 %exitcond, label %for.body12, label %for.inc21.loopexit
+
+for.inc21.loopexit:                               ; preds = %for.body12
+  %scevgep = getelementptr i64, i64* %B.addr.18, i64 %n
+  br label %for.inc21
+
+for.inc21:                                        ; preds = %for.inc21.loopexit, %for.cond10.loopexit
+  %B.addr.2.lcssa = phi i64* [ %B.addr.18, %for.cond10.loopexit ], [ %scevgep, %for.inc21.loopexit ]
+  %inc22 = add nsw i64 %j.07, 1
+  %exitcond14 = icmp ne i64 %inc22, %n
+  br i1 %exitcond14, label %for.cond4.preheader, label %for.inc24.loopexit
+
+for.inc24.loopexit:                               ; preds = %for.inc21
+  %B.addr.2.lcssa.lcssa = phi i64* [ %B.addr.2.lcssa, %for.inc21 ]
+  br label %for.inc24
+
+for.inc24:                                        ; preds = %for.inc24.loopexit, %for.cond1.preheader
+  %B.addr.1.lcssa = phi i64* [ %B.addr.012, %for.cond1.preheader ], [ %B.addr.2.lcssa.lcssa, %for.inc24.loopexit ]
+  %inc25 = add nsw i64 %i.011, 1
+  %exitcond15 = icmp ne i64 %inc25, %n
+  br i1 %exitcond15, label %for.cond1.preheader, label %for.end26.loopexit
+
+for.end26.loopexit:                               ; preds = %for.inc24
+  br label %for.end26
+
+for.end26:                                        ; preds = %for.end26.loopexit, %entry
+  ret void
+}
diff --git a/llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheckFixedSize.ll b/llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheckFixedSize.ll

new file mode 100644 (file)

index 0000000..d783d2e
--- /dev/null
+++ b/llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheckFixedSize.ll
@@ -0,0 +1,120 @@
+; RUN: opt < %s -disable-output -passes="print<da>"                            \
+; RUN: -da-disable-delinearization-checks 2>&1 | FileCheck %s
+; RUN: opt < %s -da -analyze -da-disable-delinearization-checks | FileCheck %s
+
+; CHECK-LABEL: t1
+; CHECK: da analyze - none!
+; CHECK: da analyze - consistent anti [1 -2]!
+; CHECK: da analyze - none!
+
+;; #define N 1024
+;; #define M 2048
+;; void t1(int a[N][M]) {
+;;   for (int i = 0; i < N-1; ++i)
+;;     for (int j = 2; j < M; ++j)
+;;       a[i][j] = a[i+1][j-2];
+;; }
+
+define void @t1([2048 x i32]* %a) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.inc11
+  %indvars.iv4 = phi i64 [ 0, %entry ], [ %indvars.iv.next5, %for.inc11 ]
+  br label %for.body4
+
+for.body4:                                        ; preds = %for.body, %for.body4
+  %indvars.iv = phi i64 [ 2, %for.body ], [ %indvars.iv.next, %for.body4 ]
+  %0 = add nuw nsw i64 %indvars.iv4, 1
+  %1 = add nsw i64 %indvars.iv, -2
+  %arrayidx6 = getelementptr inbounds [2048 x i32], [2048 x i32]* %a, i64 %0, i64 %1
+  %2 = load i32, i32* %arrayidx6, align 4
+  %arrayidx10 = getelementptr inbounds [2048 x i32], [2048 x i32]* %a, i64 %indvars.iv4, i64 %indvars.iv
+  store i32 %2, i32* %arrayidx10, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp ne i64 %indvars.iv.next, 2048
+  br i1 %exitcond, label %for.body4, label %for.inc11
+
+for.inc11:                                        ; preds = %for.body4
+  %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1
+  %exitcond7 = icmp ne i64 %indvars.iv.next5, 1023
+  br i1 %exitcond7, label %for.body, label %for.end13
+
+for.end13:                                        ; preds = %for.inc11
+  ret void
+}
+
+
+; CHECK-LABEL: t2
+; CHECK: da analyze - none!
+; CHECK: da analyze - consistent anti [1 -2 0 -3 2]!
+; CHECK: da analyze - none!
+
+;; #define N 1024
+;; #define M 2048
+;; void t2(int a[][N][N][N][M]) {
+;;   for (int i1 = 0; i1 < N-1; ++i1)
+;;     for (int i2 = 2; i2 < N; ++i2)
+;;       for (int i3 = 0; i3 < N; ++i3)
+;;         for (int i4 = 3; i4 < N; ++i4)
+;;           for (int i5 = 0; i5 < M-2; ++i5)
+;;             a[i1][i2][i3][i4][i5] = a[i1+1][i2-2][i3][i4-3][i5+2];
+;; }
+
+define void @t2([1024 x [1024 x [1024 x [2048 x i32]]]]* %a) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.inc46
+  %indvars.iv18 = phi i64 [ 0, %entry ], [ %indvars.iv.next19, %for.inc46 ]
+  br label %for.body4
+
+for.body4:                                        ; preds = %for.body, %for.inc43
+  %indvars.iv14 = phi i64 [ 2, %for.body ], [ %indvars.iv.next15, %for.inc43 ]
+  br label %for.body8
+
+for.body8:                                        ; preds = %for.body4, %for.inc40
+  %indvars.iv11 = phi i64 [ 0, %for.body4 ], [ %indvars.iv.next12, %for.inc40 ]
+  br label %for.body12
+
+for.body12:                                       ; preds = %for.body8, %for.inc37
+  %indvars.iv7 = phi i64 [ 3, %for.body8 ], [ %indvars.iv.next8, %for.inc37 ]
+  br label %for.body16
+
+for.body16:                                       ; preds = %for.body12, %for.body16
+  %indvars.iv = phi i64 [ 0, %for.body12 ], [ %indvars.iv.next, %for.body16 ]
+  %0 = add nuw nsw i64 %indvars.iv18, 1
+  %1 = add nsw i64 %indvars.iv14, -2
+  %2 = add nsw i64 %indvars.iv7, -3
+  %3 = add nuw nsw i64 %indvars.iv, 2
+  %arrayidx26 = getelementptr inbounds [1024 x [1024 x [1024 x [2048 x i32]]]], [1024 x [1024 x [1024 x [2048 x i32]]]]* %a, i64 %0, i64 %1, i64 %indvars.iv11, i64 %2, i64 %3
+  %4 = load i32, i32* %arrayidx26, align 4
+  %arrayidx36 = getelementptr inbounds [1024 x [1024 x [1024 x [2048 x i32]]]], [1024 x [1024 x [1024 x [2048 x i32]]]]* %a, i64 %indvars.iv18, i64 %indvars.iv14, i64 %indvars.iv11, i64 %indvars.iv7, i64 %indvars.iv
+  store i32 %4, i32* %arrayidx36, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp ne i64 %indvars.iv.next, 2046
+  br i1 %exitcond, label %for.body16, label %for.inc37
+
+for.inc37:                                        ; preds = %for.body16
+  %indvars.iv.next8 = add nuw nsw i64 %indvars.iv7, 1
+  %exitcond10 = icmp ne i64 %indvars.iv.next8, 1024
+  br i1 %exitcond10, label %for.body12, label %for.inc40
+
+for.inc40:                                        ; preds = %for.inc37
+  %indvars.iv.next12 = add nuw nsw i64 %indvars.iv11, 1
+  %exitcond13 = icmp ne i64 %indvars.iv.next12, 1024
+  br i1 %exitcond13, label %for.body8, label %for.inc43
+
+for.inc43:                                        ; preds = %for.inc40
+  %indvars.iv.next15 = add nuw nsw i64 %indvars.iv14, 1
+  %exitcond17 = icmp ne i64 %indvars.iv.next15, 1024
+  br i1 %exitcond17, label %for.body4, label %for.inc46
+
+for.inc46:                                        ; preds = %for.inc43
+  %indvars.iv.next19 = add nuw nsw i64 %indvars.iv18, 1
+  %exitcond21 = icmp ne i64 %indvars.iv.next19, 1023
+  br i1 %exitcond21, label %for.body, label %for.end48
+
+for.end48:                                        ; preds = %for.inc46
+  ret void
+}
diff --git a/llvm/test/Transforms/LoopInterchange/currentLimitation.ll b/llvm/test/Transforms/LoopInterchange/currentLimitation.ll

index 1a9d42d..4f20bdc 100644 (file)
--- a/llvm/test/Transforms/LoopInterchange/currentLimitation.ll
+++ b/llvm/test/Transforms/LoopInterchange/currentLimitation.ll
@@ -2,6 +2,11 @@
  ; RUN:   -pass-remarks-output=%t -verify-loop-info -verify-dom-info -S | FileCheck -check-prefix=IR %s
  ; RUN: FileCheck --input-file=%t %s
  
+; RUN: opt < %s -basicaa -loop-interchange -pass-remarks-missed='loop-interchange' \
+; RUN:   -da-disable-delinearization-checks -pass-remarks-output=%t             \
+; RUN:   -verify-loop-info -verify-dom-info -S | FileCheck -check-prefix=IR %s
+; RUN: FileCheck --check-prefix=DELIN --input-file=%t %s
+
  target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
  target triple = "x86_64-unknown-linux-gnu"
   
@@ -16,13 +21,14 @@ target triple = "x86_64-unknown-linux-gnu"
  ;;    for(int j=1;j<N-1;j++)
  ;;      A[j+1][i+1] = A[j+1][i+1] + k;
  
-; FIXME: Currently fails because of DA changes.
  ; IR-LABEL: @interchange_01
  ; IR-NOT: split
  
  ; CHECK:      Name:            Dependence
  ; CHECK-NEXT: Function:        interchange_01
  
+; DELIN:      Name:            UnsupportedInsBetweenInduction
+; DELIN-NEXT: Function:        interchange_01
  define void @interchange_01(i32 %k, i32 %N) {
   entry:
     %sub = add nsw i32 %N, -1
diff --git a/llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll b/llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll

index b19a4a4..8aaf7c7 100644 (file)
--- a/llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll
+++ b/llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll
@@ -5,6 +5,11 @@
  ; RUN:     -pass-remarks='loop-interchange' -S
  ; RUN: cat %t |  FileCheck %s
  
+; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -verify-loop-info \
+; RUN:     -pass-remarks-output=%t -pass-remarks-missed='loop-interchange' \
+; RUN:     -pass-remarks='loop-interchange' -S -da-disable-delinearization-checks
+; RUN: cat %t |  FileCheck --check-prefix=DELIN %s
+
  @A = common global [100 x [100 x i32]] zeroinitializer
  @B = common global [100 x [100 x i32]] zeroinitializer
  @C = common global [100 x i32] zeroinitializer
@@ -61,6 +66,18 @@ for.end19:
  ; CHECK-NEXT:   - String:          Cannot interchange loops due to dependences.
  ; CHECK-NEXT: ...
  
+; DELIN: --- !Missed
+; DELIN-NEXT: Pass:            loop-interchange
+; DELIN-NEXT: Name:            InterchangeNotProfitable
+; DELIN-NEXT: Function:        test01
+; DELIN-NEXT: Args:
+; DELIN-NEXT:   - String:          'Interchanging loops is too costly (cost='
+; DELIN-NEXT:   - Cost:            '2'
+; DELIN-NEXT:   - String:          ', threshold='
+; DELIN-NEXT:   - Threshold:       '0'
+; DELIN-NEXT:   - String:          ') and it does not improve parallelism.'
+; DELIN-NEXT: ...
+
  ;;--------------------------------------Test case 02------------------------------------
  ;; [FIXME] This loop though valid is currently not interchanged due to the
  ;; limitation that we cannot split the inner loop latch due to multiple use of inner induction
@@ -113,6 +130,14 @@ define void @test02(i32 %k, i32 %N) {
  ; CHECK-NEXT:   - String:          Cannot interchange loops due to dependences.
  ; CHECK-NEXT: ...
  
+; DELIN: --- !Missed
+; DELIN-NEXT: Pass:            loop-interchange
+; DELIN-NEXT: Name:            UnsupportedInsBetweenInduction
+; DELIN-NEXT: Function:        test02
+; DELIN-NEXT: Args:
+; DELIN-NEXT:   - String:          Found unsupported instruction between induction variable increment and branch.
+; DELIN-NEXT: ...
+
  ;;-----------------------------------Test case 03-------------------------------
  ;; Test to make sure we can handle output dependencies.
  ;;
@@ -161,6 +186,14 @@ for.body4:                                        ; preds = %for.body4, %for.con
  ; CHECK-NEXT:   - String:          Cannot interchange loops due to dependences.
  ; CHECK-NEXT: ...
  
+; DELIN: --- !Passed
+; DELIN-NEXT: Pass:            loop-interchange
+; DELIN-NEXT: Name:            Interchanged
+; DELIN-NEXT: Function:        test03
+; DELIN-NEXT: Args:
+; DELIN-NEXT:  - String:          Loop interchanged with enclosing loop.
+; DELIN-NEXT: ...
+
  ;;--------------------------------------Test case 04-------------------------------------
  ;; Loops not tightly nested are not interchanged
  ;;  for(int j=0;j<N;j++) {
@@ -215,3 +248,11 @@ for.end17:
  ; CHECK-NEXT: Args:
  ; CHECK-NEXT:   - String:          Cannot interchange loops due to dependences.
  ; CHECK-NEXT: ...
+
+; DELIN: --- !Missed
+; DELIN-NEXT: Pass:            loop-interchange
+; DELIN-NEXT: Name:            NotTightlyNested
+; DELIN-NEXT: Function:        test04
+; DELIN-NEXT: Args:
+; DELIN-NEXT:  - String:          Cannot interchange loops because they are not tightly nested.
+; DELIN-NEXT: ...
diff --git a/llvm/test/Transforms/LoopInterchange/profitability.ll b/llvm/test/Transforms/LoopInterchange/profitability.ll

index 9fd2174..996ae61 100644 (file)
--- a/llvm/test/Transforms/LoopInterchange/profitability.ll
+++ b/llvm/test/Transforms/LoopInterchange/profitability.ll
@@ -2,6 +2,11 @@
  ; RUN:     -pass-remarks=loop-interchange -pass-remarks-missed=loop-interchange
  ; RUN: FileCheck -input-file %t %s
  
+; RUN: opt < %s -loop-interchange -pass-remarks-output=%t -verify-dom-info -verify-loop-info \
+; RUN:     -pass-remarks=loop-interchange -pass-remarks-missed=loop-interchange \
+; RUN:     -da-disable-delinearization-checks
+; RUN: FileCheck --check-prefix=DELIN -input-file %t %s
+
  ;; We test profitability model in these test cases.
  
  target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@@ -15,10 +20,13 @@ target triple = "x86_64-unknown-linux-gnu"
  ;;   for(int i=1;i<100;i++)
  ;;     for(int j=1;j<100;j++)
  ;;       A[j][i] = A[j - 1][i] + B[j][i];
-;; FIXME: DA misses this case after D35430
  
  ; CHECK:      Name:            Dependence
  ; CHECK-NEXT: Function:        interchange_01
+
+; DELIN:      Name:            Interchanged
+; DELIN-NEXT: Function:        interchange_01
+
  define void @interchange_01() {
  entry:
    br label %for2.preheader
author	Bardia Mahjour <bmahjour@ca.ibm.com>
	Wed, 26 Feb 2020 17:51:58 +0000 (12:51 -0500)
committer	Bardia Mahjour <bmahjour@ca.ibm.com>
	Thu, 27 Feb 2020 15:29:01 +0000 (10:29 -0500)
llvm/include/llvm/Analysis/DependenceAnalysis.h		patch \| blob \| history
llvm/lib/Analysis/DependenceAnalysis.cpp		patch \| blob \| history
llvm/test/Analysis/DependenceAnalysis/PreliminaryNoValidityCheckFixedSize.ll	[new file with mode: 0644]	patch \| blob
llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheckFixedSize.ll	[new file with mode: 0644]	patch \| blob
llvm/test/Transforms/LoopInterchange/currentLimitation.ll		patch \| blob \| history
llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll		patch \| blob \| history
llvm/test/Transforms/LoopInterchange/profitability.ll		patch \| blob \| history