void updateDirection(Dependence::DVEntry &Level,
const Constraint &CurConstraint) const;
+ /// Given a linear access function, tries to recover subscripts
+ /// for each dimension of the array element access.
bool tryDelinearize(Instruction *Src, Instruction *Dst,
SmallVectorImpl<Subscript> &Pair);
- private:
+ /// Tries to delinearize access function for a fixed size multi-dimensional
+ /// array, by deriving subscripts from GEP instructions. Returns true upon
+ /// success and false otherwise.
+ bool tryDelinearizeFixedSize(Instruction *Src, Instruction *Dst,
+ const SCEV *SrcAccessFn,
+ const SCEV *DstAccessFn,
+ SmallVectorImpl<const SCEV *> &SrcSubscripts,
+ SmallVectorImpl<const SCEV *> &DstSubscripts);
+
+ /// Tries to delinearize access function for a multi-dimensional array with
+ /// symbolic runtime sizes.
+ /// Returns true upon success and false otherwise.
+ bool tryDelinearizeParametricSize(
+ Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
+ const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
+ SmallVectorImpl<const SCEV *> &DstSubscripts);
+
/// checkSubscript - Helper function for checkSrcSubscript and
/// checkDstSubscript to avoid duplicate code
bool checkSubscript(const SCEV *Expr, const Loop *LoopNest,
assert(isLoadOrStore(Dst) && "instruction is not load or store");
Value *SrcPtr = getLoadStorePointerOperand(Src);
Value *DstPtr = getLoadStorePointerOperand(Dst);
-
Loop *SrcLoop = LI->getLoopFor(Src->getParent());
Loop *DstLoop = LI->getLoopFor(Dst->getParent());
+ const SCEV *SrcAccessFn = SE->getSCEVAtScope(SrcPtr, SrcLoop);
+ const SCEV *DstAccessFn = SE->getSCEVAtScope(DstPtr, DstLoop);
+ const SCEVUnknown *SrcBase =
+ dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
+ const SCEVUnknown *DstBase =
+ dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
+
+ if (!SrcBase || !DstBase || SrcBase != DstBase)
+ return false;
- // Below code mimics the code in Delinearization.cpp
- const SCEV *SrcAccessFn =
- SE->getSCEVAtScope(SrcPtr, SrcLoop);
- const SCEV *DstAccessFn =
- SE->getSCEVAtScope(DstPtr, DstLoop);
+ SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts;
+
+ if (!tryDelinearizeFixedSize(Src, Dst, SrcAccessFn, DstAccessFn,
+ SrcSubscripts, DstSubscripts) &&
+ !tryDelinearizeParametricSize(Src, Dst, SrcAccessFn, DstAccessFn,
+ SrcSubscripts, DstSubscripts))
+ return false;
+
+ int Size = SrcSubscripts.size();
+ LLVM_DEBUG({
+ dbgs() << "\nSrcSubscripts: ";
+ for (int I = 0; I < Size; I++)
+ dbgs() << *SrcSubscripts[I];
+ dbgs() << "\nDstSubscripts: ";
+ for (int I = 0; I < Size; I++)
+ dbgs() << *DstSubscripts[I];
+ });
+ // The delinearization transforms a single-subscript MIV dependence test into
+ // a multi-subscript SIV dependence test that is easier to compute. So we
+ // resize Pair to contain as many pairs of subscripts as the delinearization
+ // has found, and then initialize the pairs following the delinearization.
+ Pair.resize(Size);
+ for (int I = 0; I < Size; ++I) {
+ Pair[I].Src = SrcSubscripts[I];
+ Pair[I].Dst = DstSubscripts[I];
+ unifySubscriptType(&Pair[I]);
+ }
+
+ return true;
+}
+
+bool DependenceInfo::tryDelinearizeFixedSize(
+ Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
+ const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
+ SmallVectorImpl<const SCEV *> &DstSubscripts) {
+
+ // In general we cannot safely assume that the subscripts recovered from GEPs
+ // are in the range of values defined for their corresponding array
+ // dimensions. For example some C language usage/interpretation make it
+ // impossible to verify this at compile-time. As such we give up here unless
+ // we can assume that the subscripts do not overlap into neighboring
+ // dimensions and that the number of dimensions matches the number of
+ // subscripts being recovered.
+ if (!DisableDelinearizationChecks)
+ return false;
+
+ Value *SrcPtr = getLoadStorePointerOperand(Src);
+ Value *DstPtr = getLoadStorePointerOperand(Dst);
const SCEVUnknown *SrcBase =
dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
const SCEVUnknown *DstBase =
dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
+ assert(SrcBase && DstBase && SrcBase == DstBase &&
+ "expected src and dst scev unknowns to be equal");
- if (!SrcBase || !DstBase || SrcBase != DstBase)
+ // Check the simple case where the array dimensions are fixed size.
+ auto *SrcGEP = dyn_cast<GetElementPtrInst>(SrcPtr);
+ auto *DstGEP = dyn_cast<GetElementPtrInst>(DstPtr);
+ if (!SrcGEP || !DstGEP)
+ return false;
+
+ SmallVector<int, 4> SrcSizes, DstSizes;
+ SE->getIndexExpressionsFromGEP(SrcGEP, SrcSubscripts, SrcSizes);
+ SE->getIndexExpressionsFromGEP(DstGEP, DstSubscripts, DstSizes);
+
+ // Check that the two size arrays are non-empty and equal in length and
+ // value.
+ if (SrcSizes.empty() || SrcSubscripts.size() <= 1 ||
+ SrcSizes.size() != DstSizes.size() ||
+ !std::equal(SrcSizes.begin(), SrcSizes.end(), DstSizes.begin())) {
+ SrcSubscripts.clear();
+ DstSubscripts.clear();
return false;
+ }
+
+ Value *SrcBasePtr = SrcGEP->getOperand(0);
+ Value *DstBasePtr = DstGEP->getOperand(0);
+ while (auto *PCast = dyn_cast<BitCastInst>(SrcBasePtr))
+ SrcBasePtr = PCast->getOperand(0);
+ while (auto *PCast = dyn_cast<BitCastInst>(DstBasePtr))
+ DstBasePtr = PCast->getOperand(0);
+
+ // Check that for identical base pointers we do not miss index offsets
+ // that have been added before this GEP is applied.
+ if (SrcBasePtr == SrcBase->getValue() && DstBasePtr == DstBase->getValue()) {
+ assert(SrcSubscripts.size() == DstSubscripts.size() &&
+ SrcSubscripts.size() == SrcSizes.size() + 1 &&
+ "Expected equal number of entries in the list of sizes and "
+ "subscripts.");
+ LLVM_DEBUG({
+ dbgs() << "Delinearized subscripts of fixed-size array\n"
+ << "SrcGEP:" << *SrcGEP << "\n"
+ << "DstGEP:" << *DstGEP << "\n";
+ });
+ return true;
+ }
+
+ SrcSubscripts.clear();
+ DstSubscripts.clear();
+ return false;
+}
+
+bool DependenceInfo::tryDelinearizeParametricSize(
+ Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
+ const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
+ SmallVectorImpl<const SCEV *> &DstSubscripts) {
+
+ Value *SrcPtr = getLoadStorePointerOperand(Src);
+ Value *DstPtr = getLoadStorePointerOperand(Dst);
+ const SCEVUnknown *SrcBase =
+ dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
+ const SCEVUnknown *DstBase =
+ dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
+ assert(SrcBase && DstBase && SrcBase == DstBase &&
+ "expected src and dst scev unknowns to be equal");
const SCEV *ElementSize = SE->getElementSize(Src);
if (ElementSize != SE->getElementSize(Dst))
SE->findArrayDimensions(Terms, Sizes, ElementSize);
// Third step: compute the access functions for each subscript.
- SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts;
SE->computeAccessFunctions(SrcAR, SrcSubscripts, Sizes);
SE->computeAccessFunctions(DstAR, DstSubscripts, Sizes);
SrcSubscripts.size() != DstSubscripts.size())
return false;
- int size = SrcSubscripts.size();
+ size_t Size = SrcSubscripts.size();
// Statically check that the array bounds are in-range. The first subscript we
// don't have a size for and it cannot overflow into another subscript, so is
// FIXME: It may be better to record these sizes and add them as constraints
// to the dependency checks.
if (!DisableDelinearizationChecks)
- for (int i = 1; i < size; ++i) {
- if (!isKnownNonNegative(SrcSubscripts[i], SrcPtr))
+ for (size_t I = 1; I < Size; ++I) {
+ if (!isKnownNonNegative(SrcSubscripts[I], SrcPtr))
return false;
- if (!isKnownLessThan(SrcSubscripts[i], Sizes[i - 1]))
+ if (!isKnownLessThan(SrcSubscripts[I], Sizes[I - 1]))
return false;
- if (!isKnownNonNegative(DstSubscripts[i], DstPtr))
+ if (!isKnownNonNegative(DstSubscripts[I], DstPtr))
return false;
- if (!isKnownLessThan(DstSubscripts[i], Sizes[i - 1]))
+ if (!isKnownLessThan(DstSubscripts[I], Sizes[I - 1]))
return false;
}
- LLVM_DEBUG({
- dbgs() << "\nSrcSubscripts: ";
- for (int i = 0; i < size; i++)
- dbgs() << *SrcSubscripts[i];
- dbgs() << "\nDstSubscripts: ";
- for (int i = 0; i < size; i++)
- dbgs() << *DstSubscripts[i];
- });
-
- // The delinearization transforms a single-subscript MIV dependence test into
- // a multi-subscript SIV dependence test that is easier to compute. So we
- // resize Pair to contain as many pairs of subscripts as the delinearization
- // has found, and then initialize the pairs following the delinearization.
- Pair.resize(size);
- for (int i = 0; i < size; ++i) {
- Pair[i].Src = SrcSubscripts[i];
- Pair[i].Dst = DstSubscripts[i];
- unifySubscriptType(&Pair[i]);
- }
-
return true;
}
--- /dev/null
+; RUN: opt < %s -disable-output "-passes=print<da>" -aa-pipeline=basic-aa 2>&1 \
+; RUN: -da-disable-delinearization-checks | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.6.0"
+
+;; for (long int i = 0; i < n; i++) {
+;; for (long int j = 0; j < n; j++) {
+;; for (long int k = 0; k < n; k++) {
+;; A[i][j][k] = i;
+;; }
+;; for (long int k = 0; k < n; k++) {
+;; *B++ = A[i + 3][j + 2][k + 1];
+
+define void @p2(i64 %n, [100 x [100 x i64]]* %A, i64* %B) nounwind uwtable ssp {
+entry:
+ %cmp10 = icmp sgt i64 %n, 0
+ br i1 %cmp10, label %for.cond1.preheader.preheader, label %for.end26
+
+; CHECK-LABEL: p2
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [-3 -2]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [* * *]!
+
+for.cond1.preheader.preheader: ; preds = %entry
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc24
+ %B.addr.012 = phi i64* [ %B.addr.1.lcssa, %for.inc24 ], [ %B, %for.cond1.preheader.preheader ]
+ %i.011 = phi i64 [ %inc25, %for.inc24 ], [ 0, %for.cond1.preheader.preheader ]
+ %cmp26 = icmp sgt i64 %n, 0
+ br i1 %cmp26, label %for.cond4.preheader.preheader, label %for.inc24
+
+for.cond4.preheader.preheader: ; preds = %for.cond1.preheader
+ br label %for.cond4.preheader
+
+for.cond4.preheader: ; preds = %for.cond4.preheader.preheader, %for.inc21
+ %B.addr.18 = phi i64* [ %B.addr.2.lcssa, %for.inc21 ], [ %B.addr.012, %for.cond4.preheader.preheader ]
+ %j.07 = phi i64 [ %inc22, %for.inc21 ], [ 0, %for.cond4.preheader.preheader ]
+ %cmp51 = icmp sgt i64 %n, 0
+ br i1 %cmp51, label %for.body6.preheader, label %for.cond10.loopexit
+
+for.body6.preheader: ; preds = %for.cond4.preheader
+ br label %for.body6
+
+for.body6: ; preds = %for.body6.preheader, %for.body6
+ %k.02 = phi i64 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ]
+ %arrayidx8 = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* %A, i64 %i.011, i64 %j.07, i64 %k.02
+ store i64 %i.011, i64* %arrayidx8, align 8
+ %inc = add nsw i64 %k.02, 1
+ %exitcond13 = icmp ne i64 %inc, %n
+ br i1 %exitcond13, label %for.body6, label %for.cond10.loopexit.loopexit
+
+for.cond10.loopexit.loopexit: ; preds = %for.body6
+ br label %for.cond10.loopexit
+
+for.cond10.loopexit: ; preds = %for.cond10.loopexit.loopexit, %for.cond4.preheader
+ %cmp113 = icmp sgt i64 %n, 0
+ br i1 %cmp113, label %for.body12.preheader, label %for.inc21
+
+for.body12.preheader: ; preds = %for.cond10.loopexit
+ br label %for.body12
+
+for.body12: ; preds = %for.body12.preheader, %for.body12
+ %k9.05 = phi i64 [ %inc19, %for.body12 ], [ 0, %for.body12.preheader ]
+ %B.addr.24 = phi i64* [ %incdec.ptr, %for.body12 ], [ %B.addr.18, %for.body12.preheader ]
+ %add = add nsw i64 %k9.05, 1
+ %add13 = add nsw i64 %j.07, 2
+ %add14 = add nsw i64 %i.011, 3
+ %arrayidx17 = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* %A, i64 %add14, i64 %add13, i64 %add
+ %0 = load i64, i64* %arrayidx17, align 8
+ %incdec.ptr = getelementptr inbounds i64, i64* %B.addr.24, i64 1
+ store i64 %0, i64* %B.addr.24, align 8
+ %inc19 = add nsw i64 %k9.05, 1
+ %exitcond = icmp ne i64 %inc19, %n
+ br i1 %exitcond, label %for.body12, label %for.inc21.loopexit
+
+for.inc21.loopexit: ; preds = %for.body12
+ %scevgep = getelementptr i64, i64* %B.addr.18, i64 %n
+ br label %for.inc21
+
+for.inc21: ; preds = %for.inc21.loopexit, %for.cond10.loopexit
+ %B.addr.2.lcssa = phi i64* [ %B.addr.18, %for.cond10.loopexit ], [ %scevgep, %for.inc21.loopexit ]
+ %inc22 = add nsw i64 %j.07, 1
+ %exitcond14 = icmp ne i64 %inc22, %n
+ br i1 %exitcond14, label %for.cond4.preheader, label %for.inc24.loopexit
+
+for.inc24.loopexit: ; preds = %for.inc21
+ %B.addr.2.lcssa.lcssa = phi i64* [ %B.addr.2.lcssa, %for.inc21 ]
+ br label %for.inc24
+
+for.inc24: ; preds = %for.inc24.loopexit, %for.cond1.preheader
+ %B.addr.1.lcssa = phi i64* [ %B.addr.012, %for.cond1.preheader ], [ %B.addr.2.lcssa.lcssa, %for.inc24.loopexit ]
+ %inc25 = add nsw i64 %i.011, 1
+ %exitcond15 = icmp ne i64 %inc25, %n
+ br i1 %exitcond15, label %for.cond1.preheader, label %for.end26.loopexit
+
+for.end26.loopexit: ; preds = %for.inc24
+ br label %for.end26
+
+for.end26: ; preds = %for.end26.loopexit, %entry
+ ret void
+}
--- /dev/null
+; RUN: opt < %s -disable-output -passes="print<da>" \
+; RUN: -da-disable-delinearization-checks 2>&1 | FileCheck %s
+; RUN: opt < %s -da -analyze -da-disable-delinearization-checks | FileCheck %s
+
+; CHECK-LABEL: t1
+; CHECK: da analyze - none!
+; CHECK: da analyze - consistent anti [1 -2]!
+; CHECK: da analyze - none!
+
+;; #define N 1024
+;; #define M 2048
+;; void t1(int a[N][M]) {
+;; for (int i = 0; i < N-1; ++i)
+;; for (int j = 2; j < M; ++j)
+;; a[i][j] = a[i+1][j-2];
+;; }
+
+define void @t1([2048 x i32]* %a) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.inc11
+ %indvars.iv4 = phi i64 [ 0, %entry ], [ %indvars.iv.next5, %for.inc11 ]
+ br label %for.body4
+
+for.body4: ; preds = %for.body, %for.body4
+ %indvars.iv = phi i64 [ 2, %for.body ], [ %indvars.iv.next, %for.body4 ]
+ %0 = add nuw nsw i64 %indvars.iv4, 1
+ %1 = add nsw i64 %indvars.iv, -2
+ %arrayidx6 = getelementptr inbounds [2048 x i32], [2048 x i32]* %a, i64 %0, i64 %1
+ %2 = load i32, i32* %arrayidx6, align 4
+ %arrayidx10 = getelementptr inbounds [2048 x i32], [2048 x i32]* %a, i64 %indvars.iv4, i64 %indvars.iv
+ store i32 %2, i32* %arrayidx10, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp ne i64 %indvars.iv.next, 2048
+ br i1 %exitcond, label %for.body4, label %for.inc11
+
+for.inc11: ; preds = %for.body4
+ %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1
+ %exitcond7 = icmp ne i64 %indvars.iv.next5, 1023
+ br i1 %exitcond7, label %for.body, label %for.end13
+
+for.end13: ; preds = %for.inc11
+ ret void
+}
+
+
+; CHECK-LABEL: t2
+; CHECK: da analyze - none!
+; CHECK: da analyze - consistent anti [1 -2 0 -3 2]!
+; CHECK: da analyze - none!
+
+;; #define N 1024
+;; #define M 2048
+;; void t2(int a[][N][N][N][M]) {
+;; for (int i1 = 0; i1 < N-1; ++i1)
+;; for (int i2 = 2; i2 < N; ++i2)
+;; for (int i3 = 0; i3 < N; ++i3)
+;; for (int i4 = 3; i4 < N; ++i4)
+;; for (int i5 = 0; i5 < M-2; ++i5)
+;; a[i1][i2][i3][i4][i5] = a[i1+1][i2-2][i3][i4-3][i5+2];
+;; }
+
+define void @t2([1024 x [1024 x [1024 x [2048 x i32]]]]* %a) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.inc46
+ %indvars.iv18 = phi i64 [ 0, %entry ], [ %indvars.iv.next19, %for.inc46 ]
+ br label %for.body4
+
+for.body4: ; preds = %for.body, %for.inc43
+ %indvars.iv14 = phi i64 [ 2, %for.body ], [ %indvars.iv.next15, %for.inc43 ]
+ br label %for.body8
+
+for.body8: ; preds = %for.body4, %for.inc40
+ %indvars.iv11 = phi i64 [ 0, %for.body4 ], [ %indvars.iv.next12, %for.inc40 ]
+ br label %for.body12
+
+for.body12: ; preds = %for.body8, %for.inc37
+ %indvars.iv7 = phi i64 [ 3, %for.body8 ], [ %indvars.iv.next8, %for.inc37 ]
+ br label %for.body16
+
+for.body16: ; preds = %for.body12, %for.body16
+ %indvars.iv = phi i64 [ 0, %for.body12 ], [ %indvars.iv.next, %for.body16 ]
+ %0 = add nuw nsw i64 %indvars.iv18, 1
+ %1 = add nsw i64 %indvars.iv14, -2
+ %2 = add nsw i64 %indvars.iv7, -3
+ %3 = add nuw nsw i64 %indvars.iv, 2
+ %arrayidx26 = getelementptr inbounds [1024 x [1024 x [1024 x [2048 x i32]]]], [1024 x [1024 x [1024 x [2048 x i32]]]]* %a, i64 %0, i64 %1, i64 %indvars.iv11, i64 %2, i64 %3
+ %4 = load i32, i32* %arrayidx26, align 4
+ %arrayidx36 = getelementptr inbounds [1024 x [1024 x [1024 x [2048 x i32]]]], [1024 x [1024 x [1024 x [2048 x i32]]]]* %a, i64 %indvars.iv18, i64 %indvars.iv14, i64 %indvars.iv11, i64 %indvars.iv7, i64 %indvars.iv
+ store i32 %4, i32* %arrayidx36, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp ne i64 %indvars.iv.next, 2046
+ br i1 %exitcond, label %for.body16, label %for.inc37
+
+for.inc37: ; preds = %for.body16
+ %indvars.iv.next8 = add nuw nsw i64 %indvars.iv7, 1
+ %exitcond10 = icmp ne i64 %indvars.iv.next8, 1024
+ br i1 %exitcond10, label %for.body12, label %for.inc40
+
+for.inc40: ; preds = %for.inc37
+ %indvars.iv.next12 = add nuw nsw i64 %indvars.iv11, 1
+ %exitcond13 = icmp ne i64 %indvars.iv.next12, 1024
+ br i1 %exitcond13, label %for.body8, label %for.inc43
+
+for.inc43: ; preds = %for.inc40
+ %indvars.iv.next15 = add nuw nsw i64 %indvars.iv14, 1
+ %exitcond17 = icmp ne i64 %indvars.iv.next15, 1024
+ br i1 %exitcond17, label %for.body4, label %for.inc46
+
+for.inc46: ; preds = %for.inc43
+ %indvars.iv.next19 = add nuw nsw i64 %indvars.iv18, 1
+ %exitcond21 = icmp ne i64 %indvars.iv.next19, 1023
+ br i1 %exitcond21, label %for.body, label %for.end48
+
+for.end48: ; preds = %for.inc46
+ ret void
+}
; RUN: -pass-remarks-output=%t -verify-loop-info -verify-dom-info -S | FileCheck -check-prefix=IR %s
; RUN: FileCheck --input-file=%t %s
+; RUN: opt < %s -basicaa -loop-interchange -pass-remarks-missed='loop-interchange' \
+; RUN: -da-disable-delinearization-checks -pass-remarks-output=%t \
+; RUN: -verify-loop-info -verify-dom-info -S | FileCheck -check-prefix=IR %s
+; RUN: FileCheck --check-prefix=DELIN --input-file=%t %s
+
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
;; for(int j=1;j<N-1;j++)
;; A[j+1][i+1] = A[j+1][i+1] + k;
-; FIXME: Currently fails because of DA changes.
; IR-LABEL: @interchange_01
; IR-NOT: split
; CHECK: Name: Dependence
; CHECK-NEXT: Function: interchange_01
+; DELIN: Name: UnsupportedInsBetweenInduction
+; DELIN-NEXT: Function: interchange_01
define void @interchange_01(i32 %k, i32 %N) {
entry:
%sub = add nsw i32 %N, -1
; RUN: -pass-remarks='loop-interchange' -S
; RUN: cat %t | FileCheck %s
+; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -verify-loop-info \
+; RUN: -pass-remarks-output=%t -pass-remarks-missed='loop-interchange' \
+; RUN: -pass-remarks='loop-interchange' -S -da-disable-delinearization-checks
+; RUN: cat %t | FileCheck --check-prefix=DELIN %s
+
@A = common global [100 x [100 x i32]] zeroinitializer
@B = common global [100 x [100 x i32]] zeroinitializer
@C = common global [100 x i32] zeroinitializer
; CHECK-NEXT: - String: Cannot interchange loops due to dependences.
; CHECK-NEXT: ...
+; DELIN: --- !Missed
+; DELIN-NEXT: Pass: loop-interchange
+; DELIN-NEXT: Name: InterchangeNotProfitable
+; DELIN-NEXT: Function: test01
+; DELIN-NEXT: Args:
+; DELIN-NEXT: - String: 'Interchanging loops is too costly (cost='
+; DELIN-NEXT: - Cost: '2'
+; DELIN-NEXT: - String: ', threshold='
+; DELIN-NEXT: - Threshold: '0'
+; DELIN-NEXT: - String: ') and it does not improve parallelism.'
+; DELIN-NEXT: ...
+
;;--------------------------------------Test case 02------------------------------------
;; [FIXME] This loop though valid is currently not interchanged due to the
;; limitation that we cannot split the inner loop latch due to multiple use of inner induction
; CHECK-NEXT: - String: Cannot interchange loops due to dependences.
; CHECK-NEXT: ...
+; DELIN: --- !Missed
+; DELIN-NEXT: Pass: loop-interchange
+; DELIN-NEXT: Name: UnsupportedInsBetweenInduction
+; DELIN-NEXT: Function: test02
+; DELIN-NEXT: Args:
+; DELIN-NEXT: - String: Found unsupported instruction between induction variable increment and branch.
+; DELIN-NEXT: ...
+
;;-----------------------------------Test case 03-------------------------------
;; Test to make sure we can handle output dependencies.
;;
; CHECK-NEXT: - String: Cannot interchange loops due to dependences.
; CHECK-NEXT: ...
+; DELIN: --- !Passed
+; DELIN-NEXT: Pass: loop-interchange
+; DELIN-NEXT: Name: Interchanged
+; DELIN-NEXT: Function: test03
+; DELIN-NEXT: Args:
+; DELIN-NEXT: - String: Loop interchanged with enclosing loop.
+; DELIN-NEXT: ...
+
;;--------------------------------------Test case 04-------------------------------------
;; Loops not tightly nested are not interchanged
;; for(int j=0;j<N;j++) {
; CHECK-NEXT: Args:
; CHECK-NEXT: - String: Cannot interchange loops due to dependences.
; CHECK-NEXT: ...
+
+; DELIN: --- !Missed
+; DELIN-NEXT: Pass: loop-interchange
+; DELIN-NEXT: Name: NotTightlyNested
+; DELIN-NEXT: Function: test04
+; DELIN-NEXT: Args:
+; DELIN-NEXT: - String: Cannot interchange loops because they are not tightly nested.
+; DELIN-NEXT: ...
; RUN: -pass-remarks=loop-interchange -pass-remarks-missed=loop-interchange
; RUN: FileCheck -input-file %t %s
+; RUN: opt < %s -loop-interchange -pass-remarks-output=%t -verify-dom-info -verify-loop-info \
+; RUN: -pass-remarks=loop-interchange -pass-remarks-missed=loop-interchange \
+; RUN: -da-disable-delinearization-checks
+; RUN: FileCheck --check-prefix=DELIN -input-file %t %s
+
;; We test profitability model in these test cases.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
;; for(int i=1;i<100;i++)
;; for(int j=1;j<100;j++)
;; A[j][i] = A[j - 1][i] + B[j][i];
-;; FIXME: DA misses this case after D35430
; CHECK: Name: Dependence
; CHECK-NEXT: Function: interchange_01
+
+; DELIN: Name: Interchanged
+; DELIN-NEXT: Function: interchange_01
+
define void @interchange_01() {
entry:
br label %for2.preheader