From 1b811ff8a935fafb279ad9bad36d36058a129b42 Mon Sep 17 00:00:00 2001 From: Bardia Mahjour Date: Wed, 26 Feb 2020 12:51:58 -0500 Subject: [PATCH] [DA] Delinearization of fixed-size multi-dimensional arrays Summary: Currently the dependence analysis in LLVM is unable to compute accurate dependence vectors for multi-dimensional fixed size arrays. This is mainly because the delinearization algorithm in scalar evolution relies on parametric terms to be present in the access functions. In the case of fixed size arrays such parametric terms are not present, but we can use the indexes from GEP instructions to recover the subscripts for each dimension of the arrays. This patch adds this ability under the existing option `-da-disable-delinearization-checks`. Authored By: bmahjour Reviewer: Meinersbur, sebpop, fhahn, dmgreen, grosser, etiotto, bollu Reviewed By: Meinersbur Subscribers: hiraditya, arphaman, Whitney, ppc-slack, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D72178 --- llvm/include/llvm/Analysis/DependenceAnalysis.h | 20 ++- llvm/lib/Analysis/DependenceAnalysis.cpp | 158 ++++++++++++++++----- .../PreliminaryNoValidityCheckFixedSize.ll | 106 ++++++++++++++ .../SimpleSIVNoValidityCheckFixedSize.ll | 120 ++++++++++++++++ .../LoopInterchange/currentLimitation.ll | 8 +- .../loop-interchange-optimization-remarks.ll | 41 ++++++ .../Transforms/LoopInterchange/profitability.ll | 10 +- 7 files changed, 426 insertions(+), 37 deletions(-) create mode 100644 llvm/test/Analysis/DependenceAnalysis/PreliminaryNoValidityCheckFixedSize.ll create mode 100644 llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheckFixedSize.ll diff --git a/llvm/include/llvm/Analysis/DependenceAnalysis.h b/llvm/include/llvm/Analysis/DependenceAnalysis.h index 0c4002c..ba1153db 100644 --- a/llvm/include/llvm/Analysis/DependenceAnalysis.h +++ b/llvm/include/llvm/Analysis/DependenceAnalysis.h @@ -924,10 +924,28 @@ template class ArrayRef; void updateDirection(Dependence::DVEntry &Level, const Constraint &CurConstraint) const; + /// Given a linear access function, tries to recover subscripts + /// for each dimension of the array element access. bool tryDelinearize(Instruction *Src, Instruction *Dst, SmallVectorImpl &Pair); - private: + /// Tries to delinearize access function for a fixed size multi-dimensional + /// array, by deriving subscripts from GEP instructions. Returns true upon + /// success and false otherwise. + bool tryDelinearizeFixedSize(Instruction *Src, Instruction *Dst, + const SCEV *SrcAccessFn, + const SCEV *DstAccessFn, + SmallVectorImpl &SrcSubscripts, + SmallVectorImpl &DstSubscripts); + + /// Tries to delinearize access function for a multi-dimensional array with + /// symbolic runtime sizes. + /// Returns true upon success and false otherwise. + bool tryDelinearizeParametricSize( + Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn, + const SCEV *DstAccessFn, SmallVectorImpl &SrcSubscripts, + SmallVectorImpl &DstSubscripts); + /// checkSubscript - Helper function for checkSrcSubscript and /// checkDstSubscript to avoid duplicate code bool checkSubscript(const SCEV *Expr, const Loop *LoopNest, diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp index 9b38053..96c7bc7 100644 --- a/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -3264,23 +3264,134 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, assert(isLoadOrStore(Dst) && "instruction is not load or store"); Value *SrcPtr = getLoadStorePointerOperand(Src); Value *DstPtr = getLoadStorePointerOperand(Dst); - Loop *SrcLoop = LI->getLoopFor(Src->getParent()); Loop *DstLoop = LI->getLoopFor(Dst->getParent()); + const SCEV *SrcAccessFn = SE->getSCEVAtScope(SrcPtr, SrcLoop); + const SCEV *DstAccessFn = SE->getSCEVAtScope(DstPtr, DstLoop); + const SCEVUnknown *SrcBase = + dyn_cast(SE->getPointerBase(SrcAccessFn)); + const SCEVUnknown *DstBase = + dyn_cast(SE->getPointerBase(DstAccessFn)); + + if (!SrcBase || !DstBase || SrcBase != DstBase) + return false; - // Below code mimics the code in Delinearization.cpp - const SCEV *SrcAccessFn = - SE->getSCEVAtScope(SrcPtr, SrcLoop); - const SCEV *DstAccessFn = - SE->getSCEVAtScope(DstPtr, DstLoop); + SmallVector SrcSubscripts, DstSubscripts; + + if (!tryDelinearizeFixedSize(Src, Dst, SrcAccessFn, DstAccessFn, + SrcSubscripts, DstSubscripts) && + !tryDelinearizeParametricSize(Src, Dst, SrcAccessFn, DstAccessFn, + SrcSubscripts, DstSubscripts)) + return false; + + int Size = SrcSubscripts.size(); + LLVM_DEBUG({ + dbgs() << "\nSrcSubscripts: "; + for (int I = 0; I < Size; I++) + dbgs() << *SrcSubscripts[I]; + dbgs() << "\nDstSubscripts: "; + for (int I = 0; I < Size; I++) + dbgs() << *DstSubscripts[I]; + }); + // The delinearization transforms a single-subscript MIV dependence test into + // a multi-subscript SIV dependence test that is easier to compute. So we + // resize Pair to contain as many pairs of subscripts as the delinearization + // has found, and then initialize the pairs following the delinearization. + Pair.resize(Size); + for (int I = 0; I < Size; ++I) { + Pair[I].Src = SrcSubscripts[I]; + Pair[I].Dst = DstSubscripts[I]; + unifySubscriptType(&Pair[I]); + } + + return true; +} + +bool DependenceInfo::tryDelinearizeFixedSize( + Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn, + const SCEV *DstAccessFn, SmallVectorImpl &SrcSubscripts, + SmallVectorImpl &DstSubscripts) { + + // In general we cannot safely assume that the subscripts recovered from GEPs + // are in the range of values defined for their corresponding array + // dimensions. For example some C language usage/interpretation make it + // impossible to verify this at compile-time. As such we give up here unless + // we can assume that the subscripts do not overlap into neighboring + // dimensions and that the number of dimensions matches the number of + // subscripts being recovered. + if (!DisableDelinearizationChecks) + return false; + + Value *SrcPtr = getLoadStorePointerOperand(Src); + Value *DstPtr = getLoadStorePointerOperand(Dst); const SCEVUnknown *SrcBase = dyn_cast(SE->getPointerBase(SrcAccessFn)); const SCEVUnknown *DstBase = dyn_cast(SE->getPointerBase(DstAccessFn)); + assert(SrcBase && DstBase && SrcBase == DstBase && + "expected src and dst scev unknowns to be equal"); - if (!SrcBase || !DstBase || SrcBase != DstBase) + // Check the simple case where the array dimensions are fixed size. + auto *SrcGEP = dyn_cast(SrcPtr); + auto *DstGEP = dyn_cast(DstPtr); + if (!SrcGEP || !DstGEP) + return false; + + SmallVector SrcSizes, DstSizes; + SE->getIndexExpressionsFromGEP(SrcGEP, SrcSubscripts, SrcSizes); + SE->getIndexExpressionsFromGEP(DstGEP, DstSubscripts, DstSizes); + + // Check that the two size arrays are non-empty and equal in length and + // value. + if (SrcSizes.empty() || SrcSubscripts.size() <= 1 || + SrcSizes.size() != DstSizes.size() || + !std::equal(SrcSizes.begin(), SrcSizes.end(), DstSizes.begin())) { + SrcSubscripts.clear(); + DstSubscripts.clear(); return false; + } + + Value *SrcBasePtr = SrcGEP->getOperand(0); + Value *DstBasePtr = DstGEP->getOperand(0); + while (auto *PCast = dyn_cast(SrcBasePtr)) + SrcBasePtr = PCast->getOperand(0); + while (auto *PCast = dyn_cast(DstBasePtr)) + DstBasePtr = PCast->getOperand(0); + + // Check that for identical base pointers we do not miss index offsets + // that have been added before this GEP is applied. + if (SrcBasePtr == SrcBase->getValue() && DstBasePtr == DstBase->getValue()) { + assert(SrcSubscripts.size() == DstSubscripts.size() && + SrcSubscripts.size() == SrcSizes.size() + 1 && + "Expected equal number of entries in the list of sizes and " + "subscripts."); + LLVM_DEBUG({ + dbgs() << "Delinearized subscripts of fixed-size array\n" + << "SrcGEP:" << *SrcGEP << "\n" + << "DstGEP:" << *DstGEP << "\n"; + }); + return true; + } + + SrcSubscripts.clear(); + DstSubscripts.clear(); + return false; +} + +bool DependenceInfo::tryDelinearizeParametricSize( + Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn, + const SCEV *DstAccessFn, SmallVectorImpl &SrcSubscripts, + SmallVectorImpl &DstSubscripts) { + + Value *SrcPtr = getLoadStorePointerOperand(Src); + Value *DstPtr = getLoadStorePointerOperand(Dst); + const SCEVUnknown *SrcBase = + dyn_cast(SE->getPointerBase(SrcAccessFn)); + const SCEVUnknown *DstBase = + dyn_cast(SE->getPointerBase(DstAccessFn)); + assert(SrcBase && DstBase && SrcBase == DstBase && + "expected src and dst scev unknowns to be equal"); const SCEV *ElementSize = SE->getElementSize(Src); if (ElementSize != SE->getElementSize(Dst)) @@ -3304,7 +3415,6 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, SE->findArrayDimensions(Terms, Sizes, ElementSize); // Third step: compute the access functions for each subscript. - SmallVector SrcSubscripts, DstSubscripts; SE->computeAccessFunctions(SrcAR, SrcSubscripts, Sizes); SE->computeAccessFunctions(DstAR, DstSubscripts, Sizes); @@ -3313,7 +3423,7 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, SrcSubscripts.size() != DstSubscripts.size()) return false; - int size = SrcSubscripts.size(); + size_t Size = SrcSubscripts.size(); // Statically check that the array bounds are in-range. The first subscript we // don't have a size for and it cannot overflow into another subscript, so is @@ -3322,40 +3432,20 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, // FIXME: It may be better to record these sizes and add them as constraints // to the dependency checks. if (!DisableDelinearizationChecks) - for (int i = 1; i < size; ++i) { - if (!isKnownNonNegative(SrcSubscripts[i], SrcPtr)) + for (size_t I = 1; I < Size; ++I) { + if (!isKnownNonNegative(SrcSubscripts[I], SrcPtr)) return false; - if (!isKnownLessThan(SrcSubscripts[i], Sizes[i - 1])) + if (!isKnownLessThan(SrcSubscripts[I], Sizes[I - 1])) return false; - if (!isKnownNonNegative(DstSubscripts[i], DstPtr)) + if (!isKnownNonNegative(DstSubscripts[I], DstPtr)) return false; - if (!isKnownLessThan(DstSubscripts[i], Sizes[i - 1])) + if (!isKnownLessThan(DstSubscripts[I], Sizes[I - 1])) return false; } - LLVM_DEBUG({ - dbgs() << "\nSrcSubscripts: "; - for (int i = 0; i < size; i++) - dbgs() << *SrcSubscripts[i]; - dbgs() << "\nDstSubscripts: "; - for (int i = 0; i < size; i++) - dbgs() << *DstSubscripts[i]; - }); - - // The delinearization transforms a single-subscript MIV dependence test into - // a multi-subscript SIV dependence test that is easier to compute. So we - // resize Pair to contain as many pairs of subscripts as the delinearization - // has found, and then initialize the pairs following the delinearization. - Pair.resize(size); - for (int i = 0; i < size; ++i) { - Pair[i].Src = SrcSubscripts[i]; - Pair[i].Dst = DstSubscripts[i]; - unifySubscriptType(&Pair[i]); - } - return true; } diff --git a/llvm/test/Analysis/DependenceAnalysis/PreliminaryNoValidityCheckFixedSize.ll b/llvm/test/Analysis/DependenceAnalysis/PreliminaryNoValidityCheckFixedSize.ll new file mode 100644 index 0000000..21bc37d --- /dev/null +++ b/llvm/test/Analysis/DependenceAnalysis/PreliminaryNoValidityCheckFixedSize.ll @@ -0,0 +1,106 @@ +; RUN: opt < %s -disable-output "-passes=print" -aa-pipeline=basic-aa 2>&1 \ +; RUN: -da-disable-delinearization-checks | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.6.0" + +;; for (long int i = 0; i < n; i++) { +;; for (long int j = 0; j < n; j++) { +;; for (long int k = 0; k < n; k++) { +;; A[i][j][k] = i; +;; } +;; for (long int k = 0; k < n; k++) { +;; *B++ = A[i + 3][j + 2][k + 1]; + +define void @p2(i64 %n, [100 x [100 x i64]]* %A, i64* %B) nounwind uwtable ssp { +entry: + %cmp10 = icmp sgt i64 %n, 0 + br i1 %cmp10, label %for.cond1.preheader.preheader, label %for.end26 + +; CHECK-LABEL: p2 +; CHECK: da analyze - none! +; CHECK: da analyze - flow [-3 -2]! +; CHECK: da analyze - confused! +; CHECK: da analyze - none! +; CHECK: da analyze - confused! +; CHECK: da analyze - output [* * *]! + +for.cond1.preheader.preheader: ; preds = %entry + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc24 + %B.addr.012 = phi i64* [ %B.addr.1.lcssa, %for.inc24 ], [ %B, %for.cond1.preheader.preheader ] + %i.011 = phi i64 [ %inc25, %for.inc24 ], [ 0, %for.cond1.preheader.preheader ] + %cmp26 = icmp sgt i64 %n, 0 + br i1 %cmp26, label %for.cond4.preheader.preheader, label %for.inc24 + +for.cond4.preheader.preheader: ; preds = %for.cond1.preheader + br label %for.cond4.preheader + +for.cond4.preheader: ; preds = %for.cond4.preheader.preheader, %for.inc21 + %B.addr.18 = phi i64* [ %B.addr.2.lcssa, %for.inc21 ], [ %B.addr.012, %for.cond4.preheader.preheader ] + %j.07 = phi i64 [ %inc22, %for.inc21 ], [ 0, %for.cond4.preheader.preheader ] + %cmp51 = icmp sgt i64 %n, 0 + br i1 %cmp51, label %for.body6.preheader, label %for.cond10.loopexit + +for.body6.preheader: ; preds = %for.cond4.preheader + br label %for.body6 + +for.body6: ; preds = %for.body6.preheader, %for.body6 + %k.02 = phi i64 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ] + %arrayidx8 = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* %A, i64 %i.011, i64 %j.07, i64 %k.02 + store i64 %i.011, i64* %arrayidx8, align 8 + %inc = add nsw i64 %k.02, 1 + %exitcond13 = icmp ne i64 %inc, %n + br i1 %exitcond13, label %for.body6, label %for.cond10.loopexit.loopexit + +for.cond10.loopexit.loopexit: ; preds = %for.body6 + br label %for.cond10.loopexit + +for.cond10.loopexit: ; preds = %for.cond10.loopexit.loopexit, %for.cond4.preheader + %cmp113 = icmp sgt i64 %n, 0 + br i1 %cmp113, label %for.body12.preheader, label %for.inc21 + +for.body12.preheader: ; preds = %for.cond10.loopexit + br label %for.body12 + +for.body12: ; preds = %for.body12.preheader, %for.body12 + %k9.05 = phi i64 [ %inc19, %for.body12 ], [ 0, %for.body12.preheader ] + %B.addr.24 = phi i64* [ %incdec.ptr, %for.body12 ], [ %B.addr.18, %for.body12.preheader ] + %add = add nsw i64 %k9.05, 1 + %add13 = add nsw i64 %j.07, 2 + %add14 = add nsw i64 %i.011, 3 + %arrayidx17 = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* %A, i64 %add14, i64 %add13, i64 %add + %0 = load i64, i64* %arrayidx17, align 8 + %incdec.ptr = getelementptr inbounds i64, i64* %B.addr.24, i64 1 + store i64 %0, i64* %B.addr.24, align 8 + %inc19 = add nsw i64 %k9.05, 1 + %exitcond = icmp ne i64 %inc19, %n + br i1 %exitcond, label %for.body12, label %for.inc21.loopexit + +for.inc21.loopexit: ; preds = %for.body12 + %scevgep = getelementptr i64, i64* %B.addr.18, i64 %n + br label %for.inc21 + +for.inc21: ; preds = %for.inc21.loopexit, %for.cond10.loopexit + %B.addr.2.lcssa = phi i64* [ %B.addr.18, %for.cond10.loopexit ], [ %scevgep, %for.inc21.loopexit ] + %inc22 = add nsw i64 %j.07, 1 + %exitcond14 = icmp ne i64 %inc22, %n + br i1 %exitcond14, label %for.cond4.preheader, label %for.inc24.loopexit + +for.inc24.loopexit: ; preds = %for.inc21 + %B.addr.2.lcssa.lcssa = phi i64* [ %B.addr.2.lcssa, %for.inc21 ] + br label %for.inc24 + +for.inc24: ; preds = %for.inc24.loopexit, %for.cond1.preheader + %B.addr.1.lcssa = phi i64* [ %B.addr.012, %for.cond1.preheader ], [ %B.addr.2.lcssa.lcssa, %for.inc24.loopexit ] + %inc25 = add nsw i64 %i.011, 1 + %exitcond15 = icmp ne i64 %inc25, %n + br i1 %exitcond15, label %for.cond1.preheader, label %for.end26.loopexit + +for.end26.loopexit: ; preds = %for.inc24 + br label %for.end26 + +for.end26: ; preds = %for.end26.loopexit, %entry + ret void +} diff --git a/llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheckFixedSize.ll b/llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheckFixedSize.ll new file mode 100644 index 0000000..d783d2e --- /dev/null +++ b/llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheckFixedSize.ll @@ -0,0 +1,120 @@ +; RUN: opt < %s -disable-output -passes="print" \ +; RUN: -da-disable-delinearization-checks 2>&1 | FileCheck %s +; RUN: opt < %s -da -analyze -da-disable-delinearization-checks | FileCheck %s + +; CHECK-LABEL: t1 +; CHECK: da analyze - none! +; CHECK: da analyze - consistent anti [1 -2]! +; CHECK: da analyze - none! + +;; #define N 1024 +;; #define M 2048 +;; void t1(int a[N][M]) { +;; for (int i = 0; i < N-1; ++i) +;; for (int j = 2; j < M; ++j) +;; a[i][j] = a[i+1][j-2]; +;; } + +define void @t1([2048 x i32]* %a) { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.inc11 + %indvars.iv4 = phi i64 [ 0, %entry ], [ %indvars.iv.next5, %for.inc11 ] + br label %for.body4 + +for.body4: ; preds = %for.body, %for.body4 + %indvars.iv = phi i64 [ 2, %for.body ], [ %indvars.iv.next, %for.body4 ] + %0 = add nuw nsw i64 %indvars.iv4, 1 + %1 = add nsw i64 %indvars.iv, -2 + %arrayidx6 = getelementptr inbounds [2048 x i32], [2048 x i32]* %a, i64 %0, i64 %1 + %2 = load i32, i32* %arrayidx6, align 4 + %arrayidx10 = getelementptr inbounds [2048 x i32], [2048 x i32]* %a, i64 %indvars.iv4, i64 %indvars.iv + store i32 %2, i32* %arrayidx10, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 2048 + br i1 %exitcond, label %for.body4, label %for.inc11 + +for.inc11: ; preds = %for.body4 + %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1 + %exitcond7 = icmp ne i64 %indvars.iv.next5, 1023 + br i1 %exitcond7, label %for.body, label %for.end13 + +for.end13: ; preds = %for.inc11 + ret void +} + + +; CHECK-LABEL: t2 +; CHECK: da analyze - none! +; CHECK: da analyze - consistent anti [1 -2 0 -3 2]! +; CHECK: da analyze - none! + +;; #define N 1024 +;; #define M 2048 +;; void t2(int a[][N][N][N][M]) { +;; for (int i1 = 0; i1 < N-1; ++i1) +;; for (int i2 = 2; i2 < N; ++i2) +;; for (int i3 = 0; i3 < N; ++i3) +;; for (int i4 = 3; i4 < N; ++i4) +;; for (int i5 = 0; i5 < M-2; ++i5) +;; a[i1][i2][i3][i4][i5] = a[i1+1][i2-2][i3][i4-3][i5+2]; +;; } + +define void @t2([1024 x [1024 x [1024 x [2048 x i32]]]]* %a) { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.inc46 + %indvars.iv18 = phi i64 [ 0, %entry ], [ %indvars.iv.next19, %for.inc46 ] + br label %for.body4 + +for.body4: ; preds = %for.body, %for.inc43 + %indvars.iv14 = phi i64 [ 2, %for.body ], [ %indvars.iv.next15, %for.inc43 ] + br label %for.body8 + +for.body8: ; preds = %for.body4, %for.inc40 + %indvars.iv11 = phi i64 [ 0, %for.body4 ], [ %indvars.iv.next12, %for.inc40 ] + br label %for.body12 + +for.body12: ; preds = %for.body8, %for.inc37 + %indvars.iv7 = phi i64 [ 3, %for.body8 ], [ %indvars.iv.next8, %for.inc37 ] + br label %for.body16 + +for.body16: ; preds = %for.body12, %for.body16 + %indvars.iv = phi i64 [ 0, %for.body12 ], [ %indvars.iv.next, %for.body16 ] + %0 = add nuw nsw i64 %indvars.iv18, 1 + %1 = add nsw i64 %indvars.iv14, -2 + %2 = add nsw i64 %indvars.iv7, -3 + %3 = add nuw nsw i64 %indvars.iv, 2 + %arrayidx26 = getelementptr inbounds [1024 x [1024 x [1024 x [2048 x i32]]]], [1024 x [1024 x [1024 x [2048 x i32]]]]* %a, i64 %0, i64 %1, i64 %indvars.iv11, i64 %2, i64 %3 + %4 = load i32, i32* %arrayidx26, align 4 + %arrayidx36 = getelementptr inbounds [1024 x [1024 x [1024 x [2048 x i32]]]], [1024 x [1024 x [1024 x [2048 x i32]]]]* %a, i64 %indvars.iv18, i64 %indvars.iv14, i64 %indvars.iv11, i64 %indvars.iv7, i64 %indvars.iv + store i32 %4, i32* %arrayidx36, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 2046 + br i1 %exitcond, label %for.body16, label %for.inc37 + +for.inc37: ; preds = %for.body16 + %indvars.iv.next8 = add nuw nsw i64 %indvars.iv7, 1 + %exitcond10 = icmp ne i64 %indvars.iv.next8, 1024 + br i1 %exitcond10, label %for.body12, label %for.inc40 + +for.inc40: ; preds = %for.inc37 + %indvars.iv.next12 = add nuw nsw i64 %indvars.iv11, 1 + %exitcond13 = icmp ne i64 %indvars.iv.next12, 1024 + br i1 %exitcond13, label %for.body8, label %for.inc43 + +for.inc43: ; preds = %for.inc40 + %indvars.iv.next15 = add nuw nsw i64 %indvars.iv14, 1 + %exitcond17 = icmp ne i64 %indvars.iv.next15, 1024 + br i1 %exitcond17, label %for.body4, label %for.inc46 + +for.inc46: ; preds = %for.inc43 + %indvars.iv.next19 = add nuw nsw i64 %indvars.iv18, 1 + %exitcond21 = icmp ne i64 %indvars.iv.next19, 1023 + br i1 %exitcond21, label %for.body, label %for.end48 + +for.end48: ; preds = %for.inc46 + ret void +} diff --git a/llvm/test/Transforms/LoopInterchange/currentLimitation.ll b/llvm/test/Transforms/LoopInterchange/currentLimitation.ll index 1a9d42db..4f20bdc 100644 --- a/llvm/test/Transforms/LoopInterchange/currentLimitation.ll +++ b/llvm/test/Transforms/LoopInterchange/currentLimitation.ll @@ -2,6 +2,11 @@ ; RUN: -pass-remarks-output=%t -verify-loop-info -verify-dom-info -S | FileCheck -check-prefix=IR %s ; RUN: FileCheck --input-file=%t %s +; RUN: opt < %s -basicaa -loop-interchange -pass-remarks-missed='loop-interchange' \ +; RUN: -da-disable-delinearization-checks -pass-remarks-output=%t \ +; RUN: -verify-loop-info -verify-dom-info -S | FileCheck -check-prefix=IR %s +; RUN: FileCheck --check-prefix=DELIN --input-file=%t %s + target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -16,13 +21,14 @@ target triple = "x86_64-unknown-linux-gnu" ;; for(int j=1;j