From db45746821ab01a54f8df033991c3280c4284e3b Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sat, 29 May 2021 18:33:31 +0200 Subject: [PATCH] [LoopUnroll] Separate peeling from unrolling Loop peeling is currently performed as part of UnrollLoop(). Outside test scenarios, it is always performed with an unroll count of 1. This means that unrolling doesn't actually do anything apart from performing post-unroll simplification. When testing, it's currently possible to specify both an explicit peel count and an explicit unroll count. This doesn't perform any sensible operation and may result in miscompiles, see https://bugs.llvm.org/show_bug.cgi?id=45939. This patch moves peeling from UnrollLoop() into tryToUnrollLoop(), so that peeling does not also perform a subsequent unroll. We only run the post-unroll simplifications. Specifying both an explicit peel count and unroll count is forbidden. In the future, we may want to support both (non-PGO) peeling a loop and unrolling it, but this needs to be done by first performing the peel and then recalculating unrolling heuristics on a now possibly analyzable loop. 
Differential Revision: https://reviews.llvm.org/D103362 --- llvm/include/llvm/Transforms/Utils/UnrollLoop.h | 1 - llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 41 ++++++- llvm/lib/Transforms/Utils/LoopUnroll.cpp | 46 +------- llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp | 2 +- .../Transforms/LoopUnroll/peel-loop-and-unroll.ll | 22 ++++ llvm/test/Transforms/LoopUnroll/pr33437.ll | 24 ++-- .../pr45939-peel-count-and-complete-unroll.ll | 131 ++------------------- .../LoopUnroll/wrong_assert_in_peeling.ll | 6 +- 8 files changed, 81 insertions(+), 192 deletions(-) create mode 100644 llvm/test/Transforms/LoopUnroll/peel-loop-and-unroll.ll diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h index 1f09f64..452402a 100644 --- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h +++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h @@ -71,7 +71,6 @@ struct UnrollLoopOptions { bool AllowRuntime; bool AllowExpensiveTripCount; unsigned TripMultiple; - unsigned PeelCount; bool UnrollRemainder; bool ForgetAllSCEV; }; diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index e228b06..7b09d8e 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -771,6 +771,18 @@ bool llvm::computeUnrollCount( UnrollCostEstimator UCE(*L, LoopSize); + // Use an explicit peel count that has been specified for testing. In this + // case it's not permitted to also specify an explicit unroll count. + if (PP.PeelCount) { + if (UnrollCount.getNumOccurrences() > 0) { + report_fatal_error("Cannot specify both explicit peel count and " + "explicit unroll count"); + } + UP.Count = 1; + UP.Runtime = false; + return true; + } + // Check for explicit Count. // 1st priority is unroll count set by "unroll-count" option. 
bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0; @@ -1158,6 +1170,28 @@ static LoopUnrollResult tryToUnrollLoop( if (TripCount && UP.Count > TripCount) UP.Count = TripCount; + if (PP.PeelCount) { + assert(UP.Count == 1 && "Cannot perform peel and unroll in the same step"); + LLVM_DEBUG(dbgs() << "PEELING loop %" << L->getHeader()->getName() + << " with iteration count " << PP.PeelCount << "!\n"); + ORE.emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(), + L->getHeader()) + << " peeled loop by " << ore::NV("PeelCount", PP.PeelCount) + << " iterations"; + }); + + if (peelLoop(L, PP.PeelCount, LI, &SE, &DT, &AC, PreserveLCSSA)) { + simplifyLoopAfterUnroll(L, true, LI, &SE, &DT, &AC, &TTI); + // If the loop was peeled, we already "used up" the profile information + // we had, so we don't want to unroll or peel again. + if (PP.PeelProfiledIterations) + L->setLoopAlreadyUnrolled(); + return LoopUnrollResult::PartiallyUnrolled; + } + return LoopUnrollResult::Unmodified; + } + // Save loop properties before it is transformed. MDNode *OrigLoopID = L->getLoopID(); @@ -1166,7 +1200,7 @@ static LoopUnrollResult tryToUnrollLoop( LoopUnrollResult UnrollResult = UnrollLoop( L, {UP.Count, TripCount, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount, - TripMultiple, PP.PeelCount, UP.UnrollRemainder, ForgetAllSCEV}, + TripMultiple, UP.UnrollRemainder, ForgetAllSCEV}, LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop); if (UnrollResult == LoopUnrollResult::Unmodified) return LoopUnrollResult::Unmodified; @@ -1194,10 +1228,7 @@ static LoopUnrollResult tryToUnrollLoop( // If loop has an unroll count pragma or unrolled by explicitly set count // mark loop as unrolled to prevent unrolling beyond that requested. - // If the loop was peeled, we already "used up" the profile information - // we had, so we don't want to unroll or peel again. 
- if (UnrollResult != LoopUnrollResult::FullyUnrolled && - (IsCountSetExplicitly || (PP.PeelProfiledIterations && PP.PeelCount))) + if (UnrollResult != LoopUnrollResult::FullyUnrolled && IsCountSetExplicitly) L->setLoopAlreadyUnrolled(); return UnrollResult; diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp index fe0833a..f7590acc 100644 --- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -59,7 +59,6 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/LoopPeel.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" @@ -259,9 +258,6 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI, /// runtime-unroll the loop if computing RuntimeTripCount will be expensive and /// AllowExpensiveTripCount is false. /// -/// If we want to perform PGO-based loop peeling, PeelCount is set to the -/// number of iterations we want to peel off. -/// /// The LoopInfo Analysis that is passed will be kept consistent. /// /// This utility preserves LoopInfo. It will also preserve ScalarEvolution and @@ -311,7 +307,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, ULO.Count = ULO.TripCount; // Don't enter the unroll code if there is nothing to do. 
- if (ULO.TripCount == 0 && ULO.Count < 2 && ULO.PeelCount == 0) { + if (ULO.TripCount == 0 && ULO.Count < 2) { LLVM_DEBUG(dbgs() << "Won't unroll; almost nothing to do\n"); return LoopUnrollResult::Unmodified; } @@ -320,25 +316,6 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, assert(ULO.TripMultiple > 0); assert(ULO.TripCount == 0 || ULO.TripCount % ULO.TripMultiple == 0); - - bool Peeled = false; - if (ULO.PeelCount) { - Peeled = peelLoop(L, ULO.PeelCount, LI, SE, DT, AC, PreserveLCSSA); - - // Successful peeling may result in a change in the loop preheader/trip - // counts. If we later unroll the loop, we want these to be updated. - if (Peeled) { - // According to our guards and profitability checks the only - // meaningful exit should be latch block. Other exits go to deopt, - // so we do not worry about them. - BasicBlock *ExitingBlock = L->getLoopLatch(); - assert(ExitingBlock && "Loop without exiting block?"); - assert(L->isLoopExiting(ExitingBlock) && "Latch is not exiting?"); - ULO.TripCount = SE->getSmallConstantTripCount(L, ExitingBlock); - ULO.TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock); - } - } - // Are we eliminating the loop control altogether? Note that we can know // we're eliminating the backedge without knowing exactly which iteration // of the unrolled body exits. @@ -350,10 +327,6 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, bool RuntimeTripCount = (ULO.TripCount == 0 && ULO.Count > 0 && ULO.AllowRuntime); - assert((!RuntimeTripCount || !ULO.PeelCount) && - "Did not expect runtime trip-count unrolling " - "and peeling for the same loop"); - // All these values should be taken only after peeling because they might have // changed. 
BasicBlock *Preheader = L->getLoopPreheader(); @@ -396,9 +369,6 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, else if (BasicBlock *ExitingBlock = L->getExitingBlock()) ExitingBI = dyn_cast(ExitingBlock->getTerminator()); if (!LatchBI || (LatchBI->isConditional() && !LatchIsExiting)) { - // If the peeling guard is changed this assert may be relaxed or even - // deleted. - assert(!Peeled && "Peeling guard changed!"); LLVM_DEBUG( dbgs() << "Can't unroll; a conditional latch must exit the loop"); return LoopUnrollResult::Unmodified; @@ -473,16 +443,6 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, << "completely unrolled loop with " << NV("UnrollCount", ULO.TripCount) << " iterations"; }); - } else if (ULO.PeelCount) { - LLVM_DEBUG(dbgs() << "PEELING loop %" << Header->getName() - << " with iteration count " << ULO.PeelCount << "!\n"); - if (ORE) - ORE->emit([&]() { - return OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(), - L->getHeader()) - << " peeled loop by " << NV("PeelCount", ULO.PeelCount) - << " iterations"; - }); } else { auto DiagBuilder = [&]() { OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(), @@ -835,8 +795,8 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, // At this point, the code is well formed. We now simplify the unrolled loop, // doing constant propagation and dead code elimination as we go. 
- simplifyLoopAfterUnroll(L, !CompletelyUnroll && (ULO.Count > 1 || Peeled), LI, - SE, DT, AC, TTI); + simplifyLoopAfterUnroll(L, !CompletelyUnroll && ULO.Count > 1, LI, SE, DT, AC, + TTI); NumCompletelyUnrolled += CompletelyUnroll; ++NumUnrolled; diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 767b072..5281057 100644 --- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -987,7 +987,7 @@ bool llvm::UnrollRuntimeLoopRemainder( {/*Count*/ Count - 1, /*TripCount*/ Count - 1, /*Force*/ false, /*AllowRuntime*/ false, /*AllowExpensiveTripCount*/ false, /*TripMultiple*/ 1, - /*PeelCount*/ 0, /*UnrollRemainder*/ false, ForgetAllSCEV}, + /*UnrollRemainder*/ false, ForgetAllSCEV}, LI, SE, DT, AC, TTI, /*ORE*/ nullptr, PreserveLCSSA); } diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-and-unroll.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-and-unroll.ll new file mode 100644 index 0000000..e855ee8 --- /dev/null +++ b/llvm/test/Transforms/LoopUnroll/peel-loop-and-unroll.ll @@ -0,0 +1,22 @@ +; RUN: not --crash opt -loop-unroll -unroll-peel-count=2 -unroll-count=2 -S < %s 2>&1 | FileCheck %s + +; CHECK: LLVM ERROR: Cannot specify both explicit peel count and explicit unroll count + +@a = global [8 x i32] zeroinitializer, align 16 + +define void @test1() { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 %indvars.iv + %0 = trunc i64 %indvars.iv to i32 + store i32 %0, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 8 + br i1 %exitcond, label %for.body, label %for.exit + +for.exit: ; preds = %for.body + ret void +} diff --git a/llvm/test/Transforms/LoopUnroll/pr33437.ll b/llvm/test/Transforms/LoopUnroll/pr33437.ll 
index 55c17e06..7bf2a0b 100644 --- a/llvm/test/Transforms/LoopUnroll/pr33437.ll +++ b/llvm/test/Transforms/LoopUnroll/pr33437.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -loop-unroll -unroll-count=4 -unroll-peel-count=1 < %s | FileCheck %s +; RUN: opt -S -loop-unroll -unroll-peel-count=1 < %s | FileCheck %s declare zeroext i8 @patatino() @@ -7,6 +7,8 @@ define fastcc void @tinky() { ; CHECK-LABEL: @tinky( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[NEXT:%.*]] +; CHECK: loopexit.loopexit: +; CHECK-NEXT: br label [[LOOPEXIT:%.*]] ; CHECK: loopexit: ; CHECK-NEXT: ret void ; CHECK: next: @@ -15,7 +17,7 @@ define fastcc void @tinky() { ; CHECK-NEXT: br label [[LOOP_PEEL:%.*]] ; CHECK: loop.peel: ; CHECK-NEXT: [[CALL593_PEEL:%.*]] = tail call zeroext i8 @patatino() -; CHECK-NEXT: br i1 false, label [[LOOP_PEEL_NEXT:%.*]], label [[LOOPEXIT:%.*]] +; CHECK-NEXT: br i1 false, label [[LOOP_PEEL_NEXT:%.*]], label [[LOOPEXIT]] ; CHECK: loop.peel.next: ; CHECK-NEXT: br label [[LOOP_PEEL_NEXT1:%.*]] ; CHECK: loop.peel.next1: @@ -24,7 +26,7 @@ define fastcc void @tinky() { ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[CALL593:%.*]] = tail call zeroext i8 @patatino() -; CHECK-NEXT: br label [[LOOPEXIT]] +; CHECK-NEXT: br i1 false, label [[LOOP]], label [[LOOPEXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]] ; entry: br label %next @@ -66,19 +68,11 @@ define void @tinky2() { ; CHECK: next.peel.newph: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[B:%.*]] = phi i32 [ [[B_NEXT_PEEL]], [[NEXT_PEEL_NEWPH]] ], [ [[B_NEXT_3:%.*]], [[LOOP_2:%.*]] ] +; CHECK-NEXT: [[B:%.*]] = phi i32 [ [[B_NEXT_PEEL]], [[NEXT_PEEL_NEWPH]] ], [ [[B_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[CALL593:%.*]] = tail call zeroext i8 @patatino() -; CHECK-NEXT: [[B_NEXT:%.*]] = add nuw nsw i32 [[B]], 1 -; CHECK-NEXT: [[CALL593_1:%.*]] = tail call zeroext i8 @patatino() -; CHECK-NEXT: [[B_NEXT_1:%.*]] = add nuw nsw 
i32 [[B_NEXT]], 1 -; CHECK-NEXT: [[COND_1:%.*]] = icmp ne i32 [[B_NEXT]], 30 -; CHECK-NEXT: br i1 [[COND_1]], label [[LOOP_2]], label [[LOOPEXIT_LOOPEXIT:%.*]], !llvm.loop !0 -; CHECK: loop.2: -; CHECK-NEXT: [[CALL593_2:%.*]] = tail call zeroext i8 @patatino() -; CHECK-NEXT: [[B_NEXT_2:%.*]] = add nuw nsw i32 [[B_NEXT_1]], 1 -; CHECK-NEXT: [[CALL593_3:%.*]] = tail call zeroext i8 @patatino() -; CHECK-NEXT: [[B_NEXT_3]] = add nuw nsw i32 [[B_NEXT_2]], 1 -; CHECK-NEXT: br label [[LOOP]], !llvm.loop !2 +; CHECK-NEXT: [[B_NEXT]] = add nuw nsw i32 [[B]], 1 +; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[B]], 30 +; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[LOOPEXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP3:![0-9]+]] ; entry: br label %next diff --git a/llvm/test/Transforms/LoopUnroll/pr45939-peel-count-and-complete-unroll.ll b/llvm/test/Transforms/LoopUnroll/pr45939-peel-count-and-complete-unroll.ll index 0b9ea76..654b94c 100644 --- a/llvm/test/Transforms/LoopUnroll/pr45939-peel-count-and-complete-unroll.ll +++ b/llvm/test/Transforms/LoopUnroll/pr45939-peel-count-and-complete-unroll.ll @@ -1,7 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -loop-unroll -unroll-peel-count=2 -S %s | FileCheck --check-prefix=PEEL2 %s ; RUN: opt -loop-unroll -unroll-peel-count=8 -S %s | FileCheck --check-prefix=PEEL8 %s -; RUN: opt -loop-unroll -unroll-peel-count=2 -unroll-count=2 -S %s | FileCheck --check-prefix=PEEL2UNROLL2 %s ; Test case for PR45939. Make sure unroll count is adjusted when loop is peeled and unrolled. 
@@ -36,47 +35,17 @@ define void @test1() { ; PEEL2: entry.peel.newph: ; PEEL2-NEXT: br label [[FOR_BODY:%.*]] ; PEEL2: for.body: -; PEEL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PEEL4]], [[ENTRY_PEEL_NEWPH]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY_6:%.*]] ] +; PEEL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PEEL4]], [[ENTRY_PEEL_NEWPH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; PEEL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV]] ; PEEL2-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVARS_IV]] to i32 ; PEEL2-NEXT: store i32 [[TMP2]], i32* [[ARRAYIDX]], align 4 -; PEEL2-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; PEEL2-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT]] -; PEEL2-NEXT: [[TMP3:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; PEEL2-NEXT: store i32 [[TMP3]], i32* [[ARRAYIDX_1]], align 4 -; PEEL2-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT]], 1 -; PEEL2-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_1]] -; PEEL2-NEXT: [[TMP4:%.*]] = trunc i64 [[INDVARS_IV_NEXT_1]] to i32 -; PEEL2-NEXT: store i32 [[TMP4]], i32* [[ARRAYIDX_2]], align 4 -; PEEL2-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_1]], 1 -; PEEL2-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_2]] -; PEEL2-NEXT: [[TMP5:%.*]] = trunc i64 [[INDVARS_IV_NEXT_2]] to i32 -; PEEL2-NEXT: store i32 [[TMP5]], i32* [[ARRAYIDX_3]], align 4 -; PEEL2-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1 -; PEEL2-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_3]] -; PEEL2-NEXT: [[TMP6:%.*]] = trunc i64 [[INDVARS_IV_NEXT_3]] to i32 -; PEEL2-NEXT: store i32 [[TMP6]], i32* [[ARRAYIDX_4]], align 4 -; PEEL2-NEXT: 
[[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_3]], 1 -; PEEL2-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_4]] -; PEEL2-NEXT: [[TMP7:%.*]] = trunc i64 [[INDVARS_IV_NEXT_4]] to i32 -; PEEL2-NEXT: store i32 [[TMP7]], i32* [[ARRAYIDX_5]], align 4 -; PEEL2-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_4]], 1 -; PEEL2-NEXT: [[EXITCOND_5:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT_5]], 8 -; PEEL2-NEXT: br i1 [[EXITCOND_5]], label [[FOR_BODY_6]], label [[FOR_EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]] +; PEEL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; PEEL2-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 8 +; PEEL2-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]] ; PEEL2: for.exit.loopexit: ; PEEL2-NEXT: br label [[FOR_EXIT]] ; PEEL2: for.exit: ; PEEL2-NEXT: ret void -; PEEL2: for.body.6: -; PEEL2-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_5]] -; PEEL2-NEXT: [[TMP8:%.*]] = trunc i64 [[INDVARS_IV_NEXT_5]] to i32 -; PEEL2-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX_6]], align 4 -; PEEL2-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_5]], 1 -; PEEL2-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_6]] -; PEEL2-NEXT: [[TMP9:%.*]] = trunc i64 [[INDVARS_IV_NEXT_6]] to i32 -; PEEL2-NEXT: store i32 [[TMP9]], i32* [[ARRAYIDX_7]], align 4 -; PEEL2-NEXT: [[INDVARS_IV_NEXT_7]] = add nuw nsw i64 [[INDVARS_IV_NEXT_6]], 1 -; PEEL2-NEXT: br label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; ; PEEL8-LABEL: @test1( ; PEEL8-NEXT: entry: @@ -160,102 +129,16 @@ define void @test1() { ; PEEL8: entry.peel.newph: ; PEEL8-NEXT: br label [[FOR_BODY:%.*]] ; PEEL8: for.body: -; PEEL8-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PEEL34]], [[ENTRY_PEEL_NEWPH]] ], [ 
[[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY_7:%.*]] ] +; PEEL8-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PEEL34]], [[ENTRY_PEEL_NEWPH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; PEEL8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV]] ; PEEL8-NEXT: [[TMP8:%.*]] = trunc i64 [[INDVARS_IV]] to i32 ; PEEL8-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4 -; PEEL8-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; PEEL8-NEXT: br i1 true, label [[FOR_BODY_1:%.*]], label [[FOR_EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]] +; PEEL8-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; PEEL8-NEXT: br i1 true, label [[FOR_BODY]], label [[FOR_EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]] ; PEEL8: for.exit.loopexit: ; PEEL8-NEXT: br label [[FOR_EXIT]] ; PEEL8: for.exit: ; PEEL8-NEXT: ret void -; PEEL8: for.body.1: -; PEEL8-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT]] -; PEEL8-NEXT: [[TMP9:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; PEEL8-NEXT: store i32 [[TMP9]], i32* [[ARRAYIDX_1]], align 4 -; PEEL8-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT]], 1 -; PEEL8-NEXT: br i1 true, label [[FOR_BODY_2:%.*]], label [[FOR_EXIT_LOOPEXIT]], !llvm.loop [[LOOP0]] -; PEEL8: for.body.2: -; PEEL8-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_1]] -; PEEL8-NEXT: [[TMP10:%.*]] = trunc i64 [[INDVARS_IV_NEXT_1]] to i32 -; PEEL8-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX_2]], align 4 -; PEEL8-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_1]], 1 -; PEEL8-NEXT: br i1 true, label [[FOR_BODY_3:%.*]], label [[FOR_EXIT_LOOPEXIT]], !llvm.loop [[LOOP0]] -; PEEL8: for.body.3: -; PEEL8-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_2]] -; PEEL8-NEXT: [[TMP11:%.*]] = trunc i64 
[[INDVARS_IV_NEXT_2]] to i32 -; PEEL8-NEXT: store i32 [[TMP11]], i32* [[ARRAYIDX_3]], align 4 -; PEEL8-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1 -; PEEL8-NEXT: br i1 true, label [[FOR_BODY_4:%.*]], label [[FOR_EXIT_LOOPEXIT]], !llvm.loop [[LOOP0]] -; PEEL8: for.body.4: -; PEEL8-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_3]] -; PEEL8-NEXT: [[TMP12:%.*]] = trunc i64 [[INDVARS_IV_NEXT_3]] to i32 -; PEEL8-NEXT: store i32 [[TMP12]], i32* [[ARRAYIDX_4]], align 4 -; PEEL8-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_3]], 1 -; PEEL8-NEXT: br i1 true, label [[FOR_BODY_5:%.*]], label [[FOR_EXIT_LOOPEXIT]], !llvm.loop [[LOOP0]] -; PEEL8: for.body.5: -; PEEL8-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_4]] -; PEEL8-NEXT: [[TMP13:%.*]] = trunc i64 [[INDVARS_IV_NEXT_4]] to i32 -; PEEL8-NEXT: store i32 [[TMP13]], i32* [[ARRAYIDX_5]], align 4 -; PEEL8-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_4]], 1 -; PEEL8-NEXT: br i1 true, label [[FOR_BODY_6:%.*]], label [[FOR_EXIT_LOOPEXIT]], !llvm.loop [[LOOP0]] -; PEEL8: for.body.6: -; PEEL8-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_5]] -; PEEL8-NEXT: [[TMP14:%.*]] = trunc i64 [[INDVARS_IV_NEXT_5]] to i32 -; PEEL8-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX_6]], align 4 -; PEEL8-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_5]], 1 -; PEEL8-NEXT: br i1 true, label [[FOR_BODY_7]], label [[FOR_EXIT_LOOPEXIT]], !llvm.loop [[LOOP0]] -; PEEL8: for.body.7: -; PEEL8-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_6]] -; PEEL8-NEXT: [[TMP15:%.*]] = trunc i64 [[INDVARS_IV_NEXT_6]] to i32 -; PEEL8-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX_7]], align 4 -; PEEL8-NEXT: [[INDVARS_IV_NEXT_7]] = add nuw nsw i64 
[[INDVARS_IV_NEXT_6]], 1 -; PEEL8-NEXT: br i1 true, label [[FOR_BODY]], label [[FOR_EXIT_LOOPEXIT]], !llvm.loop [[LOOP2:![0-9]+]] -; -; PEEL2UNROLL2-LABEL: @test1( -; PEEL2UNROLL2-NEXT: entry: -; PEEL2UNROLL2-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]] -; PEEL2UNROLL2: for.body.peel.begin: -; PEEL2UNROLL2-NEXT: br label [[FOR_BODY_PEEL:%.*]] -; PEEL2UNROLL2: for.body.peel: -; PEEL2UNROLL2-NEXT: [[ARRAYIDX_PEEL:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 0 -; PEEL2UNROLL2-NEXT: [[TMP0:%.*]] = trunc i64 0 to i32 -; PEEL2UNROLL2-NEXT: store i32 [[TMP0]], i32* [[ARRAYIDX_PEEL]], align 4 -; PEEL2UNROLL2-NEXT: [[INDVARS_IV_NEXT_PEEL:%.*]] = add nuw nsw i64 0, 1 -; PEEL2UNROLL2-NEXT: [[EXITCOND_PEEL:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT_PEEL]], 8 -; PEEL2UNROLL2-NEXT: br i1 [[EXITCOND_PEEL]], label [[FOR_BODY_PEEL_NEXT:%.*]], label [[FOR_EXIT:%.*]] -; PEEL2UNROLL2: for.body.peel.next: -; PEEL2UNROLL2-NEXT: br label [[FOR_BODY_PEEL2:%.*]] -; PEEL2UNROLL2: for.body.peel2: -; PEEL2UNROLL2-NEXT: [[ARRAYIDX_PEEL3:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_PEEL]] -; PEEL2UNROLL2-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV_NEXT_PEEL]] to i32 -; PEEL2UNROLL2-NEXT: store i32 [[TMP1]], i32* [[ARRAYIDX_PEEL3]], align 4 -; PEEL2UNROLL2-NEXT: [[INDVARS_IV_NEXT_PEEL4:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_PEEL]], 1 -; PEEL2UNROLL2-NEXT: [[EXITCOND_PEEL5:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT_PEEL4]], 8 -; PEEL2UNROLL2-NEXT: br i1 [[EXITCOND_PEEL5]], label [[FOR_BODY_PEEL_NEXT1:%.*]], label [[FOR_EXIT]] -; PEEL2UNROLL2: for.body.peel.next1: -; PEEL2UNROLL2-NEXT: br label [[FOR_BODY_PEEL_NEXT6:%.*]] -; PEEL2UNROLL2: for.body.peel.next6: -; PEEL2UNROLL2-NEXT: br label [[ENTRY_PEEL_NEWPH:%.*]] -; PEEL2UNROLL2: entry.peel.newph: -; PEEL2UNROLL2-NEXT: br label [[FOR_BODY:%.*]] -; PEEL2UNROLL2: for.body: -; PEEL2UNROLL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PEEL4]], [[ENTRY_PEEL_NEWPH]] ], [ 
[[INDVARS_IV_NEXT_1:%.*]], [[FOR_BODY]] ] -; PEEL2UNROLL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV]] -; PEEL2UNROLL2-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; PEEL2UNROLL2-NEXT: store i32 [[TMP2]], i32* [[ARRAYIDX]], align 4 -; PEEL2UNROLL2-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; PEEL2UNROLL2-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT]] -; PEEL2UNROLL2-NEXT: [[TMP3:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; PEEL2UNROLL2-NEXT: store i32 [[TMP3]], i32* [[ARRAYIDX_1]], align 4 -; PEEL2UNROLL2-NEXT: [[INDVARS_IV_NEXT_1]] = add nuw nsw i64 [[INDVARS_IV_NEXT]], 1 -; PEEL2UNROLL2-NEXT: [[EXITCOND_1:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT_1]], 8 -; PEEL2UNROLL2-NEXT: br i1 [[EXITCOND_1]], label [[FOR_BODY]], label [[FOR_EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]] -; PEEL2UNROLL2: for.exit.loopexit: -; PEEL2UNROLL2-NEXT: br label [[FOR_EXIT]] -; PEEL2UNROLL2: for.exit: -; PEEL2UNROLL2-NEXT: ret void ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopUnroll/wrong_assert_in_peeling.ll b/llvm/test/Transforms/LoopUnroll/wrong_assert_in_peeling.ll index c618b29..78f459f 100644 --- a/llvm/test/Transforms/LoopUnroll/wrong_assert_in_peeling.ll +++ b/llvm/test/Transforms/LoopUnroll/wrong_assert_in_peeling.ll @@ -39,13 +39,13 @@ define i32 @test() { ; CHECK-NEXT: [[TMP4]] = add nsw i32 [[TMP3]], [[TMP]] ; CHECK-NEXT: br label [[BB5:%.*]] ; CHECK: bb5: -; CHECK-NEXT: br i1 false, label [[BB7:%.*]], label [[BB15_LOOPEXIT:%.*]] +; CHECK-NEXT: br i1 undef, label [[BB7:%.*]], label [[BB15_LOOPEXIT:%.*]] ; CHECK: bb7: ; CHECK-NEXT: br i1 undef, label [[BB10:%.*]], label [[BB10]] ; CHECK: bb10: -; CHECK-NEXT: br i1 false, label [[BB12]], label [[BB17_LOOPEXIT:%.*]] +; CHECK-NEXT: br i1 undef, label [[BB12]], label [[BB17_LOOPEXIT:%.*]] ; CHECK: bb12: -; CHECK-NEXT: br i1 false, label [[BB13_LOOPEXIT:%.*]], 
label [[BB2]], !llvm.loop !0 +; CHECK-NEXT: br i1 false, label [[BB13_LOOPEXIT:%.*]], label [[BB2]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: bb13.loopexit: ; CHECK-NEXT: br label [[BB13]] ; CHECK: bb13: -- 2.7.4