From 849d01bf3d97d05074a00d78903d782b2ac804f8 Mon Sep 17 00:00:00 2001 From: Joshua Cao Date: Sun, 21 May 2023 13:30:26 -0700 Subject: [PATCH] [LoopUnroll] Peel iterations based on select conditions This also allows us to peel loops with a `select`: ``` for (int i = 0; i <= N; ++i); f3(i == 0 ? a : b); // select instruction ``` into: ``` f3(a); // peel one iteration for (int i = 1; i <= N; ++i) f3(b); ``` Reviewed By: nikic Differential Revision: https://reviews.llvm.org/D151052 --- llvm/lib/Transforms/Utils/LoopPeel.cpp | 49 +++++++++++++++------- .../Transforms/LoopUnroll/peel-loop-conditions.ll | 25 ++++++++--- 2 files changed, 53 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp index 2acbe90..1ec46e0 100644 --- a/llvm/lib/Transforms/Utils/LoopPeel.cpp +++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp @@ -345,20 +345,20 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount, assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form"); unsigned DesiredPeelCount = 0; - for (auto *BB : L.blocks()) { - auto *BI = dyn_cast(BB->getTerminator()); - if (!BI || BI->isUnconditional()) - continue; - - // Ignore loop exit condition. - if (L.getLoopLatch() == BB) - continue; + // Do not peel the entire loop. + const SCEV *BE = SE.getConstantMaxBackedgeTakenCount(&L); + if (const SCEVConstant *SC = dyn_cast(BE)) + MaxPeelCount = + std::min((unsigned)SC->getAPInt().getLimitedValue() - 1, MaxPeelCount); + + auto ComputePeelCount = [&](Value *Condition) -> void { + if (!Condition->getType()->isIntegerTy()) + return; - Value *Condition = BI->getCondition(); Value *LeftVal, *RightVal; CmpInst::Predicate Pred; if (!match(Condition, m_ICmp(Pred, m_Value(LeftVal), m_Value(RightVal)))) - continue; + return; const SCEV *LeftSCEV = SE.getSCEV(LeftVal); const SCEV *RightSCEV = SE.getSCEV(RightVal); @@ -366,7 +366,7 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount, // Do not consider predicates that are known to be true or false // independently of the loop iteration. if (SE.evaluatePredicate(Pred, LeftSCEV, RightSCEV)) - continue; + return; // Check if we have a condition with one AddRec and one non AddRec // expression. Normalize LeftSCEV to be the AddRec. @@ -375,7 +375,7 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount, std::swap(LeftSCEV, RightSCEV); Pred = ICmpInst::getSwappedPredicate(Pred); } else - continue; + return; } const SCEVAddRecExpr *LeftAR = cast(LeftSCEV); @@ -383,10 +383,10 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount, // Avoid huge SCEV computations in the loop below, make sure we only // consider AddRecs of the loop we are trying to peel. if (!LeftAR->isAffine() || LeftAR->getLoop() != &L) - continue; + return; if (!(ICmpInst::isEquality(Pred) && LeftAR->hasNoSelfWrap()) && !SE.getMonotonicPredicateType(LeftAR, Pred)) - continue; + return; // Check if extending the current DesiredPeelCount lets us evaluate Pred // or !Pred in the loop body statically. @@ -422,7 +422,7 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount, // first iteration of the loop body after peeling? if (!SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal, RightSCEV)) - continue; // If not, give up. + return; // If not, give up. // However, for equality comparisons, that isn't always sufficient to // eliminate the comparsion in loop body, we may need to peel one more @@ -433,11 +433,28 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount, !SE.isKnownPredicate(Pred, IterVal, RightSCEV) && SE.isKnownPredicate(Pred, NextIterVal, RightSCEV)) { if (!CanPeelOneMoreIteration()) - continue; // Need to peel one more iteration, but can't. Give up. + return; // Need to peel one more iteration, but can't. Give up. PeelOneMoreIteration(); // Great! } DesiredPeelCount = std::max(DesiredPeelCount, NewPeelCount); + }; + + for (BasicBlock *BB : L.blocks()) { + for (Instruction &I : *BB) { + if (SelectInst *SI = dyn_cast(&I)) + ComputePeelCount(SI->getCondition()); + } + + auto *BI = dyn_cast(BB->getTerminator()); + if (!BI || BI->isUnconditional()) + continue; + + // Ignore loop exit condition. + if (L.getLoopLatch() == BB) + continue; + + ComputePeelCount(BI->getCondition()); } return DesiredPeelCount; diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-conditions.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-conditions.ll index 4b432f9..1266e51 100644 --- a/llvm/test/Transforms/LoopUnroll/peel-loop-conditions.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-loop-conditions.ll @@ -1281,19 +1281,34 @@ define void @test19(i32 %num, i32 %a, i32 %b) { ; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[NUM:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: for.body.preheader: +; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]] +; CHECK: for.body.peel.begin: +; CHECK-NEXT: br label [[FOR_BODY_PEEL:%.*]] +; CHECK: for.body.peel: +; CHECK-NEXT: [[CMP1_PEEL:%.*]] = icmp eq i32 0, 0 +; CHECK-NEXT: [[COND_PEEL:%.*]] = select i1 [[CMP1_PEEL]], i32 [[A:%.*]], i32 [[B:%.*]] +; CHECK-NEXT: tail call void @f3(i32 noundef [[COND_PEEL]]) +; CHECK-NEXT: [[INC_PEEL:%.*]] = add nuw nsw i32 0, 1 +; CHECK-NEXT: [[EXITCOND_NOT_PEEL:%.*]] = icmp eq i32 [[INC_PEEL]], [[NUM]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT_PEEL]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY_PEEL_NEXT:%.*]] +; CHECK: for.body.peel.next: +; CHECK-NEXT: br label [[FOR_BODY_PEEL_NEXT1:%.*]] +; CHECK: for.body.peel.next1: +; CHECK-NEXT: br label [[FOR_BODY_PREHEADER_PEEL_NEWPH:%.*]] +; CHECK: for.body.preheader.peel.newph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup.loopexit.loopexit: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void ; CHECK: for.body: -; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[I_06]], 0 -; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP1]], i32 [[A:%.*]], i32 [[B:%.*]] -; CHECK-NEXT: tail call void @f3(i32 noundef [[COND]]) +; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[INC_PEEL]], [[FOR_BODY_PREHEADER_PEEL_NEWPH]] ] +; CHECK-NEXT: tail call void @f3(i32 noundef [[B]]) ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_06]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[NUM]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT_LOOPEXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; entry: %cmp5 = icmp sgt i32 %num, 0 -- 2.7.4