From 9758242046b3cdce6fb713acb6d3f5bfaa933a47 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 24 Dec 2022 18:33:36 +0000 Subject: [PATCH] [LV] Use SCEV to check if the trip count <= VF * UF. Just comparing constant trip counts causes LV to miss cases where the vector loop body only executes once. The motivation for this is to remove, in more cases, the need for unrolling to eliminate vector loop back-edges when the body only executes once. Reviewed By: Ayal Differential Revision: https://reviews.llvm.org/D133017 --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 7 ++++--- .../Transforms/LoopVectorize/vector-loop-backedge-elimination.ll | 6 ++---- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 400eacf..7406775 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -482,10 +482,11 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF, Type *IdxTy = Plan.getCanonicalIV()->getStartValue()->getLiveInIRValue()->getType(); const SCEV *TripCount = createTripCountSCEV(IdxTy, PSE); - auto *C = dyn_cast<SCEVConstant>(TripCount); ScalarEvolution &SE = *PSE.getSE(); - if (!C || TripCount->isZero() || - C->getAPInt().getZExtValue() > BestVF.getKnownMinValue() * BestUF) + const SCEV *C = + SE.getConstant(TripCount->getType(), BestVF.getKnownMinValue() * BestUF); + if (TripCount->isZero() || + !SE.isKnownPredicate(CmpInst::ICMP_ULE, TripCount, C)) return; LLVMContext &Ctx = SE.getContext(); diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll index 9cb78e4..fd75177 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll @@ -11,11 +11,9 @@ define void 
@test_tc_less_than_16(ptr %A, i64 %N) { ; VF8UF1: [[CMP:%.+]] = icmp eq i64 %index.next, %n.vec ; VF8UF1-NEXT: br i1 [[CMP]], label %middle.block, label %vector.body ; -; VF8UF2: [[CMP:%.+]] = icmp eq i64 %index.next, %n.vec -; VF8UF2-NEXT: br i1 [[CMP]], label %middle.block, label %vector.body +; VF8UF2: br i1 true, label %middle.block, label %vector.body ; -; VF16UF1: [[CMP:%.+]] = icmp eq i64 %index.next, %n.vec -; VF16UF1-NEXT: br i1 [[CMP]], label %middle.block, label %vector.body +; VF16UF1: br i1 true, label %middle.block, label %vector.body ; entry: %and = and i64 %N, 15 -- 2.7.4