From bc19b7c3cc169151ef30883c9b313114d7d5a9db Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 7 Jul 2022 08:40:26 -0700 Subject: [PATCH] [LV] Remove collectTriviallyDeadInstructions, already handled by VP DCE. Now that removeDeadRecipes can remove most dead recipes across a whole VPlan, there is no need to first collect some dead instructions. Instead removeDeadRecipes can simply clean them up. Depends D127580. Reviewed By: Ayal Differential Revision: https://reviews.llvm.org/D128408 --- .../Vectorize/LoopVectorizationPlanner.h | 5 -- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 55 +--------------------- .../LoopVectorize/X86/consecutive-ptr-uniforms.ll | 6 +-- .../Transforms/LoopVectorize/pointer-induction.ll | 3 +- 4 files changed, 4 insertions(+), 65 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index 933ea9e..0845ec5 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -332,11 +332,6 @@ public: bool requiresTooManyRuntimeChecks() const; protected: - /// Collect the instructions from the original loop that would be trivially - /// dead in the vectorized loop if generated. - void collectTriviallyDeadInstructions( - SmallPtrSetImpl &DeadInstructions); - /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive, /// according to the information gathered by Legal when it checked if it is /// legal to vectorize the loop. diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 6926da5..83c3122 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7645,51 +7645,6 @@ void LoopVectorizationPlanner::printPlans(raw_ostream &O) { } #endif -void LoopVectorizationPlanner::collectTriviallyDeadInstructions( - SmallPtrSetImpl &DeadInstructions) { - - // We create new control-flow for the vectorized loop, so the original exit - // conditions will be dead after vectorization if it's only used by the - // terminator - SmallVector ExitingBlocks; - OrigLoop->getExitingBlocks(ExitingBlocks); - for (auto *BB : ExitingBlocks) { - auto *Cmp = dyn_cast(BB->getTerminator()->getOperand(0)); - if (!Cmp || !Cmp->hasOneUse()) - continue; - - // TODO: we should introduce a getUniqueExitingBlocks on Loop - if (!DeadInstructions.insert(Cmp).second) - continue; - - // The operands of the icmp is often a dead trunc, used by IndUpdate. - // TODO: can recurse through operands in general - for (Value *Op : Cmp->operands()) { - if (isa(Op) && Op->hasOneUse()) - DeadInstructions.insert(cast(Op)); - } - } - - // We create new "steps" for induction variable updates to which the original - // induction variables map. An original update instruction will be dead if - // all its users except the induction variable are dead. - auto *Latch = OrigLoop->getLoopLatch(); - for (auto &Induction : Legal->getInductionVars()) { - PHINode *Ind = Induction.first; - auto *IndUpdate = cast(Ind->getIncomingValueForBlock(Latch)); - - // If the tail is to be folded by masking, the primary induction variable, - // if exists, isn't dead: it will be used for masking. Don't kill it. - if (CM.foldTailByMasking() && IndUpdate == Legal->getPrimaryInduction()) - continue; - - if (llvm::all_of(IndUpdate->users(), [&](User *U) -> bool { - return U == Ind || DeadInstructions.count(cast(U)); - })) - DeadInstructions.insert(IndUpdate); - } -} - Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) { return V; } //===--------------------------------------------------------------------===// @@ -8577,19 +8532,11 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF) { assert(OrigLoop->isInnermost() && "Inner loop expected."); - // Collect instructions from the original loop that will become trivially dead - // in the vectorized loop. We don't need to vectorize these instructions. For - // example, original induction update instructions can become dead because we - // separately emit induction "steps" when generating code for the new loop. - // Similarly, we create a new latch condition when setting up the structure - // of the new loop, so the old one can become dead. - SmallPtrSet DeadInstructions; - collectTriviallyDeadInstructions(DeadInstructions); - // Add assume instructions we need to drop to DeadInstructions, to prevent // them from being added to the VPlan. // TODO: We only need to drop assumes in blocks that get flattend. If the // control flow is preserved, we should keep them. + SmallPtrSet DeadInstructions; auto &ConditionalAssumes = Legal->getConditionalAssumes(); DeadInstructions.insert(ConditionalAssumes.begin(), ConditionalAssumes.end()); diff --git a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll index 53a95d9..6088fbe 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll @@ -98,10 +98,9 @@ attributes #0 = { "target-cpu"="knl" } ; FORCE-NEXT: store i32 [[TMP0]], i32* @b, align 1 ; FORCE-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* @a, i32 0, i32 [[TMP0]] ; FORCE-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 1 -; FORCE-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0 ; FORCE-NEXT: br label [[PRED_LOAD_CONTINUE]] ; FORCE: pred.load.continue: -; FORCE-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_LOAD_IF]] ] +; FORCE-NEXT: [[TMP9:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] ; FORCE-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1 ; FORCE-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4]] ; FORCE: pred.load.if1: @@ -109,10 +108,9 @@ attributes #0 = { "target-cpu"="knl" } ; FORCE-NEXT: store i32 [[TMP1]], i32* @b, align 1 ; FORCE-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* @a, i32 0, i32 [[TMP1]] ; FORCE-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 1 -; FORCE-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP12]], i32 1 ; FORCE-NEXT: br label [[PRED_LOAD_CONTINUE4]] ; FORCE: pred.load.continue2: -; FORCE-NEXT: [[TMP14:%.*]] = phi <2 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], [[PRED_LOAD_IF3]] ] +; FORCE-NEXT: [[TMP13:%.*]] = phi i32 [ poison, %pred.load.continue ], [ [[TMP12]], %pred.load.if1 ] ; FORCE-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 ; FORCE-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], ; FORCE-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll index f10ac3f..7859688 100644 --- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll @@ -36,8 +36,7 @@ define void @a(i8* readnone %b) { ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP9]], i32 0 ; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP]], i64 -1 -; CHECK-NEXT: store i8 95, i8* [[TMP11]], align 1 +; CHECK-NEXT: store i8 95, i8* [[TMP4]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP9]], i32 1 -- 2.7.4