From: Florian Hahn Date: Wed, 8 Mar 2023 19:11:27 +0000 (+0100) Subject: [VPlan] Add predicate to VPReplicateRecipe, expand region later. X-Git-Tag: upstream/17.0.6~15481 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=79272ec028ec9c8bd2e3842f6624b13417fbcebd;p=platform%2Fupstream%2Fllvm.git [VPlan] Add predicate to VPReplicateRecipe, expand region later. This patch adds the predicate as additional operand to VPReplicateRecipe during initial construction. The predicated recipes are later moved into replicate regions. This simplifies constructions and some VPlan transformations, like fixed-order recurrence handling. It also improves codegen in some cases (e.g. for in-loop reductions), because the recipes remain in the same block. Reviewed By: Ayal Differential Revision: https://reviews.llvm.org/D143865 --- diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index b12c364..19e6ec2 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8622,9 +8622,9 @@ void VPRecipeBuilder::fixHeaderPhis() { } } -VPBasicBlock *VPRecipeBuilder::handleReplication(Instruction *I, VFRange &Range, - VPBasicBlock *VPBB, - VPlan &Plan) { +VPRecipeOrVPValueTy VPRecipeBuilder::handleReplication(Instruction *I, + VFRange &Range, + VPlan &Plan) { bool IsUniform = LoopVectorizationPlanner::getDecisionAndClampRange( [&](ElementCount VF) { return CM.isUniformAfterVectorization(I, VF); }, Range); @@ -8661,60 +8661,53 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(Instruction *I, VFRange &Range, break; } } - - auto *Recipe = new VPReplicateRecipe(I, Plan.mapToVPValues(I->operands()), - IsUniform, IsPredicated); - - // Finalize the recipe for Instr, first if it is not predicated. + VPValue *BlockInMask = nullptr; if (!IsPredicated) { + // Finalize the recipe for Instr, first if it is not predicated. LLVM_DEBUG(dbgs() << "LV: Scalarizing:" << *I << "\n"); - setRecipe(I, Recipe); - Plan.addVPValue(I, Recipe); - VPBB->appendRecipe(Recipe); - return VPBB; - } - LLVM_DEBUG(dbgs() << "LV: Scalarizing and predicating:" << *I << "\n"); - - VPBlockBase *SingleSucc = VPBB->getSingleSuccessor(); - assert(SingleSucc && "VPBB must have a single successor when handling " - "predicated replication."); - VPBlockUtils::disconnectBlocks(VPBB, SingleSucc); - // Record predicated instructions for above packing optimizations. - VPBlockBase *Region = createReplicateRegion(Recipe, Plan); - VPBlockUtils::insertBlockAfter(Region, VPBB); - auto *RegSucc = new VPBasicBlock(); - VPBlockUtils::insertBlockAfter(RegSucc, Region); - VPBlockUtils::connectBlocks(RegSucc, SingleSucc); - return RegSucc; + } else { + LLVM_DEBUG(dbgs() << "LV: Scalarizing and predicating:" << *I << "\n"); + // Instructions marked for predication are replicated and a mask operand is + // added initially. Masked replicate recipes will later be placed under an + // if-then construct to prevent side-effects. Generate recipes to compute + // the block mask for this region. 
+ BlockInMask = createBlockInMask(I->getParent(), Plan); + } + + auto *Recipe = new VPReplicateRecipe(I, Plan.mapToVPValues(I->operands()), + IsUniform, BlockInMask); + return toVPRecipeResult(Recipe); } VPRegionBlock * VPRecipeBuilder::createReplicateRegion(VPReplicateRecipe *PredRecipe, VPlan &Plan) { Instruction *Instr = PredRecipe->getUnderlyingInstr(); - // Instructions marked for predication are replicated and placed under an - // if-then construct to prevent side-effects. - // Generate recipes to compute the block mask for this region. - VPValue *BlockInMask = createBlockInMask(Instr->getParent(), Plan); - // Build the triangular if-then region. std::string RegionName = (Twine("pred.") + Instr->getOpcodeName()).str(); assert(Instr->getParent() && "Predicated instruction not in any basic block"); + auto *BlockInMask = PredRecipe->getMask(); + // Replace predicated replicate recipe with a replicate recipe without a + // mask but in the replicate region. + auto *RecipeWithoutMask = new VPReplicateRecipe( + PredRecipe->getUnderlyingInstr(), + make_range(PredRecipe->op_begin(), std::prev(PredRecipe->op_end())), + PredRecipe->isUniform()); + PredRecipe->replaceAllUsesWith(RecipeWithoutMask); + PredRecipe->eraseFromParent(); + auto *BOMRecipe = new VPBranchOnMaskRecipe(BlockInMask); auto *Entry = new VPBasicBlock(Twine(RegionName) + ".entry", BOMRecipe); - auto *PHIRecipe = Instr->getType()->isVoidTy() + auto *PHIRecipe = RecipeWithoutMask->getNumUsers() == 0 ? nullptr - : new VPPredInstPHIRecipe(PredRecipe); + : new VPPredInstPHIRecipe(RecipeWithoutMask); if (PHIRecipe) { - setRecipe(Instr, PHIRecipe); - Plan.addVPValue(Instr, PHIRecipe); - } else { - setRecipe(Instr, PredRecipe); - Plan.addVPValue(Instr, PredRecipe); + RecipeWithoutMask->replaceAllUsesWith(PHIRecipe); + PHIRecipe->setOperand(0, RecipeWithoutMask); } auto *Exiting = new VPBasicBlock(Twine(RegionName) + ".continue", PHIRecipe); - auto *Pred = new VPBasicBlock(Twine(RegionName) + ".if", PredRecipe); + auto *Pred = new VPBasicBlock(Twine(RegionName) + ".if", RecipeWithoutMask); VPRegionBlock *Region = new VPRegionBlock(Entry, Exiting, RegionName, true); // Note: first set Entry as region entry and then connect successors starting @@ -9046,7 +9039,6 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) { // Relevant instructions from basic block BB will be grouped into VPRecipe // ingredients and fill a new VPBasicBlock. - unsigned VPBBsForBB = 0; if (VPBB != HeaderVPBB) VPBB->setName(BB->getName()); Builder.setInsertPoint(VPBB); @@ -9078,46 +9070,36 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( Legal->isInvariantAddressOfReduction(SI->getPointerOperand())) continue; - if (auto RecipeOrValue = RecipeBuilder.tryToCreateWidenRecipe( - Instr, Operands, Range, VPBB, Plan)) { - // If Instr can be simplified to an existing VPValue, use it. - if (RecipeOrValue.is()) { - auto *VPV = RecipeOrValue.get(); - Plan->addVPValue(Instr, VPV); - // If the re-used value is a recipe, register the recipe for the - // instruction, in case the recipe for Instr needs to be recorded. - if (VPRecipeBase *R = VPV->getDefiningRecipe()) - RecipeBuilder.setRecipe(Instr, R); - continue; - } - // Otherwise, add the new recipe. 
- VPRecipeBase *Recipe = RecipeOrValue.get(); - for (auto *Def : Recipe->definedValues()) { - auto *UV = Def->getUnderlyingValue(); - Plan->addVPValue(UV, Def); - } - - RecipeBuilder.setRecipe(Instr, Recipe); - if (isa(Recipe) && - HeaderVPBB->getFirstNonPhi() != VPBB->end()) { - // Move VPWidenIntOrFpInductionRecipes for optimized truncates to the - // phi section of HeaderVPBB. - assert(isa(Instr)); - Recipe->insertBefore(*HeaderVPBB, HeaderVPBB->getFirstNonPhi()); - } else - VPBB->appendRecipe(Recipe); + auto RecipeOrValue = RecipeBuilder.tryToCreateWidenRecipe( + Instr, Operands, Range, VPBB, Plan); + if (!RecipeOrValue) + RecipeOrValue = RecipeBuilder.handleReplication(Instr, Range, *Plan); + // If Instr can be simplified to an existing VPValue, use it. + if (RecipeOrValue.is()) { + auto *VPV = RecipeOrValue.get(); + Plan->addVPValue(Instr, VPV); + // If the re-used value is a recipe, register the recipe for the + // instruction, in case the recipe for Instr needs to be recorded. + if (VPRecipeBase *R = VPV->getDefiningRecipe()) + RecipeBuilder.setRecipe(Instr, R); continue; } - - // Otherwise, if all widening options failed, Instruction is to be - // replicated. This may create a successor for VPBB. - VPBasicBlock *NextVPBB = - RecipeBuilder.handleReplication(Instr, Range, VPBB, *Plan); - if (NextVPBB != VPBB) { - VPBB = NextVPBB; - VPBB->setName(BB->hasName() ? BB->getName() + "." + Twine(VPBBsForBB++) - : ""); + // Otherwise, add the new recipe. + VPRecipeBase *Recipe = RecipeOrValue.get(); + for (auto *Def : Recipe->definedValues()) { + auto *UV = Def->getUnderlyingValue(); + Plan->addVPValue(UV, Def); } + + RecipeBuilder.setRecipe(Instr, Recipe); + if (isa(Recipe) && + HeaderVPBB->getFirstNonPhi() != VPBB->end()) { + // Move VPWidenIntOrFpInductionRecipes for optimized truncates to the + // phi section of HeaderVPBB. + assert(isa(Instr)); + Recipe->insertBefore(*HeaderVPBB, HeaderVPBB->getFirstNonPhi()); + } else + VPBB->appendRecipe(Recipe); } VPBlockUtils::insertBlockAfter(new VPBasicBlock(), VPBB); @@ -9194,6 +9176,9 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( VPlanTransforms::optimizeInductions(*Plan, *PSE.getSE()); VPlanTransforms::removeDeadRecipes(*Plan); + // Convert masked VPReplicateRecipes to if-then region blocks. + VPlanTransforms::addReplicateRegions(*Plan, RecipeBuilder); + bool ShouldSimplify = true; while (ShouldSimplify) { ShouldSimplify = VPlanTransforms::sinkScalarOperands(*Plan); diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h index 1d2a96f..ca8ef71 100644 --- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -163,13 +163,11 @@ public: VPRegionBlock *createReplicateRegion(VPReplicateRecipe *PredRecipe, VPlan &Plan); - /// Build a VPReplicationRecipe for \p I and enclose it within a Region if it - /// is predicated. \return \p VPBB augmented with this new recipe if \p I is - /// not predicated, otherwise \return a new VPBasicBlock that succeeds the new - /// Region. Range.End may be decreased to ensure same recipe behavior from \p - /// Range.Start to \p Range.End. - VPBasicBlock *handleReplication(Instruction *I, VFRange &Range, - VPBasicBlock *VPBB, VPlan &Plan); + /// Build a VPReplicationRecipe for \p I. If it is predicated, add the mask as + /// last operand. Range.End may be decreased to ensure same recipe behavior + /// from \p Range.Start to \p Range.End. 
+ VPRecipeOrVPValueTy handleReplication(Instruction *I, VFRange &Range, + VPlan &Plan); /// Add the incoming values from the backedge to reduction & first-order /// recurrence cross-iteration phis. diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index efcfc13..3dcbda5 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1508,9 +1508,12 @@ class VPReplicateRecipe : public VPRecipeBase, public VPValue { public: template VPReplicateRecipe(Instruction *I, iterator_range Operands, - bool IsUniform, bool IsPredicated = false) + bool IsUniform, VPValue *Mask = nullptr) : VPRecipeBase(VPDef::VPReplicateSC, Operands), VPValue(this, I), - IsUniform(IsUniform), IsPredicated(IsPredicated) {} + IsUniform(IsUniform), IsPredicated(Mask) { + if (Mask) + addOperand(Mask); + } ~VPReplicateRecipe() override = default; @@ -1549,6 +1552,12 @@ public: /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed /// in a vector. bool shouldPack() const; + + /// Return the mask of a predicated VPReplicateRecipe. + VPValue *getMask() { + assert(isPredicated() && "Trying to get the mask of a unpredicated recipe"); + return getOperand(getNumOperands() - 1); + } }; /// A recipe for generating conditional branches on the bits of a mask. diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index d3f84da..31d8515 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -169,8 +169,7 @@ bool VPlanTransforms::sinkScalarOperands(VPlan &Plan) { continue; Instruction *I = cast( cast(SinkCandidate)->getUnderlyingValue()); - auto *Clone = - new VPReplicateRecipe(I, SinkCandidate->operands(), true, false); + auto *Clone = new VPReplicateRecipe(I, SinkCandidate->operands(), true); // TODO: add ".cloned" suffix to name of Clone's VPValue. Clone->insertBefore(SinkCandidate); @@ -620,58 +619,11 @@ sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR, }); for (VPRecipeBase *SinkCandidate : WorkList) { - // VPPredInstPHIRecipes don't need sinking, because they will be sunk when - // sinking the containing replicate region. - if (isa(SinkCandidate) || SinkCandidate == FOR) + if (SinkCandidate == FOR) continue; - VPRecipeBase *Target = Previous; + SinkCandidate->moveAfter(Previous); Previous = SinkCandidate; - auto *TargetRegion = GetReplicateRegion(Target); - auto *SinkRegion = GetReplicateRegion(SinkCandidate); - if (!SinkRegion) { - // If the sink source is not a replicate region, sink the recipe - // directly. - if (TargetRegion) { - // The target is in a replication region, make sure to move Sink to - // the block after it, not into the replication region itself. - VPBasicBlock *NextBlock = - cast(TargetRegion->getSuccessors().front()); - SinkCandidate->moveBefore(*NextBlock, NextBlock->getFirstNonPhi()); - } else - SinkCandidate->moveAfter(Target); - continue; - } - // The sink source is in a replicate region. Unhook the region from the - // CFG. - auto *SinkPred = SinkRegion->getSinglePredecessor(); - auto *SinkSucc = SinkRegion->getSingleSuccessor(); - VPBlockUtils::disconnectBlocks(SinkPred, SinkRegion); - VPBlockUtils::disconnectBlocks(SinkRegion, SinkSucc); - VPBlockUtils::connectBlocks(SinkPred, SinkSucc); - - if (TargetRegion) { - // The target recipe is also in a replicate region, move the sink - // region after the target region. 
- auto *TargetSucc = TargetRegion->getSingleSuccessor(); - VPBlockUtils::disconnectBlocks(TargetRegion, TargetSucc); - VPBlockUtils::connectBlocks(TargetRegion, SinkRegion); - VPBlockUtils::connectBlocks(SinkRegion, TargetSucc); - } else { - // The sink source is in a replicate region, we need to move the whole - // replicate region, which should only contain a single recipe in the - // main block. - auto *SplitBlock = - Target->getParent()->splitAt(std::next(Target->getIterator())); - - auto *SplitPred = SplitBlock->getSinglePredecessor(); - - VPBlockUtils::disconnectBlocks(SplitPred, SplitBlock); - VPBlockUtils::connectBlocks(SplitPred, SinkRegion); - VPBlockUtils::connectBlocks(SinkRegion, SplitBlock); - } - // We modified the CFG, update dominator tree. - VPDT.recalculate(*SinkRegion->getPlan()); } } @@ -703,14 +655,7 @@ void VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan, // Introduce a recipe to combine the incoming and previous values of a // fixed-order recurrence. VPBasicBlock *InsertBlock = Previous->getParent(); - auto *Region = GetReplicateRegion(Previous); - if (Region) - InsertBlock = dyn_cast(Region->getSingleSuccessor()); - if (!InsertBlock) { - InsertBlock = new VPBasicBlock(Region->getName() + ".succ"); - VPBlockUtils::insertBlockAfter(InsertBlock, Region); - } - if (Region || isa(Previous)) + if (isa(Previous)) Builder.setInsertPoint(InsertBlock, InsertBlock->getFirstNonPhi()); else Builder.setInsertPoint(InsertBlock, std::next(Previous->getIterator())); @@ -725,3 +670,32 @@ void VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan, RecurSplice->setOperand(0, FOR); } } + +void VPlanTransforms::addReplicateRegions(VPlan &Plan, + VPRecipeBuilder &Builder) { + SmallVector WorkList; + for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly( + vp_depth_first_deep(Plan.getEntry()))) { + for (VPRecipeBase &R : *VPBB) + if (auto *RepR = dyn_cast(&R)) { + if (RepR->isPredicated()) + WorkList.push_back(RepR); + } + } + + unsigned BBNum = 0; + for (VPReplicateRecipe *RepR : WorkList) { + VPBasicBlock *CurrentBlock = RepR->getParent(); + VPBasicBlock *SplitBlock = CurrentBlock->splitAt(RepR->getIterator()); + + BasicBlock *OrigBB = RepR->getUnderlyingInstr()->getParent(); + SplitBlock->setName( + OrigBB->hasName() ? OrigBB->getName() + "." + Twine(BBNum++) : ""); + // Record predicated instructions for above packing optimizations. + VPBlockBase *Region = Builder.createReplicateRegion(RepR, Plan); + Region->setParent(CurrentBlock->getParent()); + VPBlockUtils::disconnectBlocks(CurrentBlock, SplitBlock); + VPBlockUtils::connectBlocks(CurrentBlock, Region); + VPBlockUtils::connectBlocks(Region, SplitBlock); + } +} diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index bc1094d..2b08ebd 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -26,6 +26,7 @@ class Loop; class PredicatedScalarEvolution; class TargetLibraryInfo; class VPBuilder; +class VPRecipeBuilder; struct VPlanTransforms { /// Replaces the VPInstructions in \p Plan with corresponding @@ -37,6 +38,10 @@ struct VPlanTransforms { SmallPtrSetImpl &DeadInstructions, ScalarEvolution &SE, const TargetLibraryInfo &TLI); + /// Wrap predicated VPReplicateRecipes with a mask operand in an if-then + /// region block and remove the mask operand. 
+ static void addReplicateRegions(VPlan &Plan, VPRecipeBuilder &Builder); + static bool sinkScalarOperands(VPlan &Plan); /// Merge replicate regions in their successor region, if a replicate region diff --git a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll index 7ca6b38..51f4b01 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll @@ -87,23 +87,23 @@ attributes #0 = { "target-cpu"="knl" } ; FORCE: vector.ph: ; FORCE-NEXT: br label [[VECTOR_BODY:%.*]] ; FORCE: vector.body: -; FORCE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE4:%.*]] ] -; FORCE-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE4]] ] +; FORCE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ] +; FORCE-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE4]] ] ; FORCE-NEXT: [[TMP2:%.*]] = icmp ule <2 x i32> [[VEC_IND]], ; FORCE-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 -; FORCE-NEXT: br i1 [[TMP3]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] -; FORCE: pred.load.if: +; FORCE-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; FORCE: pred.store.if: ; FORCE-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; FORCE-NEXT: store i32 [[TMP0]], i32* @b, align 1 -; FORCE-NEXT: br label [[PRED_LOAD_CONTINUE]] -; FORCE: pred.load.continue: +; FORCE-NEXT: br label [[PRED_STORE_CONTINUE]] +; FORCE: pred.store.continue: ; FORCE-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1 -; FORCE-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4]] -; FORCE: pred.load.if1: +; FORCE-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4]] +; FORCE: pred.store.if1: ; FORCE-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 ; FORCE-NEXT: store i32 [[TMP1]], i32* @b, align 1 -; FORCE-NEXT: br label [[PRED_LOAD_CONTINUE4]] -; FORCE: pred.load.continue2: +; FORCE-NEXT: br label [[PRED_STORE_CONTINUE4]] +; FORCE: pred.store.continue2: ; FORCE-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 ; FORCE-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], ; FORCE-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll index 7de4ef1..c8d109b 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -39,9 +39,9 @@ define void @sink_replicate_region_1(i32 %x, ptr %ptr, ptr noalias %dst) optsize ; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<[[PRED1:%.+]]> = ir<%lv> ; CHECK-NEXT: No successors ; CHECK-NEXT: } -; CHECK-NEXT: Successor(s): loop.1 +; CHECK-NEXT: Successor(s): loop.0 ; CHECK-EMPTY: -; CHECK-NEXT: loop.1: +; CHECK-NEXT: loop.0: ; CHECK-NEXT: WIDEN ir<%conv> = sext vp<[[PRED1]]> ; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%0> ir<%conv> ; CHECK-NEXT: Successor(s): pred.store @@ -199,9 +199,9 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<[[PRED:%.+]]> = 
ir<%rem> ; CHECK-NEXT: No successors ; CHECK-NEXT: } -; CHECK-NEXT: Successor(s): loop.0.split +; CHECK-NEXT: Successor(s): loop.0 ; CHECK-EMPTY: -; CHECK-NEXT: loop.0.split: +; CHECK-NEXT: loop.0: ; CHECK-NEXT: WIDEN ir<%add> = add vp<[[PRED]]>, ir<%recur.next> ; CHECK-NEXT: WIDEN ir<%and.red.next> = and ir<%and.red>, ir<%add> ; CHECK-NEXT: EMIT vp<[[SEL:%.+]]> = select vp<[[MASK]]> ir<%and.red.next> ir<%and.red> @@ -272,9 +272,9 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, ptr ; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<[[PRED:%.+]]> = ir<%lv> ; CHECK-NEXT: No successors ; CHECK-NEXT: } -; CHECK-NEXT: Successor(s): loop.1 +; CHECK-NEXT: Successor(s): loop.0 ; CHECK-EMPTY: -; CHECK-NEXT: loop.1: +; CHECK-NEXT: loop.0: ; CHECK-NEXT: WIDEN ir<%conv> = sext vp<[[PRED]]> ; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%0> ir<%conv> ; CHECK-NEXT: Successor(s): pred.store @@ -363,8 +363,8 @@ define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias ; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: -; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> +; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x> ; CHECK-NEXT: REPLICATE ir<%rem.div> = sdiv ir<20>, ir<%rem> ; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE store ir<%rem.div>, ir<%gep> diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll index 3476750..0836d8b 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll @@ -353,124 +353,104 @@ define i32 @simple_chained_rdx(ptr noalias %a, ptr noalias %b, ptr noalias %cond ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 5, [[VECTOR_PH]] ], [ [[TMP51:%.*]], [[PRED_LOAD_CONTINUE14]] ] -; CHECK-NEXT: [[TMP0:%.*]] = or i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[COND:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i64 0 -; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 5, [[VECTOR_PH]] ], [ [[TMP46:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[COND:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i64 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] -; 
CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> poison, i32 [[TMP8]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i64 0 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] ; CHECK: pred.load.continue: -; CHECK-NEXT: [[TMP10:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP5]], i64 1 +; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP10:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP1]], i64 1 ; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] ; CHECK: pred.load.if1: -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP13]], i64 1 +; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP14]], i64 1 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP17]], i64 1 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] ; CHECK: pred.load.continue2: -; CHECK-NEXT: [[TMP15:%.*]] = phi <4 x i32> [ [[TMP10]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP5]], i64 2 -; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] +; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP15]], [[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP20:%.*]] = phi <4 x i32> [ [[TMP10]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP18]], [[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP1]], i64 2 +; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] ; CHECK: pred.load.if3: -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[TMP18]], i64 2 +; CHECK-NEXT: [[TMP22:%.*]] = or i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +; CHECK-NEXT: [[TMP25:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP24]], i64 2 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP22]] +; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +; CHECK-NEXT: [[TMP28:%.*]] = insertelement <4 x i32> [[TMP20]], i32 [[TMP27]], i64 2 ; 
CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] ; CHECK: pred.load.continue4: -; CHECK-NEXT: [[TMP20:%.*]] = phi <4 x i32> [ [[TMP15]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP19]], [[PRED_LOAD_IF3]] ] -; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP5]], i64 3 -; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]] +; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP25]], [[PRED_LOAD_IF3]] ] +; CHECK-NEXT: [[TMP30:%.*]] = phi <4 x i32> [ [[TMP20]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP28]], [[PRED_LOAD_IF3]] ] +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP1]], i64 3 +; CHECK-NEXT: br i1 [[TMP31]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.if5: -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> [[TMP20]], i32 [[TMP23]], i64 3 +; CHECK-NEXT: [[TMP32:%.*]] = or i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP32]] +; CHECK-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +; CHECK-NEXT: [[TMP35:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP34]], i64 3 +; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP32]] +; CHECK-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +; CHECK-NEXT: [[TMP38:%.*]] = insertelement <4 x i32> [[TMP30]], i32 [[TMP37]], i64 3 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.continue6: -; CHECK-NEXT: [[TMP25:%.*]] = phi <4 x i32> [ [[TMP20]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP26:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP25]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP26]]) -; CHECK-NEXT: [[TMP28:%.*]] = add i32 [[TMP27]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP5]], i64 0 -; CHECK-NEXT: br i1 [[TMP29]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]] -; CHECK: pred.load.if7: -; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 -; CHECK-NEXT: [[TMP32:%.*]] = insertelement <4 x i32> poison, i32 [[TMP31]], i64 0 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]] -; CHECK: pred.load.continue8: -; CHECK-NEXT: [[TMP33:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP32]], [[PRED_LOAD_IF7]] ] -; CHECK-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP5]], i64 1 -; CHECK-NEXT: br i1 [[TMP34]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]] -; CHECK: pred.load.if9: -; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 -; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP33]], i32 [[TMP36]], i64 1 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE10]] -; CHECK: pred.load.continue10: -; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP33]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP37]], [[PRED_LOAD_IF9]] ] -; CHECK-NEXT: [[TMP39:%.*]] = extractelement <4 x i1> [[TMP5]], i64 2 -; CHECK-NEXT: br i1 [[TMP39]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]] -; CHECK: pred.load.if11: -; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP41:%.*]] = load i32, 
ptr [[TMP40]], align 4 -; CHECK-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP38]], i32 [[TMP41]], i64 2 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE12]] -; CHECK: pred.load.continue12: -; CHECK-NEXT: [[TMP43:%.*]] = phi <4 x i32> [ [[TMP38]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP42]], [[PRED_LOAD_IF11]] ] -; CHECK-NEXT: [[TMP44:%.*]] = extractelement <4 x i1> [[TMP5]], i64 3 -; CHECK-NEXT: br i1 [[TMP44]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]] -; CHECK: pred.load.if13: -; CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4 -; CHECK-NEXT: [[TMP47:%.*]] = insertelement <4 x i32> [[TMP43]], i32 [[TMP46]], i64 3 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE14]] -; CHECK: pred.load.continue14: -; CHECK-NEXT: [[TMP48:%.*]] = phi <4 x i32> [ [[TMP43]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP47]], [[PRED_LOAD_IF13]] ] -; CHECK-NEXT: [[TMP49:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP48]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP50:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP49]]) -; CHECK-NEXT: [[TMP51]] = add i32 [[TMP50]], [[TMP28]] +; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP35]], [[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP40:%.*]] = phi <4 x i32> [ [[TMP30]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP38]], [[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP41:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP39]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP42:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP41]]) +; CHECK-NEXT: [[TMP43:%.*]] = add i32 [[TMP42]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP44:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP40]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP45:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP44]]) +; CHECK-NEXT: [[TMP46]] = add i32 [[TMP45]], [[TMP43]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP52:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP52]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP47]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP51]], [[MIDDLE_BLOCK]] ], [ 5, [[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP46]], [[MIDDLE_BLOCK]] ], [ 5, [[ENTRY]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[RES:%.*]], [[FOR_INC]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[COND]], i64 [[IV]] -; CHECK-NEXT: [[TMP53:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP53]], 0 +; CHECK-NEXT: [[TMP48:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP48]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]] ; CHECK: if.then: ; CHECK-NEXT: 
[[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[TMP54:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP54]], [[RDX]] +; CHECK-NEXT: [[TMP49:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP49]], [[RDX]] ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[TMP55:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[TMP55]] +; CHECK-NEXT: [[TMP50:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[TMP50]] ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[RES]] = phi i32 [ [[ADD3]], [[IF_THEN]] ], [ [[RDX]], [[FOR_BODY]] ] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i32 [ [[RES]], [[FOR_INC]] ], [ [[TMP51]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i32 [ [[RES]], [[FOR_INC]] ], [ [[TMP46]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[RES_LCSSA]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll index 98749da..c8d79b8 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -359,9 +359,9 @@ define void @pred_cfg1(i32 %k, i32 %j) { ; CHECK-NEXT: pred.store.continue: ; CHECK-NEXT: No successors ; CHECK-NEXT: } -; CHECK-NEXT: Successor(s): next.0.0 +; CHECK-NEXT: Successor(s): next.0.1 ; CHECK-EMPTY: -; CHECK-NEXT: next.0.0: +; CHECK-NEXT: next.0.1: ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]> ; CHECK-NEXT: No successors @@ -456,9 +456,9 @@ define void @pred_cfg2(i32 %k, i32 %j) { ; CHECK-NEXT: pred.store.continue: ; CHECK-NEXT: No successors ; CHECK-NEXT: } -; CHECK-NEXT: Successor(s): then.1.0 +; CHECK-NEXT: Successor(s): then.1.1 ; CHECK-EMPTY: -; CHECK-NEXT: then.1.0: +; CHECK-NEXT: then.1.1: ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]> ; CHECK-NEXT: No successors @@ -561,9 +561,9 @@ define void @pred_cfg3(i32 %k, i32 %j) { ; CHECK-NEXT: pred.store.continue: ; CHECK-NEXT: No successors ; CHECK-NEXT: } -; CHECK-NEXT: Successor(s): then.1.1 +; CHECK-NEXT: Successor(s): then.1.2 ; CHECK-EMPTY: -; CHECK-NEXT: then.1.1: +; CHECK-NEXT: then.1.2: ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]> ; CHECK-NEXT: No successors @@ -664,9 +664,9 @@ define void @merge_3_replicate_region(i32 %k, i32 %j) { ; CHECK-NEXT: pred.store.continue: ; CHECK-NEXT: No successors ; CHECK-NEXT: } -; CHECK-NEXT: Successor(s): then.0.0 +; CHECK-NEXT: Successor(s): then.0.4 ; CHECK-EMPTY: -; CHECK-NEXT: then.0.0: +; CHECK-NEXT: then.0.4: ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]> ; CHECK-NEXT: No successors diff --git 
a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
index 430cb29..ec6ef3a 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
@@ -976,8 +976,7 @@ TEST(VPRecipeTest, CastVPReplicateRecipeToVPUser) {
   Args.push_back(&Op1);
   Args.push_back(&Op2);

-  VPReplicateRecipe Recipe(nullptr, make_range(Args.begin(), Args.end()), true,
-                           false);
+  VPReplicateRecipe Recipe(nullptr, make_range(Args.begin(), Args.end()), true);
   EXPECT_TRUE(isa<VPUser>(&Recipe));
   VPRecipeBase *BaseR = &Recipe;
   EXPECT_TRUE(isa<VPUser>(BaseR));
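
To summarize the two-phase scheme described in the commit message above (the mask is attached as the last operand of the replicate recipe at construction time, and a later VPlan transform expands predicated recipes into if-then regions), here is a minimal, self-contained C++ sketch. The Recipe, Region, and expandToRegion names are simplified stand-ins invented for illustration, not the actual VPlan classes; the real logic lives in handleReplication, createReplicateRegion, and VPlanTransforms::addReplicateRegions in the diff above.

#include <cassert>
#include <iostream>
#include <iterator>
#include <string>
#include <utility>
#include <vector>

// Simplified stand-ins, invented for illustration only.
struct Value {
  std::string Name;
};

// Phase 1: construction. A replicated "recipe" keeps all of its operands in
// one list; if it is predicated, the mask is simply appended as the last
// operand, mirroring how the patch passes the mask into VPReplicateRecipe.
struct Recipe {
  std::string Instr;
  std::vector<Value *> Operands;
  bool Predicated;

  Recipe(std::string I, std::vector<Value *> Ops, Value *Mask = nullptr)
      : Instr(std::move(I)), Operands(std::move(Ops)), Predicated(Mask) {
    if (Mask)
      Operands.push_back(Mask); // the mask becomes the trailing operand
  }

  Value *getMask() const {
    assert(Predicated && "only predicated recipes carry a mask");
    return Operands.back();
  }
};

// Phase 2: a later, separate transform wraps each predicated recipe in an
// if-then region, using the mask for the region's branch and dropping it
// from the recipe's operand list.
struct Region {
  std::string Name;
  Value *BranchMask;
  Recipe Body;
};

Region expandToRegion(const Recipe &R) {
  assert(R.Predicated && "only predicated recipes are expanded");
  std::vector<Value *> OpsWithoutMask(R.Operands.begin(),
                                      std::prev(R.Operands.end()));
  return Region{"pred." + R.Instr, R.getMask(),
                Recipe(R.Instr, std::move(OpsWithoutMask))};
}

int main() {
  Value A{"%a"}, B{"%b"}, M{"%mask"};

  // During initial construction the recipe stays in its basic block, with
  // the mask recorded as a normal operand.
  Recipe Store("store", {&A, &B}, &M);
  std::cout << "operands incl. mask: " << Store.Operands.size() << "\n"; // 3

  // Only at the end of planning is the predicated recipe expanded into a
  // guarded region.
  Region Reg = expandToRegion(Store);
  std::cout << Reg.Name << " guarded by " << Reg.BranchMask->Name
            << ", body operands: " << Reg.Body.Operands.size() << "\n"; // 2
  return 0;
}

The design point mirrored here is the one the commit message gives: because the mask is just an ordinary operand, a predicated recipe can stay in its original basic block through earlier transforms such as fixed-order recurrence handling and in-loop reduction formation, and the replicate-region structure is only materialized late by addReplicateRegions.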