From 8867fc69f03d0438b0e8beb1dd89c04238f631a1 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 2 Apr 2021 13:28:44 +0100 Subject: [PATCH] [LV] Hoist mapping of IR operands to VPValues (NFC). This patch moves mapping of IR operands to VPValues out of tryToCreateWidenRecipe. This allows using existing VPValue operands when widening recipes directly, which will be introduced in future patches. --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 100 ++++++++++++++---------- llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h | 26 +++--- 2 files changed, 74 insertions(+), 52 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 15c53e0..f0c5e55 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8398,7 +8398,9 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) { return BlockMaskCache[BB] = BlockMask; } -VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range, +VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I, + ArrayRef Operands, + VFRange &Range, VPlanPtr &Plan) { assert((isa(I) || isa(I)) && "Must be called with either a load or store"); @@ -8425,34 +8427,35 @@ VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range, if (Legal->isMaskRequired(I)) Mask = createBlockInMask(I->getParent(), Plan); - VPValue *Addr = Plan->getOrAddVPValue(getLoadStorePointerOperand(I)); if (LoadInst *Load = dyn_cast(I)) - return new VPWidenMemoryInstructionRecipe(*Load, Addr, Mask); + return new VPWidenMemoryInstructionRecipe(*Load, Operands[0], Mask); StoreInst *Store = cast(I); - VPValue *StoredValue = Plan->getOrAddVPValue(Store->getValueOperand()); - return new VPWidenMemoryInstructionRecipe(*Store, Addr, StoredValue, Mask); + return new VPWidenMemoryInstructionRecipe(*Store, Operands[1], Operands[0], + Mask); } VPWidenIntOrFpInductionRecipe * -VPRecipeBuilder::tryToOptimizeInductionPHI(PHINode *Phi, VPlan &Plan) const { +VPRecipeBuilder::tryToOptimizeInductionPHI(PHINode *Phi, + ArrayRef Operands) const { // Check if this is an integer or fp induction. If so, build the recipe that // produces its scalar and vector values. InductionDescriptor II = Legal->getInductionVars().lookup(Phi); if (II.getKind() == InductionDescriptor::IK_IntInduction || II.getKind() == InductionDescriptor::IK_FpInduction) { - VPValue *Start = Plan.getOrAddVPValue(II.getStartValue()); + assert(II.getStartValue() == + Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader())); const SmallVectorImpl &Casts = II.getCastInsts(); return new VPWidenIntOrFpInductionRecipe( - Phi, Start, Casts.empty() ? nullptr : Casts.front()); + Phi, Operands[0], Casts.empty() ? nullptr : Casts.front()); } return nullptr; } -VPWidenIntOrFpInductionRecipe * -VPRecipeBuilder::tryToOptimizeInductionTruncate(TruncInst *I, VFRange &Range, - VPlan &Plan) const { +VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate( + TruncInst *I, ArrayRef Operands, VFRange &Range, + VPlan &Plan) const { // Optimize the special case where the source is a constant integer // induction variable. Notice that we can only optimize the 'trunc' case // because (a) FP conversions lose precision, (b) sext/zext may wrap, and @@ -8479,14 +8482,16 @@ VPRecipeBuilder::tryToOptimizeInductionTruncate(TruncInst *I, VFRange &Range, return nullptr; } -VPRecipeOrVPValueTy VPRecipeBuilder::tryToBlend(PHINode *Phi, VPlanPtr &Plan) { +VPRecipeOrVPValueTy VPRecipeBuilder::tryToBlend(PHINode *Phi, + ArrayRef Operands, + VPlanPtr &Plan) { // If all incoming values are equal, the incoming VPValue can be used directly // instead of creating a new VPBlendRecipe. - Value *FirstIncoming = Phi->getIncomingValue(0); - if (all_of(Phi->incoming_values(), [FirstIncoming](const Value *Inc) { + VPValue *FirstIncoming = Operands[0]; + if (all_of(Operands, [FirstIncoming](const VPValue *Inc) { return FirstIncoming == Inc; })) { - return Plan->getOrAddVPValue(Phi->getIncomingValue(0)); + return Operands[0]; } // We know that all PHIs in non-header blocks are converted into selects, so @@ -8494,7 +8499,7 @@ VPRecipeOrVPValueTy VPRecipeBuilder::tryToBlend(PHINode *Phi, VPlanPtr &Plan) { // builder. At this point we generate the predication tree. There may be // duplications since this is a simple recursive scan, but future // optimizations will clean it up. - SmallVector Operands; + SmallVector OperandsWithMask; unsigned NumIncoming = Phi->getNumIncomingValues(); for (unsigned In = 0; In < NumIncoming; In++) { @@ -8502,15 +8507,16 @@ VPRecipeOrVPValueTy VPRecipeBuilder::tryToBlend(PHINode *Phi, VPlanPtr &Plan) { createEdgeMask(Phi->getIncomingBlock(In), Phi->getParent(), Plan); assert((EdgeMask || NumIncoming == 1) && "Multiple predecessors with one having a full mask"); - Operands.push_back(Plan->getOrAddVPValue(Phi->getIncomingValue(In))); + OperandsWithMask.push_back(Operands[In]); if (EdgeMask) - Operands.push_back(EdgeMask); + OperandsWithMask.push_back(EdgeMask); } - return toVPRecipeResult(new VPBlendRecipe(Phi, Operands)); + return toVPRecipeResult(new VPBlendRecipe(Phi, OperandsWithMask)); } -VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI, VFRange &Range, - VPlan &Plan) const { +VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI, + ArrayRef Operands, + VFRange &Range) const { bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange( [this, CI](ElementCount VF) { @@ -8546,7 +8552,8 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI, VFRange &Range, if (!LoopVectorizationPlanner::getDecisionAndClampRange(willWiden, Range)) return nullptr; - return new VPWidenCallRecipe(*CI, Plan.mapToVPValues(CI->arg_operands())); + ArrayRef Ops = Operands.take_front(CI->getNumArgOperands()); + return new VPWidenCallRecipe(*CI, make_range(Ops.begin(), Ops.end())); } bool VPRecipeBuilder::shouldWiden(Instruction *I, VFRange &Range) const { @@ -8563,7 +8570,8 @@ bool VPRecipeBuilder::shouldWiden(Instruction *I, VFRange &Range) const { Range); } -VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I, VPlan &Plan) const { +VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I, + ArrayRef Operands) const { auto IsVectorizableOpcode = [](unsigned Opcode) { switch (Opcode) { case Instruction::Add: @@ -8609,7 +8617,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I, VPlan &Plan) const { return nullptr; // Success: widen this instruction. - return new VPWidenRecipe(*I, Plan.mapToVPValues(I->operands())); + return new VPWidenRecipe(*I, make_range(Operands.begin(), Operands.end())); } VPBasicBlock *VPRecipeBuilder::handleReplication( @@ -8692,36 +8700,39 @@ VPRegionBlock *VPRecipeBuilder::createReplicateRegion(Instruction *Instr, return Region; } -VPRecipeOrVPValueTy VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr, - VFRange &Range, - VPlanPtr &Plan) { +VPRecipeOrVPValueTy +VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr, + ArrayRef Operands, + VFRange &Range, VPlanPtr &Plan) { // First, check for specific widening recipes that deal with calls, memory // operations, inductions and Phi nodes. if (auto *CI = dyn_cast(Instr)) - return toVPRecipeResult(tryToWidenCall(CI, Range, *Plan)); + return toVPRecipeResult(tryToWidenCall(CI, Operands, Range)); if (isa(Instr) || isa(Instr)) - return toVPRecipeResult(tryToWidenMemory(Instr, Range, Plan)); + return toVPRecipeResult(tryToWidenMemory(Instr, Operands, Range, Plan)); VPRecipeBase *Recipe; if (auto Phi = dyn_cast(Instr)) { if (Phi->getParent() != OrigLoop->getHeader()) - return tryToBlend(Phi, Plan); - if ((Recipe = tryToOptimizeInductionPHI(Phi, *Plan))) + return tryToBlend(Phi, Operands, Plan); + if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands))) return toVPRecipeResult(Recipe); if (Legal->isReductionVariable(Phi)) { RecurrenceDescriptor &RdxDesc = Legal->getReductionVars()[Phi]; - VPValue *StartV = - Plan->getOrAddVPValue(RdxDesc.getRecurrenceStartValue()); + assert(RdxDesc.getRecurrenceStartValue() == + Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader())); + VPValue *StartV = Operands[0]; return toVPRecipeResult(new VPWidenPHIRecipe(Phi, RdxDesc, *StartV)); } return toVPRecipeResult(new VPWidenPHIRecipe(Phi)); } - if (isa(Instr) && (Recipe = tryToOptimizeInductionTruncate( - cast(Instr), Range, *Plan))) + if (isa(Instr) && + (Recipe = tryToOptimizeInductionTruncate(cast(Instr), Operands, + Range, *Plan))) return toVPRecipeResult(Recipe); if (!shouldWiden(Instr, Range)) @@ -8729,16 +8740,16 @@ VPRecipeOrVPValueTy VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr, if (auto GEP = dyn_cast(Instr)) return toVPRecipeResult(new VPWidenGEPRecipe( - GEP, Plan->mapToVPValues(GEP->operands()), OrigLoop)); + GEP, make_range(Operands.begin(), Operands.end()), OrigLoop)); if (auto *SI = dyn_cast(Instr)) { bool InvariantCond = PSE.getSE()->isLoopInvariant(PSE.getSCEV(SI->getOperand(0)), OrigLoop); return toVPRecipeResult(new VPWidenSelectRecipe( - *SI, Plan->mapToVPValues(SI->operands()), InvariantCond)); + *SI, make_range(Operands.begin(), Operands.end()), InvariantCond)); } - return toVPRecipeResult(tryToWiden(Instr, *Plan)); + return toVPRecipeResult(tryToWiden(Instr, Operands)); } void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF, @@ -8861,8 +8872,17 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( if (isa(Instr) || DeadInstructions.count(Instr)) continue; - if (auto RecipeOrValue = - RecipeBuilder.tryToCreateWidenRecipe(Instr, Range, Plan)) { + SmallVector Operands; + auto *Phi = dyn_cast(Instr); + if (Phi && Phi->getParent() == OrigLoop->getHeader()) { + Operands.push_back(Plan->getOrAddVPValue( + Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader()))); + } else { + auto OpRange = Plan->mapToVPValues(Instr->operands()); + Operands = {OpRange.begin(), OpRange.end()}; + } + if (auto RecipeOrValue = RecipeBuilder.tryToCreateWidenRecipe( + Instr, Operands, Range, Plan)) { // If Instr can be simplified to an existing VPValue, use it. if (RecipeOrValue.is()) { Plan->addVPValue(Instr, RecipeOrValue.get()); diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h index 89c7b12..a101730 100644 --- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -64,36 +64,37 @@ class VPRecipeBuilder { /// Check if the load or store instruction \p I should widened for \p /// Range.Start and potentially masked. Such instructions are handled by a /// recipe that takes an additional VPInstruction for the mask. - VPRecipeBase *tryToWidenMemory(Instruction *I, VFRange &Range, - VPlanPtr &Plan); + VPRecipeBase *tryToWidenMemory(Instruction *I, ArrayRef Operands, + VFRange &Range, VPlanPtr &Plan); /// Check if an induction recipe should be constructed for \I. If so build and /// return it. If not, return null. - VPWidenIntOrFpInductionRecipe *tryToOptimizeInductionPHI(PHINode *Phi, - VPlan &Plan) const; + VPWidenIntOrFpInductionRecipe * + tryToOptimizeInductionPHI(PHINode *Phi, ArrayRef Operands) const; /// Optimize the special case where the operand of \p I is a constant integer /// induction variable. VPWidenIntOrFpInductionRecipe * - tryToOptimizeInductionTruncate(TruncInst *I, VFRange &Range, - VPlan &Plan) const; + tryToOptimizeInductionTruncate(TruncInst *I, ArrayRef Operands, + VFRange &Range, VPlan &Plan) const; /// Handle non-loop phi nodes. Return a VPValue, if all incoming values match /// or a new VPBlendRecipe otherwise. Currently all such phi nodes are turned /// into a sequence of select instructions as the vectorizer currently /// performs full if-conversion. - VPRecipeOrVPValueTy tryToBlend(PHINode *Phi, VPlanPtr &Plan); + VPRecipeOrVPValueTy tryToBlend(PHINode *Phi, ArrayRef Operands, + VPlanPtr &Plan); /// Handle call instructions. If \p CI can be widened for \p Range.Start, /// return a new VPWidenCallRecipe. Range.End may be decreased to ensure same /// decision from \p Range.Start to \p Range.End. - VPWidenCallRecipe *tryToWidenCall(CallInst *CI, VFRange &Range, - VPlan &Plan) const; + VPWidenCallRecipe *tryToWidenCall(CallInst *CI, ArrayRef Operands, + VFRange &Range) const; /// Check if \p I has an opcode that can be widened and return a VPWidenRecipe /// if it can. The function should only be called if the cost-model indicates /// that widening should be performed. - VPWidenRecipe *tryToWiden(Instruction *I, VPlan &Plan) const; + VPWidenRecipe *tryToWiden(Instruction *I, ArrayRef Operands) const; /// Return a VPRecipeOrValueTy with VPRecipeBase * being set. This can be used to force the use as VPRecipeBase* for recipe sub-types that also inherit from VPValue. VPRecipeOrVPValueTy toVPRecipeResult(VPRecipeBase *R) const { return R; } @@ -110,8 +111,9 @@ public: /// create for \p I withing the given VF \p Range. If an existing VPValue can /// be used or if a recipe can be created, return it. Otherwise return a /// VPRecipeOrVPValueTy with nullptr. - VPRecipeOrVPValueTy tryToCreateWidenRecipe(Instruction *Instr, VFRange &Range, - VPlanPtr &Plan); + VPRecipeOrVPValueTy tryToCreateWidenRecipe(Instruction *Instr, + ArrayRef Operands, + VFRange &Range, VPlanPtr &Plan); /// Set the recipe created for given ingredient. This operation is a no-op for /// ingredients that were not marked using a nullptr entry in the map. -- 2.7.4