/// previous iteration (e.g. if the value is defined in the previous
/// iteration, we refer to it as first-order recurrence, if it is defined in
/// the iteration before the previous, we refer to it as second-order
- /// recurrence and so on). \p SinkAfter includes pairs of instructions where
- /// the first will be rescheduled to appear after the second if/when the loop
- /// is vectorized. It may be augmented with additional pairs if needed in
- /// order to handle Phi as a first-order recurrence.
- static bool
- isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop,
- MapVector<Instruction *, Instruction *> &SinkAfter,
- DominatorTree *DT);
+ /// recurrence and so on). Note that this function optimistically assumes that
+ /// uses of the recurrence can be re-ordered if necessary and users need to
+ /// check and perform the re-ordering.
+ static bool isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop,
+ DominatorTree *DT);
RecurKind getRecurrenceKind() const { return Kind; }
/// Holds the phi nodes that are fixed-order recurrences.
RecurrenceSet FixedOrderRecurrences;
- /// Holds instructions that need to sink past other instructions to handle
- /// fixed-order recurrences.
- MapVector<Instruction *, Instruction *> SinkAfter;
-
/// Holds the widest induction type encountered.
Type *WidestIndTy = nullptr;
return false;
}
-bool RecurrenceDescriptor::isFixedOrderRecurrence(
- PHINode *Phi, Loop *TheLoop,
- MapVector<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) {
+bool RecurrenceDescriptor::isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop,
+ DominatorTree *DT) {
// Ensure the phi node is in the loop header and has two incoming values.
if (Phi->getParent() != TheLoop->getHeader() ||
Previous = dyn_cast<Instruction>(PrevPhi->getIncomingValueForBlock(Latch));
}
- if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous) ||
- SinkAfter.count(Previous)) // Cannot rely on dominance due to motion.
+ if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous))
return false;
// Ensure every user of the phi node (recursively) is dominated by the
// loop.
// TODO: Consider extending this sinking to handle memory instructions.
- // We optimistically assume we can sink all users after Previous. Keep a set
- // of instructions to sink after Previous ordered by dominance in the common
- // basic block. It will be applied to SinkAfter if all users can be sunk.
- auto CompareByComesBefore = [](const Instruction *A, const Instruction *B) {
- return A->comesBefore(B);
- };
- std::set<Instruction *, decltype(CompareByComesBefore)> InstrsToSink(
- CompareByComesBefore);
-
BasicBlock *PhiBB = Phi->getParent();
SmallVector<Instruction *, 8> WorkList;
auto TryToPushSinkCandidate = [&](Instruction *SinkCandidate) {
- // Already sunk SinkCandidate.
- if (SinkCandidate->getParent() == PhiBB &&
- InstrsToSink.find(SinkCandidate) != InstrsToSink.end())
- return true;
-
// Cyclic dependence.
if (Previous == SinkCandidate)
return false;
SinkCandidate->mayReadFromMemory() || SinkCandidate->isTerminator())
return false;
- // Avoid sinking an instruction multiple times (if multiple operands are
- // fixed order recurrences) by sinking once - after the latest 'previous'
- // instruction.
- auto It = SinkAfter.find(SinkCandidate);
- if (It != SinkAfter.end()) {
- auto *OtherPrev = It->second;
- // Find the earliest entry in the 'sink-after' chain. The last entry in
- // the chain is the original 'Previous' for a recurrence handled earlier.
- auto EarlierIt = SinkAfter.find(OtherPrev);
- while (EarlierIt != SinkAfter.end()) {
- Instruction *EarlierInst = EarlierIt->second;
- EarlierIt = SinkAfter.find(EarlierInst);
- // Bail out if order has not been preserved.
- if (EarlierIt != SinkAfter.end() &&
- !DT->dominates(EarlierInst, OtherPrev))
- return false;
- OtherPrev = EarlierInst;
- }
- // Bail out if order has not been preserved.
- if (OtherPrev != It->second && !DT->dominates(It->second, OtherPrev))
- return false;
-
- // SinkCandidate is already being sunk after an instruction after
- // Previous. Nothing left to do.
- if (DT->dominates(Previous, OtherPrev) || Previous == OtherPrev)
- return true;
-
- // If there are other instructions to be sunk after SinkCandidate, remove
- // and re-insert SinkCandidate can break those instructions. Bail out for
- // simplicity.
- if (any_of(SinkAfter,
- [SinkCandidate](const std::pair<Instruction *, Instruction *> &P) {
- return P.second == SinkCandidate;
- }))
- return false;
-
- // Otherwise, Previous comes after OtherPrev and SinkCandidate needs to be
- // re-sunk to Previous, instead of sinking to OtherPrev. Remove
- // SinkCandidate from SinkAfter to ensure it's insert position is updated.
- SinkAfter.erase(SinkCandidate);
- }
-
// If we reach a PHI node that is not dominated by Previous, we reached a
// header PHI. No need for sinking.
if (isa<PHINode>(SinkCandidate))
return true;
// Sink User tentatively and check its users
- InstrsToSink.insert(SinkCandidate);
WorkList.push_back(SinkCandidate);
return true;
};
}
}
- // We can sink all users of Phi. Update the mapping.
- for (Instruction *I : InstrsToSink) {
- SinkAfter[I] = Previous;
- Previous = I;
- }
return true;
}
continue;
}
- if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop,
- SinkAfter, DT)) {
+ if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, DT)) {
AllowedExit.insert(Phi);
FixedOrderRecurrences.insert(Phi);
continue;
}
}
- // For fixed order recurrences, we use the previous value (incoming value from
- // the latch) to check if it dominates all users of the recurrence. Bail out
- // if we have to sink such an instruction for another recurrence, as the
- // dominance requirement may not hold after sinking.
- BasicBlock *LoopLatch = TheLoop->getLoopLatch();
- if (any_of(FixedOrderRecurrences, [LoopLatch, this](const PHINode *Phi) {
- Instruction *V =
- cast<Instruction>(Phi->getIncomingValueForBlock(LoopLatch));
- return SinkAfter.contains(V);
- }))
- return false;
-
// Now we know the widest induction type, check if our found induction
// is the same size. If it's not, unset it here and InnerLoopVectorizer
// will create another.
// Sink users of fixed-order recurrence past the recipe defining the previous
// value and introduce FirstOrderRecurrenceSplice VPInstructions.
- VPlanTransforms::adjustFixedOrderRecurrences(*Plan, Builder);
+ if (!VPlanTransforms::adjustFixedOrderRecurrences(*Plan, Builder))
+ return std::nullopt;
// Interleave memory: for each Interleave Group we marked earlier as relevant
// for this VPlan, replace the Recipes widening its memory instructions with a
return VPDT.properlyDominates(ParentA, ParentB);
}
-// Sink users of \p FOR after the recipe defining the previous value \p Previous
-// of the recurrence.
-static void
+/// Sink users of \p FOR after the recipe defining the previous value \p
+/// Previous of the recurrence. \returns true if all users of \p FOR could be
+/// re-arranged as needed or false if it is not possible.
+static bool
sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
VPRecipeBase *Previous,
VPDominatorTree &VPDT) {
SmallPtrSet<VPRecipeBase *, 8> Seen;
Seen.insert(Previous);
auto TryToPushSinkCandidate = [&](VPRecipeBase *SinkCandidate) {
- assert(
- SinkCandidate != Previous &&
- "The previous value cannot depend on the users of the recurrence phi.");
+ // The previous value must not depend on the users of the recurrence phi. In
+ // that case, FOR is not a fixed order recurrence.
+ if (SinkCandidate == Previous)
+ return false;
+
if (isa<VPHeaderPHIRecipe>(SinkCandidate) ||
!Seen.insert(SinkCandidate).second ||
properlyDominates(Previous, SinkCandidate, VPDT))
- return;
+ return true;
WorkList.push_back(SinkCandidate);
+ return true;
};
// Recursively sink users of FOR after Previous.
"only recipes with a single defined value expected");
for (VPUser *User : Current->getVPSingleValue()->users()) {
if (auto *R = dyn_cast<VPRecipeBase>(User))
- TryToPushSinkCandidate(R);
+ if (!TryToPushSinkCandidate(R))
+ return false;
}
}
SinkCandidate->moveAfter(Previous);
Previous = SinkCandidate;
}
+ return true;
}
-void VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
+bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
VPBuilder &Builder) {
VPDominatorTree VPDT;
VPDT.recalculate(Plan);
Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
}
- sinkRecurrenceUsersAfterPrevious(FOR, Previous, VPDT);
+ if (!sinkRecurrenceUsersAfterPrevious(FOR, Previous, VPDT))
+ return false;
// Introduce a recipe to combine the incoming and previous values of a
// fixed-order recurrence.
// all users.
RecurSplice->setOperand(0, FOR);
}
+ return true;
}
/// to combine the value from the recurrence phis and previous values. The
/// current implementation assumes all users can be sunk after the previous
/// value, which is enforced by earlier legality checks.
- static void adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &Builder);
+ /// \returns true if all users of fixed-order recurrences could be re-arranged
+ /// as needed or false if it is not possible. In the latter case, \p Plan is
+ /// not valid.
+ static bool adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &Builder);
/// Optimize \p Plan based on \p BestVF and \p BestUF. This may restrict the
/// resulting plan to \p BestVF and \p BestUF.
ret ptr %for.1
}
-; Make sure LLVM doesn't generate wrong data in SinkAfter, and causes crash in
-; loop vectorizer.
-define double @test_crash(ptr %p, ptr noalias %a, ptr noalias %b) {
-; CHECK-LABEL: @test_crash
-; CHECK-NOT: vector.body:
-; CHECK: ret
+; In this test case, %USE_2_FORS uses 2 different fixed-order recurrences and
+; it needs to be sunk past the previous value for both recurrences.
+define double @test_resinking_required(ptr %p, ptr noalias %a, ptr noalias %b) {
+; CHECK-LABEL: @test_resinking_required(
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
+; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x double> [ <double poison, double poison, double poison, double 0.000000e+00>, %vector.ph ], [ [[BROADCAST_SPLAT:%.*]], %vector.body ]
+; CHECK-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x double> [ <double poison, double poison, double poison, double 0.000000e+00>, %vector.ph ], [ [[BROADCAST_SPLAT4:%.*]], %vector.body ]
+; CHECK-NEXT: [[VECTOR_RECUR2:%.*]] = phi <4 x double> [ <double poison, double poison, double poison, double 0.000000e+00>, %vector.ph ], [ [[TMP4:%.*]], %vector.body ]
+; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr %a, align 8
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[TMP0]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[BROADCAST_SPLAT]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP2:%.*]] = fdiv <4 x double> zeroinitializer, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr %b, align 8
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x double> poison, double [[TMP3]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT4]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT3]], <4 x double> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP4]] = shufflevector <4 x double> [[VECTOR_RECUR1]], <4 x double> [[BROADCAST_SPLAT4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR2]], <4 x double> [[TMP4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[TMP2]], i32 3
+; CHECK-NEXT: store double [[TMP6]], ptr [[P:%.*]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
+; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 0, 0
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT]], i32 2
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT5:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT4]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI6:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT4]], i32 2
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT9:%.*]] = extractelement <4 x double> [[TMP4]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI10:%.*]] = extractelement <4 x double> [[TMP4]], i32 2
+; CHECK-NEXT: br i1 [[CMP_N]], label %End, label %scalar.ph
+;
Entry:
br label %Loop
%for.2 = phi double [ %l2, %Loop ], [ 0.000000e+00, %Entry ]
%for.3 = phi double [ %for.2, %Loop ], [ 0.000000e+00, %Entry ]
%iv = phi i64 [ %iv.next, %Loop ], [ 0, %Entry ]
- %USE_2_INDVARS = fdiv double %for.3, %for.1
+ %USE_2_FORS = fdiv double %for.3, %for.1
%div = fdiv double 0.000000e+00, %for.1
%l1 = load double, ptr %a, align 8
%iv.next= add nuw nsw i64 %iv, 1
define void @test_pr54233_for_depend_on_each_other(ptr noalias %a, ptr noalias %b) {
; CHECK-LABEL: @test_pr54233_for_depend_on_each_other(
; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[BROADCAST_SPLAT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[B:%.*]], align 4
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR1]], <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP2]], <i32 10, i32 10, i32 10, i32 10>
+; CHECK-NEXT: [[TMP4]] = xor <4 x i32> <i32 12, i32 12, i32 12, i32 12>, [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP6:%.*]] = shl <4 x i32> [[TMP2]], [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[TMP6]], <i32 255, i32 255, i32 255, i32 255>
+; CHECK-NEXT: [[TMP8:%.*]] = and <4 x i32> [[TMP7]], [[TMP3]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0
+; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr [[TMP10]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
+; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1001, 1000
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x i32> [[BROADCAST_SPLAT]], i32 3
+; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[FOR_1:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[FOR_2:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[FOR_2_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[FOR_2]], 10
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[FOR_2]], [[FOR_1]]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[SCALAR_RECUR4:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT3]], [[SCALAR_PH]] ], [ [[FOR_2_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SCALAR_RECUR4]], 10
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[SCALAR_RECUR4]], [[SCALAR_RECUR]]
; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[SHL]], 255
; CHECK-NEXT: [[AND:%.*]] = and i32 [[XOR]], [[OR]]
-; CHECK-NEXT: [[FOR_1_NEXT]] = xor i32 12, [[FOR_2]]
-; CHECK-NEXT: [[FOR_2_NEXT]] = load i32, ptr [[B:%.*]], align 4
-; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IV]]
+; CHECK-NEXT: [[FOR_1_NEXT]] = xor i32 12, [[SCALAR_RECUR4]]
+; CHECK-NEXT: [[FOR_2_NEXT]] = load i32, ptr [[B]], align 4
+; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: store i32 [[AND]], ptr [[A_GEP]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 1000
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;