From 7fc0b3049df532fce726d1ff6869a9f6e3183780 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 13 Apr 2023 22:00:51 +0100 Subject: [PATCH] [VPlan] Switch to checking sinking legality for recurrences in VPlan. Building on D142885 and D142589, retire the SinkAfter map from the recurrence handling code. It is replaced by checking whether it is possible to sink all users of a recurrence directly in VPlan. This results in simpler code overall and allows to handle additional cases (see the improvements in @test_crash). Depends on D142885. Depends on D142589. Reviewed By: Ayal Differential Revision: https://reviews.llvm.org/D142886 --- llvm/include/llvm/Analysis/IVDescriptors.h | 13 ++-- .../Vectorize/LoopVectorizationLegality.h | 4 -- llvm/lib/Analysis/IVDescriptors.cpp | 70 +--------------------- .../Vectorize/LoopVectorizationLegality.cpp | 15 +---- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 3 +- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 28 +++++---- llvm/lib/Transforms/Vectorize/VPlanTransforms.h | 5 +- .../LoopVectorize/first-order-recurrence-chains.ll | 42 ++++++++++--- .../first-order-recurrence-multiply-recurrences.ll | 51 +++++++++++++--- 9 files changed, 110 insertions(+), 121 deletions(-) diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h index 696d5e2..ad936d2 100644 --- a/llvm/include/llvm/Analysis/IVDescriptors.h +++ b/llvm/include/llvm/Analysis/IVDescriptors.h @@ -186,14 +186,11 @@ public: /// previous iteration (e.g. if the value is defined in the previous /// iteration, we refer to it as first-order recurrence, if it is defined in /// the iteration before the previous, we refer to it as second-order - /// recurrence and so on). \p SinkAfter includes pairs of instructions where - /// the first will be rescheduled to appear after the second if/when the loop - /// is vectorized. It may be augmented with additional pairs if needed in - /// order to handle Phi as a first-order recurrence. - static bool - isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop, - MapVector &SinkAfter, - DominatorTree *DT); + /// recurrence and so on). Note that this function optimistically assumes that + /// uses of the recurrence can be re-ordered if necessary and users need to + /// check and perform the re-ordering. + static bool isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop, + DominatorTree *DT); RecurKind getRecurrenceKind() const { return Kind; } diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index 4514e60..1863e2e 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -516,10 +516,6 @@ private: /// Holds the phi nodes that are fixed-order recurrences. RecurrenceSet FixedOrderRecurrences; - /// Holds instructions that need to sink past other instructions to handle - /// fixed-order recurrences. - MapVector SinkAfter; - /// Holds the widest induction type encountered. Type *WidestIndTy = nullptr; diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp index 22b9f64..6fcf584 100644 --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -927,9 +927,8 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop, return false; } -bool RecurrenceDescriptor::isFixedOrderRecurrence( - PHINode *Phi, Loop *TheLoop, - MapVector &SinkAfter, DominatorTree *DT) { +bool RecurrenceDescriptor::isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop, + DominatorTree *DT) { // Ensure the phi node is in the loop header and has two incoming values. if (Phi->getParent() != TheLoop->getHeader() || @@ -965,8 +964,7 @@ bool RecurrenceDescriptor::isFixedOrderRecurrence( Previous = dyn_cast(PrevPhi->getIncomingValueForBlock(Latch)); } - if (!Previous || !TheLoop->contains(Previous) || isa(Previous) || - SinkAfter.count(Previous)) // Cannot rely on dominance due to motion. + if (!Previous || !TheLoop->contains(Previous) || isa(Previous)) return false; // Ensure every user of the phi node (recursively) is dominated by the @@ -975,23 +973,9 @@ bool RecurrenceDescriptor::isFixedOrderRecurrence( // loop. // TODO: Consider extending this sinking to handle memory instructions. - // We optimistically assume we can sink all users after Previous. Keep a set - // of instructions to sink after Previous ordered by dominance in the common - // basic block. It will be applied to SinkAfter if all users can be sunk. - auto CompareByComesBefore = [](const Instruction *A, const Instruction *B) { - return A->comesBefore(B); - }; - std::set InstrsToSink( - CompareByComesBefore); - BasicBlock *PhiBB = Phi->getParent(); SmallVector WorkList; auto TryToPushSinkCandidate = [&](Instruction *SinkCandidate) { - // Already sunk SinkCandidate. - if (SinkCandidate->getParent() == PhiBB && - InstrsToSink.find(SinkCandidate) != InstrsToSink.end()) - return true; - // Cyclic dependence. if (Previous == SinkCandidate) return false; @@ -1005,55 +989,12 @@ bool RecurrenceDescriptor::isFixedOrderRecurrence( SinkCandidate->mayReadFromMemory() || SinkCandidate->isTerminator()) return false; - // Avoid sinking an instruction multiple times (if multiple operands are - // fixed order recurrences) by sinking once - after the latest 'previous' - // instruction. - auto It = SinkAfter.find(SinkCandidate); - if (It != SinkAfter.end()) { - auto *OtherPrev = It->second; - // Find the earliest entry in the 'sink-after' chain. The last entry in - // the chain is the original 'Previous' for a recurrence handled earlier. - auto EarlierIt = SinkAfter.find(OtherPrev); - while (EarlierIt != SinkAfter.end()) { - Instruction *EarlierInst = EarlierIt->second; - EarlierIt = SinkAfter.find(EarlierInst); - // Bail out if order has not been preserved. - if (EarlierIt != SinkAfter.end() && - !DT->dominates(EarlierInst, OtherPrev)) - return false; - OtherPrev = EarlierInst; - } - // Bail out if order has not been preserved. - if (OtherPrev != It->second && !DT->dominates(It->second, OtherPrev)) - return false; - - // SinkCandidate is already being sunk after an instruction after - // Previous. Nothing left to do. - if (DT->dominates(Previous, OtherPrev) || Previous == OtherPrev) - return true; - - // If there are other instructions to be sunk after SinkCandidate, remove - // and re-insert SinkCandidate can break those instructions. Bail out for - // simplicity. - if (any_of(SinkAfter, - [SinkCandidate](const std::pair &P) { - return P.second == SinkCandidate; - })) - return false; - - // Otherwise, Previous comes after OtherPrev and SinkCandidate needs to be - // re-sunk to Previous, instead of sinking to OtherPrev. Remove - // SinkCandidate from SinkAfter to ensure it's insert position is updated. - SinkAfter.erase(SinkCandidate); - } - // If we reach a PHI node that is not dominated by Previous, we reached a // header PHI. No need for sinking. if (isa(SinkCandidate)) return true; // Sink User tentatively and check its users - InstrsToSink.insert(SinkCandidate); WorkList.push_back(SinkCandidate); return true; }; @@ -1068,11 +1009,6 @@ bool RecurrenceDescriptor::isFixedOrderRecurrence( } } - // We can sink all users of Phi. Update the mapping. - for (Instruction *I : InstrsToSink) { - SinkAfter[I] = Previous; - Previous = I; - } return true; } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index f45d800..3a868e8 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -743,8 +743,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { continue; } - if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, - SinkAfter, DT)) { + if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, DT)) { AllowedExit.insert(Phi); FixedOrderRecurrences.insert(Phi); continue; @@ -917,18 +916,6 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { } } - // For fixed order recurrences, we use the previous value (incoming value from - // the latch) to check if it dominates all users of the recurrence. Bail out - // if we have to sink such an instruction for another recurrence, as the - // dominance requirement may not hold after sinking. - BasicBlock *LoopLatch = TheLoop->getLoopLatch(); - if (any_of(FixedOrderRecurrences, [LoopLatch, this](const PHINode *Phi) { - Instruction *V = - cast(Phi->getIncomingValueForBlock(LoopLatch)); - return SinkAfter.contains(V); - })) - return false; - // Now we know the widest induction type, check if our found induction // is the same size. If it's not, unset it here and InnerLoopVectorizer // will create another. diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index b073cf7..c419df8 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9067,7 +9067,8 @@ std::optional LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( // Sink users of fixed-order recurrence past the recipe defining the previous // value and introduce FirstOrderRecurrenceSplice VPInstructions. - VPlanTransforms::adjustFixedOrderRecurrences(*Plan, Builder); + if (!VPlanTransforms::adjustFixedOrderRecurrences(*Plan, Builder)) + return std::nullopt; // Interleave memory: for each Interleave Group we marked earlier as relevant // for this VPlan, replace the Recipes widening its memory instructions with a diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index e0188d2..f091955 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -652,9 +652,10 @@ static bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B, return VPDT.properlyDominates(ParentA, ParentB); } -// Sink users of \p FOR after the recipe defining the previous value \p Previous -// of the recurrence. -static void +/// Sink users of \p FOR after the recipe defining the previous value \p +/// Previous of the recurrence. \returns true if all users of \p FOR could be +/// re-arranged as needed or false if it is not possible. +static bool sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR, VPRecipeBase *Previous, VPDominatorTree &VPDT) { @@ -663,15 +664,18 @@ sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR, SmallPtrSet Seen; Seen.insert(Previous); auto TryToPushSinkCandidate = [&](VPRecipeBase *SinkCandidate) { - assert( - SinkCandidate != Previous && - "The previous value cannot depend on the users of the recurrence phi."); + // The previous value must not depend on the users of the recurrence phi. In + // that case, FOR is not a fixed order recurrence. + if (SinkCandidate == Previous) + return false; + if (isa(SinkCandidate) || !Seen.insert(SinkCandidate).second || properlyDominates(Previous, SinkCandidate, VPDT)) - return; + return true; WorkList.push_back(SinkCandidate); + return true; }; // Recursively sink users of FOR after Previous. @@ -682,7 +686,8 @@ sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR, "only recipes with a single defined value expected"); for (VPUser *User : Current->getVPSingleValue()->users()) { if (auto *R = dyn_cast(User)) - TryToPushSinkCandidate(R); + if (!TryToPushSinkCandidate(R)) + return false; } } @@ -699,9 +704,10 @@ sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR, SinkCandidate->moveAfter(Previous); Previous = SinkCandidate; } + return true; } -void VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan, +bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &Builder) { VPDominatorTree VPDT; VPDT.recalculate(Plan); @@ -724,7 +730,8 @@ void VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan, Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe(); } - sinkRecurrenceUsersAfterPrevious(FOR, Previous, VPDT); + if (!sinkRecurrenceUsersAfterPrevious(FOR, Previous, VPDT)) + return false; // Introduce a recipe to combine the incoming and previous values of a // fixed-order recurrence. @@ -743,4 +750,5 @@ void VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan, // all users. RecurSplice->setOperand(0, FOR); } + return true; } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index e5bd1a4..430628cb 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -77,7 +77,10 @@ struct VPlanTransforms { /// to combine the value from the recurrence phis and previous values. The /// current implementation assumes all users can be sunk after the previous /// value, which is enforced by earlier legality checks. - static void adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &Builder); + /// \returns true if all users of fixed-order recurrences could be re-arranged + /// as needed or false if it is not possible. In the latter case, \p Plan is + /// not valid. + static bool adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &Builder); /// Optimize \p Plan based on \p BestVF and \p BestUF. This may restrict the /// resulting plan to \p BestVF and \p BestUF. diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll index 5e23507..371dc7f 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll @@ -647,12 +647,40 @@ exit: ret ptr %for.1 } -; Make sure LLVM doesn't generate wrong data in SinkAfter, and causes crash in -; loop vectorizer. -define double @test_crash(ptr %p, ptr noalias %a, ptr noalias %b) { -; CHECK-LABEL: @test_crash -; CHECK-NOT: vector.body: -; CHECK: ret +; In this test case, %USE_2_FORS uses 2 different fixed-order recurrences and +; it needs to be sunk past the previous value for both recurrences. +define double @test_resinking_required(ptr %p, ptr noalias %a, ptr noalias %b) { +; CHECK-LABEL: @test_resinking_required( +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] +; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x double> [ , %vector.ph ], [ [[BROADCAST_SPLAT:%.*]], %vector.body ] +; CHECK-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x double> [ , %vector.ph ], [ [[BROADCAST_SPLAT4:%.*]], %vector.body ] +; CHECK-NEXT: [[VECTOR_RECUR2:%.*]] = phi <4 x double> [ , %vector.ph ], [ [[TMP4:%.*]], %vector.body ] +; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr %a, align 8 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[TMP0]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[BROADCAST_SPLAT]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fdiv <4 x double> zeroinitializer, [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr %b, align 8 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x double> poison, double [[TMP3]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT4]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT3]], <4 x double> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4]] = shufflevector <4 x double> [[VECTOR_RECUR1]], <4 x double> [[BROADCAST_SPLAT4]], <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR2]], <4 x double> [[TMP4]], <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[TMP2]], i32 3 +; CHECK-NEXT: store double [[TMP6]], ptr [[P:%.*]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 0, 0 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT]], i32 2 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT5:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT4]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI6:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT4]], i32 2 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT9:%.*]] = extractelement <4 x double> [[TMP4]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI10:%.*]] = extractelement <4 x double> [[TMP4]], i32 2 +; CHECK-NEXT: br i1 [[CMP_N]], label %End, label %scalar.ph +; Entry: br label %Loop @@ -661,7 +689,7 @@ Loop: %for.2 = phi double [ %l2, %Loop ], [ 0.000000e+00, %Entry ] %for.3 = phi double [ %for.2, %Loop ], [ 0.000000e+00, %Entry ] %iv = phi i64 [ %iv.next, %Loop ], [ 0, %Entry ] - %USE_2_INDVARS = fdiv double %for.3, %for.1 + %USE_2_FORS = fdiv double %for.3, %for.1 %div = fdiv double 0.000000e+00, %for.1 %l1 = load double, ptr %a, align 8 %iv.next= add nuw nsw i64 %iv, 1 diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll index 3d07e2c..aea49ac 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll @@ -221,22 +221,55 @@ exit: define void @test_pr54233_for_depend_on_each_other(ptr noalias %a, ptr noalias %b) { ; CHECK-LABEL: @test_pr54233_for_depend_on_each_other( ; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[BROADCAST_SPLAT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[B:%.*]], align 4 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR1]], <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP4]] = xor <4 x i32> , [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP4]], <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shl <4 x i32> [[TMP2]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[TMP6]], +; CHECK-NEXT: [[TMP8:%.*]] = and <4 x i32> [[TMP7]], [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0 +; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr [[TMP10]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1001, 1000 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x i32> [[BROADCAST_SPLAT]], i32 3 +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[FOR_1:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[FOR_2:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[FOR_2_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[OR:%.*]] = or i32 [[FOR_2]], 10 -; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[FOR_2]], [[FOR_1]] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[SCALAR_RECUR4:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT3]], [[SCALAR_PH]] ], [ [[FOR_2_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[OR:%.*]] = or i32 [[SCALAR_RECUR4]], 10 +; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[SCALAR_RECUR4]], [[SCALAR_RECUR]] ; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[SHL]], 255 ; CHECK-NEXT: [[AND:%.*]] = and i32 [[XOR]], [[OR]] -; CHECK-NEXT: [[FOR_1_NEXT]] = xor i32 12, [[FOR_2]] -; CHECK-NEXT: [[FOR_2_NEXT]] = load i32, ptr [[B:%.*]], align 4 -; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IV]] +; CHECK-NEXT: [[FOR_1_NEXT]] = xor i32 12, [[SCALAR_RECUR4]] +; CHECK-NEXT: [[FOR_2_NEXT]] = load i32, ptr [[B]], align 4 +; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] ; CHECK-NEXT: store i32 [[AND]], ptr [[A_GEP]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 1000 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; -- 2.7.4