PHINode *PN, const LSRUse &LU, const LSRFixup &LF, const Formula &F,
SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
DenseMap<BasicBlock *, Value *> Inserted;
+
+ // Inserting instructions in the loop and using them as a PHI's input could
+ // break LCSSA if the PHI's parent block is not a loop exit (i.e. the
+ // corresponding incoming block is not a loop-exiting block). So collect all
+ // such instructions and form LCSSA for them later.
+ SmallVector<Instruction *, 4> InsertedNonLCSSAInsts;
+
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
bool needUpdateFixups = false;
FullV, LF.OperandValToReplace->getType(),
"tmp", BB->getTerminator());
+ // If the incoming block for this value is not in the loop, it means the
+ // current PHI is not in a loop exit, so we must create an LCSSA PHI for
+ // the inserted value.
+ if (auto *I = dyn_cast<Instruction>(FullV))
+ if (L->contains(I) && !L->contains(BB))
+ InsertedNonLCSSAInsts.push_back(I);
+
PN->setIncomingValue(i, FullV);
Pair.first->second = FullV;
}
}
}
}
+
+ IRBuilder<> Builder(L->getHeader()->getContext());
+ formLCSSAForInstructions(InsertedNonLCSSAInsts, DT, LI, &SE, Builder);
}
/// Emit instructions for the leading candidate expression for this LSRUse (this
define arm_aapcs_vfpcc void @float_float_mul(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %c, i32 %N) {
; CHECK-LABEL: float_float_mul:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: push {r4, r5, r6, r7, lr}
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: beq .LBB0_10
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
; CHECK-NEXT: beq .LBB0_11
; CHECK-NEXT: .LBB0_4: @ %for.body.preheader22
; CHECK-NEXT: mvn.w r7, r12
-; CHECK-NEXT: adds r4, r7, r3
-; CHECK-NEXT: and r7, r3, #3
-; CHECK-NEXT: wls lr, r7, .LBB0_7
+; CHECK-NEXT: add.w r8, r7, r3
+; CHECK-NEXT: and r5, r3, #3
+; CHECK-NEXT: wls lr, r5, .LBB0_7
; CHECK-NEXT: @ %bb.5: @ %for.body.prol.preheader
+; CHECK-NEXT: add.w r4, r12, r5
; CHECK-NEXT: add.w r5, r0, r12, lsl #2
; CHECK-NEXT: add.w r6, r1, r12, lsl #2
; CHECK-NEXT: add.w r7, r2, r12, lsl #2
+; CHECK-NEXT: mov r12, r4
; CHECK-NEXT: .LBB0_6: @ %for.body.prol
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldmia r6!, {s0}
-; CHECK-NEXT: add.w r12, r12, #1
; CHECK-NEXT: vldmia r5!, {s2}
; CHECK-NEXT: vmul.f32 s0, s2, s0
; CHECK-NEXT: vstmia r7!, {s0}
; CHECK-NEXT: le lr, .LBB0_6
; CHECK-NEXT: .LBB0_7: @ %for.body.prol.loopexit
-; CHECK-NEXT: cmp r4, #3
+; CHECK-NEXT: cmp.w r8, #3
; CHECK-NEXT: blo .LBB0_10
; CHECK-NEXT: @ %bb.8: @ %for.body.preheader1
; CHECK-NEXT: sub.w r3, r3, r12
; CHECK-NEXT: vstr s0, [r5, #12]
; CHECK-NEXT: bne .LBB0_9
; CHECK-NEXT: .LBB0_10: @ %for.cond.cleanup
-; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
; CHECK-NEXT: .LBB0_11: @ %vector.ph
; CHECK-NEXT: bic r12, r3, #3
; CHECK-NEXT: movs r6, #1
define arm_aapcs_vfpcc void @float_float_add(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %c, i32 %N) {
; CHECK-LABEL: float_float_add:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: push {r4, r5, r6, r7, lr}
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: beq .LBB1_10
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
; CHECK-NEXT: beq .LBB1_11
; CHECK-NEXT: .LBB1_4: @ %for.body.preheader22
; CHECK-NEXT: mvn.w r7, r12
-; CHECK-NEXT: adds r4, r7, r3
-; CHECK-NEXT: and r7, r3, #3
-; CHECK-NEXT: wls lr, r7, .LBB1_7
+; CHECK-NEXT: add.w r8, r7, r3
+; CHECK-NEXT: and r5, r3, #3
+; CHECK-NEXT: wls lr, r5, .LBB1_7
; CHECK-NEXT: @ %bb.5: @ %for.body.prol.preheader
+; CHECK-NEXT: add.w r4, r12, r5
; CHECK-NEXT: add.w r5, r0, r12, lsl #2
; CHECK-NEXT: add.w r6, r1, r12, lsl #2
; CHECK-NEXT: add.w r7, r2, r12, lsl #2
+; CHECK-NEXT: mov r12, r4
; CHECK-NEXT: .LBB1_6: @ %for.body.prol
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldmia r6!, {s0}
-; CHECK-NEXT: add.w r12, r12, #1
; CHECK-NEXT: vldmia r5!, {s2}
; CHECK-NEXT: vadd.f32 s0, s2, s0
; CHECK-NEXT: vstmia r7!, {s0}
; CHECK-NEXT: le lr, .LBB1_6
; CHECK-NEXT: .LBB1_7: @ %for.body.prol.loopexit
-; CHECK-NEXT: cmp r4, #3
+; CHECK-NEXT: cmp.w r8, #3
; CHECK-NEXT: blo .LBB1_10
; CHECK-NEXT: @ %bb.8: @ %for.body.preheader1
; CHECK-NEXT: sub.w r3, r3, r12
; CHECK-NEXT: vstr s0, [r5, #12]
; CHECK-NEXT: bne .LBB1_9
; CHECK-NEXT: .LBB1_10: @ %for.cond.cleanup
-; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
; CHECK-NEXT: .LBB1_11: @ %vector.ph
; CHECK-NEXT: bic r12, r3, #3
; CHECK-NEXT: movs r6, #1
define arm_aapcs_vfpcc void @float_float_sub(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %c, i32 %N) {
; CHECK-LABEL: float_float_sub:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: push {r4, r5, r6, r7, lr}
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: beq .LBB2_10
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
; CHECK-NEXT: beq .LBB2_11
; CHECK-NEXT: .LBB2_4: @ %for.body.preheader22
; CHECK-NEXT: mvn.w r7, r12
-; CHECK-NEXT: adds r4, r7, r3
-; CHECK-NEXT: and r7, r3, #3
-; CHECK-NEXT: wls lr, r7, .LBB2_7
+; CHECK-NEXT: add.w r8, r7, r3
+; CHECK-NEXT: and r5, r3, #3
+; CHECK-NEXT: wls lr, r5, .LBB2_7
; CHECK-NEXT: @ %bb.5: @ %for.body.prol.preheader
+; CHECK-NEXT: add.w r4, r12, r5
; CHECK-NEXT: add.w r5, r0, r12, lsl #2
; CHECK-NEXT: add.w r6, r1, r12, lsl #2
; CHECK-NEXT: add.w r7, r2, r12, lsl #2
+; CHECK-NEXT: mov r12, r4
; CHECK-NEXT: .LBB2_6: @ %for.body.prol
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldmia r6!, {s0}
-; CHECK-NEXT: add.w r12, r12, #1
; CHECK-NEXT: vldmia r5!, {s2}
; CHECK-NEXT: vsub.f32 s0, s2, s0
; CHECK-NEXT: vstmia r7!, {s0}
; CHECK-NEXT: le lr, .LBB2_6
; CHECK-NEXT: .LBB2_7: @ %for.body.prol.loopexit
-; CHECK-NEXT: cmp r4, #3
+; CHECK-NEXT: cmp.w r8, #3
; CHECK-NEXT: blo .LBB2_10
; CHECK-NEXT: @ %bb.8: @ %for.body.preheader1
; CHECK-NEXT: sub.w r3, r3, r12
; CHECK-NEXT: vstr s0, [r5, #12]
; CHECK-NEXT: bne .LBB2_9
; CHECK-NEXT: .LBB2_10: @ %for.cond.cleanup
-; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
; CHECK-NEXT: .LBB2_11: @ %vector.ph
; CHECK-NEXT: bic r12, r3, #3
; CHECK-NEXT: movs r6, #1
; CHECK-NEXT: .LBB3_7: @ %for.body.preheader16
; CHECK-NEXT: mvn.w r7, r12
; CHECK-NEXT: add.w r8, r7, r3
-; CHECK-NEXT: and r7, r3, #3
-; CHECK-NEXT: wls lr, r7, .LBB3_10
+; CHECK-NEXT: and r5, r3, #3
+; CHECK-NEXT: wls lr, r5, .LBB3_10
; CHECK-NEXT: @ %bb.8: @ %for.body.prol.preheader
+; CHECK-NEXT: add.w r4, r12, r5
; CHECK-NEXT: add.w r5, r0, r12, lsl #2
; CHECK-NEXT: add.w r6, r1, r12, lsl #2
; CHECK-NEXT: add.w r7, r2, r12, lsl #2
+; CHECK-NEXT: mov r12, r4
; CHECK-NEXT: .LBB3_9: @ %for.body.prol
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr r4, [r6], #4
-; CHECK-NEXT: add.w r12, r12, #1
; CHECK-NEXT: vldmia r5!, {s2}
; CHECK-NEXT: vmov s0, r4
; CHECK-NEXT: vcvt.f32.s32 s0, s0
; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV]], i64 1
; CHECK-NEXT: br i1 false, label [[LOOP]], label [[LOOPEXIT:%.*]]
; CHECK: loopexit:
+; CHECK-NEXT: [[SCEVGEP_LCSSA:%.*]] = phi ptr [ [[SCEVGEP]], [[LOOP]] ]
; CHECK-NEXT: br i1 false, label [[BBA:%.*]], label [[BBB:%.*]]
; CHECK: bbA:
; CHECK-NEXT: switch i32 0, label [[BBA_BB89_CRIT_EDGE:%.*]] [
; CHECK: bbB.bb89_crit_edge:
; CHECK-NEXT: br label [[BB89]]
; CHECK: bb89:
-; CHECK-NEXT: [[TMP75PHI:%.*]] = phi ptr [ [[SCEVGEP]], [[BBA_BB89_CRIT_EDGE]] ], [ [[SCEVGEP]], [[BBB_BB89_CRIT_EDGE]] ]
+; CHECK-NEXT: [[TMP75PHI:%.*]] = phi ptr [ [[SCEVGEP_LCSSA]], [[BBA_BB89_CRIT_EDGE]] ], [ [[SCEVGEP_LCSSA]], [[BBB_BB89_CRIT_EDGE]] ]
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: ret ptr [[TMP75PHI]]
; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV]], i64 1
; CHECK-NEXT: br i1 false, label [[LOOP]], label [[LOOPEXIT:%.*]]
; CHECK: loopexit:
+; CHECK-NEXT: [[SCEVGEP_LCSSA1:%.*]] = phi ptr [ [[SCEVGEP]], [[LOOP]] ]
+; CHECK-NEXT: [[SCEVGEP_LCSSA:%.*]] = phi ptr [ [[SCEVGEP]], [[LOOP]] ]
; CHECK-NEXT: br i1 false, label [[BBA:%.*]], label [[BBB:%.*]]
; CHECK: bbA:
; CHECK-NEXT: switch i32 0, label [[BB89:%.*]] [
; CHECK: bbB.exit_crit_edge:
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: bb89:
-; CHECK-NEXT: [[TMP75PHI:%.*]] = phi ptr [ [[SCEVGEP]], [[BBA]] ], [ [[SCEVGEP]], [[BBA]] ], [ [[SCEVGEP]], [[BBA]] ]
+; CHECK-NEXT: [[TMP75PHI:%.*]] = phi ptr [ [[SCEVGEP_LCSSA1]], [[BBA]] ], [ [[SCEVGEP_LCSSA1]], [[BBA]] ], [ [[SCEVGEP_LCSSA1]], [[BBA]] ]
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
-; CHECK-NEXT: [[RESULT:%.*]] = phi ptr [ [[TMP75PHI]], [[BB89]] ], [ [[SCEVGEP]], [[BBB_EXIT_CRIT_EDGE]] ]
+; CHECK-NEXT: [[RESULT:%.*]] = phi ptr [ [[TMP75PHI]], [[BB89]] ], [ [[SCEVGEP_LCSSA]], [[BBB_EXIT_CRIT_EDGE]] ]
; CHECK-NEXT: ret ptr [[RESULT]]
;
entry:
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: loopexit:
+; CHECK-NEXT: [[SCEVGEP13_LCSSA:%.*]] = phi ptr [ [[SCEVGEP13:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SCEVGEP11_LCSSA:%.*]] = phi ptr addrspace(5) [ [[SCEVGEP11:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: br label [[FOR_BODY_1:%.*]]
; CHECK: for.body.1:
-; CHECK-NEXT: [[LSR_IV5:%.*]] = phi ptr addrspace(5) [ [[SCEVGEP6:%.*]], [[FOR_BODY_1]] ], [ [[SCEVGEP11:%.*]], [[LOOPEXIT:%.*]] ]
-; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP2:%.*]], [[FOR_BODY_1]] ], [ [[SCEVGEP13:%.*]], [[LOOPEXIT]] ]
+; CHECK-NEXT: [[LSR_IV5:%.*]] = phi ptr addrspace(5) [ [[SCEVGEP6:%.*]], [[FOR_BODY_1]] ], [ [[SCEVGEP11_LCSSA]], [[LOOPEXIT:%.*]] ]
+; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP2:%.*]], [[FOR_BODY_1]] ], [ [[SCEVGEP13_LCSSA]], [[LOOPEXIT]] ]
; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(5) [[LSR_IV5]], align 8
; CHECK-NEXT: store ptr [[TMP0]], ptr [[LSR_IV1]], align 8
; CHECK-NEXT: [[SCEVGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 8
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB0_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: movq %rbx, %rcx
; CHECK-NEXT: testb %al, %al
-; CHECK-NEXT: je .LBB0_4
+; CHECK-NEXT: je .LBB0_3
; CHECK-NEXT: # %bb.2: # %bb4
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT: incq %rbx
+; CHECK-NEXT: leaq 1(%rcx), %rbx
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB0_1
-; CHECK-NEXT: # %bb.3: # %bb8split
-; CHECK-NEXT: decq %rbx
-; CHECK-NEXT: .LBB0_4: # %bb8
-; CHECK-NEXT: movq %rbx, %rax
+; CHECK-NEXT: .LBB0_3: # %bb8
+; CHECK-NEXT: movq %rcx, %rax
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
bb:
; CHECK-NEXT: [[EC:%.*]] = icmp eq %struct.hoge* [[IV_NEXT]], [[END:%.*]]
; CHECK-NEXT: br i1 [[EC]], label [[LOOP_2_PH:%.*]], label [[LOOP_1_HEADER]]
; CHECK: loop.2.ph:
+; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi %struct.hoge* [ [[IV_NEXT]], [[LOOP_1_HEADER]] ]
+; CHECK-NEXT: [[LSR_IV_NEXT6_LCSSA:%.*]] = phi i64 [ [[LSR_IV_NEXT6]], [[LOOP_1_HEADER]] ]
; CHECK-NEXT: br label [[LOOP_2_HEADER:%.*]]
; CHECK: loop.2.header:
-; CHECK-NEXT: [[LSR_IV2:%.*]] = phi i64 [ [[LSR_IV_NEXT3:%.*]], [[LOOP_2_LATCH:%.*]] ], [ [[LSR_IV_NEXT6]], [[LOOP_2_PH]] ]
-; CHECK-NEXT: [[IV2:%.*]] = phi %struct.hoge* [ [[IV2_NEXT:%.*]], [[LOOP_2_LATCH]] ], [ [[IV_NEXT]], [[LOOP_2_PH]] ]
+; CHECK-NEXT: [[LSR_IV2:%.*]] = phi i64 [ [[LSR_IV_NEXT3:%.*]], [[LOOP_2_LATCH:%.*]] ], [ [[LSR_IV_NEXT6_LCSSA]], [[LOOP_2_PH]] ]
+; CHECK-NEXT: [[IV2:%.*]] = phi %struct.hoge* [ [[IV2_NEXT:%.*]], [[LOOP_2_LATCH]] ], [ [[IV_NEXT_LCSSA]], [[LOOP_2_PH]] ]
; CHECK-NEXT: [[IV24:%.*]] = bitcast %struct.hoge* [[IV2]] to i32*
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[LSR_IV2]], 12
; CHECK-NEXT: call void @use.i64(i64 [[TMP0]])
; CHECK-NEXT: [[EC0:%.*]] = icmp eq ptr [[ADD_PTR94]], [[END:%.*]]
; CHECK-NEXT: br i1 [[EC0]], label [[FOR_BODY37]], label [[FOR_END_LOOPEXIT:%.*]]
; CHECK: for.end.loopexit:
+; CHECK-NEXT: [[ADD_PTR94_LCSSA:%.*]] = phi ptr [ [[ADD_PTR94]], [[FOR_BODY37]] ]
; CHECK-NEXT: br label [[FOR_END:%.*]]
; CHECK: for.end:
-; CHECK-NEXT: [[P0R_0_LCSSA:%.*]] = phi ptr [ [[ADD_PTR94]], [[FOR_END_LOOPEXIT]] ]
+; CHECK-NEXT: [[P0R_0_LCSSA:%.*]] = phi ptr [ [[ADD_PTR94_LCSSA]], [[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT: [[EC1:%.*]] = icmp eq ptr [[P0R_0_LCSSA]], [[END]]
; CHECK-NEXT: br i1 [[EC1]], label [[FOR_BODY15]], label [[FOR_INC133:%.*]]
; CHECK: for.inc133:
; CHECK-NEXT: [[TOBOOL12:%.*]] = icmp eq i64 [[T2]], 0
; CHECK-NEXT: br i1 [[TOBOOL12]], label [[DO_BODY14_PREHEADER:%.*]], label [[DO_BODY8]]
; CHECK: do.body14.preheader:
+; CHECK-NEXT: [[INC10_LCSSA:%.*]] = phi i64 [ [[INC10]], [[DO_BODY8]] ]
; CHECK-NEXT: br label [[DO_BODY14:%.*]]
; CHECK: do.body14:
; CHECK-NEXT: [[I_3:%.*]] = phi i64 [ [[INC15:%.*]], [[DO_BODY14]] ], [ 0, [[DO_BODY14_PREHEADER]] ]
-; CHECK-NEXT: [[J_3:%.*]] = phi i64 [ [[INC16:%.*]], [[DO_BODY14]] ], [ [[INC10]], [[DO_BODY14_PREHEADER]] ]
+; CHECK-NEXT: [[J_3:%.*]] = phi i64 [ [[INC16:%.*]], [[DO_BODY14]] ], [ [[INC10_LCSSA]], [[DO_BODY14_PREHEADER]] ]
; CHECK-NEXT: tail call void @goo(i64 [[I_3]], i64 [[J_3]])
; CHECK-NEXT: [[INC15]] = add nuw nsw i64 [[I_3]], 1
; CHECK-NEXT: [[INC16]] = add i64 [[J_3]], 1
; CHECK-NEXT: [[TOBOOL18:%.*]] = icmp eq i64 [[T3]], 0
; CHECK-NEXT: br i1 [[TOBOOL18]], label [[DO_BODY20_PREHEADER:%.*]], label [[DO_BODY14]]
; CHECK: do.body20.preheader:
+; CHECK-NEXT: [[INC16_LCSSA:%.*]] = phi i64 [ [[INC16]], [[DO_BODY14]] ]
; CHECK-NEXT: br label [[DO_BODY20:%.*]]
; CHECK: do.body20:
; CHECK-NEXT: [[I_4:%.*]] = phi i64 [ [[INC21:%.*]], [[DO_BODY20]] ], [ 0, [[DO_BODY20_PREHEADER]] ]
-; CHECK-NEXT: [[J_4:%.*]] = phi i64 [ [[INC22:%.*]], [[DO_BODY20]] ], [ [[INC16]], [[DO_BODY20_PREHEADER]] ]
+; CHECK-NEXT: [[J_4:%.*]] = phi i64 [ [[INC22:%.*]], [[DO_BODY20]] ], [ [[INC16_LCSSA]], [[DO_BODY20_PREHEADER]] ]
; CHECK-NEXT: tail call void @goo(i64 [[I_4]], i64 [[J_4]])
; CHECK-NEXT: [[INC21]] = add nuw nsw i64 [[I_4]], 1
; CHECK-NEXT: [[INC22]] = add i64 [[J_4]], 1
; CHECK-NEXT: [[TOBOOL24:%.*]] = icmp eq i64 [[T4]], 0
; CHECK-NEXT: br i1 [[TOBOOL24]], label [[DO_BODY26_PREHEADER:%.*]], label [[DO_BODY20]]
; CHECK: do.body26.preheader:
+; CHECK-NEXT: [[INC22_LCSSA:%.*]] = phi i64 [ [[INC22]], [[DO_BODY20]] ]
; CHECK-NEXT: br label [[DO_BODY26:%.*]]
; CHECK: do.body26:
; CHECK-NEXT: [[I_5:%.*]] = phi i64 [ [[INC27:%.*]], [[DO_BODY26]] ], [ 0, [[DO_BODY26_PREHEADER]] ]
-; CHECK-NEXT: [[J_5:%.*]] = phi i64 [ [[INC28:%.*]], [[DO_BODY26]] ], [ [[INC22]], [[DO_BODY26_PREHEADER]] ]
+; CHECK-NEXT: [[J_5:%.*]] = phi i64 [ [[INC28:%.*]], [[DO_BODY26]] ], [ [[INC22_LCSSA]], [[DO_BODY26_PREHEADER]] ]
; CHECK-NEXT: tail call void @goo(i64 [[I_5]], i64 [[J_5]])
; CHECK-NEXT: [[INC27]] = add nuw nsw i64 [[I_5]], 1
; CHECK-NEXT: [[INC28]] = add nsw i64 [[J_5]], 1
; LEGACYPM: cond.true.i:
; LEGACYPM-NEXT: br label [[DO_BODY_I_I_DO_BODY_I_I_CRIT_EDGE:%.*]]
; LEGACYPM: do.body.i.i.do.body.i.i_crit_edge:
-; LEGACYPM-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[DO_BODY_I_I_DO_BODY_I_I_CRIT_EDGE]] ], [ undef, [[COND_TRUE_I]] ]
-; LEGACYPM-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], 1
; LEGACYPM-NEXT: br i1 true, label [[DO_BODY_I_I_RDRAND_INT_EXIT_I_CRIT_EDGE:%.*]], label [[DO_BODY_I_I_DO_BODY_I_I_CRIT_EDGE]]
; LEGACYPM: do.body.i.i.rdrand_int.exit.i_crit_edge:
; LEGACYPM-NEXT: br i1 true, label [[DO_BODY_I_I_RDRAND_INT_EXIT_I_CRIT_EDGE_FOR_END_CRIT_EDGE:%.*]], label [[FOR_INC:%.*]]
; LEGACYPM: do.body.i.i.rdrand_int.exit.i_crit_edge.for.end_crit_edge:
+; LEGACYPM-NEXT: [[LSR_IV_NEXT_LCSSA_LCSSA:%.*]] = phi i64 [ undef, [[DO_BODY_I_I_RDRAND_INT_EXIT_I_CRIT_EDGE]] ]
; LEGACYPM-NEXT: br label [[FOR_END:%.*]]
; LEGACYPM: for.inc:
; LEGACYPM-NEXT: br label [[FOR_COND]]
; LEGACYPM: for.endsplit:
; LEGACYPM-NEXT: br label [[FOR_END]]
; LEGACYPM: for.end:
-; LEGACYPM-NEXT: [[PGOCOUNT_PROMOTED24:%.*]] = phi i64 [ [[LSR_IV_NEXT]], [[DO_BODY_I_I_RDRAND_INT_EXIT_I_CRIT_EDGE_FOR_END_CRIT_EDGE]] ], [ undef, [[FOR_ENDSPLIT:%.*]] ]
+; LEGACYPM-NEXT: [[PGOCOUNT_PROMOTED24:%.*]] = phi i64 [ [[LSR_IV_NEXT_LCSSA_LCSSA]], [[DO_BODY_I_I_RDRAND_INT_EXIT_I_CRIT_EDGE_FOR_END_CRIT_EDGE]] ], [ undef, [[FOR_ENDSPLIT:%.*]] ]
; LEGACYPM-NEXT: ret i32 undef
;
; NEWPM-LABEL: @test1(
;
; LIMIT-LABEL: @test(
; LIMIT-NEXT: entry:
+; LIMIT-NEXT: [[TMP0:%.*]] = mul i32 [[C:%.*]], -3
; LIMIT-NEXT: br label [[OUTER_LOOP:%.*]]
; LIMIT: outer_loop:
; LIMIT-NEXT: [[PHI2:%.*]] = phi i32 [ [[A:%.*]], [[ENTRY:%.*]] ], [ 204, [[OUTER_TAIL:%.*]] ]
; LIMIT-NEXT: [[PHI4:%.*]] = phi i32 [ [[B:%.*]], [[ENTRY]] ], [ [[I35:%.*]], [[OUTER_TAIL]] ]
; LIMIT-NEXT: br label [[GUARD:%.*]]
; LIMIT: guard:
-; LIMIT-NEXT: [[LCMP_MOD:%.*]] = icmp eq i32 [[C:%.*]], 0
+; LIMIT-NEXT: [[LCMP_MOD:%.*]] = icmp eq i32 [[C]], 0
; LIMIT-NEXT: br i1 [[LCMP_MOD]], label [[OUTER_TAIL]], label [[PREHEADER:%.*]]
; LIMIT: preheader:
; LIMIT-NEXT: [[I15:%.*]] = shl i32 [[B]], 1
-; LIMIT-NEXT: [[TMP0:%.*]] = mul i32 [[PHI2]], -1
-; LIMIT-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], -1
-; LIMIT-NEXT: [[TMP2:%.*]] = sub i32 [[PHI4]], [[TMP1]]
-; LIMIT-NEXT: [[TMP3:%.*]] = add i32 [[B]], [[PHI4]]
-; LIMIT-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], [[TMP1]]
-; LIMIT-NEXT: [[TMP5:%.*]] = sub i32 14, [[TMP4]]
+; LIMIT-NEXT: [[TMP1:%.*]] = mul i32 [[PHI2]], -1
+; LIMIT-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], -1
+; LIMIT-NEXT: [[TMP3:%.*]] = sub i32 [[PHI4]], [[TMP2]]
+; LIMIT-NEXT: [[TMP4:%.*]] = add i32 [[B]], [[PHI4]]
+; LIMIT-NEXT: [[TMP5:%.*]] = sub i32 [[TMP4]], [[TMP2]]
+; LIMIT-NEXT: [[TMP6:%.*]] = sub i32 14, [[TMP5]]
+; LIMIT-NEXT: [[TMP7:%.*]] = add i32 [[TMP0]], [[PHI2]]
; LIMIT-NEXT: br label [[INNER_LOOP:%.*]]
; LIMIT: inner_loop:
-; LIMIT-NEXT: [[LSR_IV3:%.*]] = phi i32 [ [[LSR_IV_NEXT4:%.*]], [[INNER_LOOP]] ], [ [[TMP5]], [[PREHEADER]] ]
-; LIMIT-NEXT: [[LSR_IV1:%.*]] = phi i32 [ [[LSR_IV_NEXT2:%.*]], [[INNER_LOOP]] ], [ [[TMP4]], [[PREHEADER]] ]
-; LIMIT-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[INNER_LOOP]] ], [ [[TMP2]], [[PREHEADER]] ]
+; LIMIT-NEXT: [[LSR_IV3:%.*]] = phi i32 [ [[LSR_IV_NEXT4:%.*]], [[INNER_LOOP]] ], [ [[TMP6]], [[PREHEADER]] ]
+; LIMIT-NEXT: [[LSR_IV1:%.*]] = phi i32 [ [[LSR_IV_NEXT2:%.*]], [[INNER_LOOP]] ], [ [[TMP5]], [[PREHEADER]] ]
+; LIMIT-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[INNER_LOOP]] ], [ [[TMP3]], [[PREHEADER]] ]
; LIMIT-NEXT: [[PHI5:%.*]] = phi i32 [ [[PHI3]], [[PREHEADER]] ], [ [[I30:%.*]], [[INNER_LOOP]] ]
; LIMIT-NEXT: [[PHI6:%.*]] = phi i32 [ [[PHI2]], [[PREHEADER]] ], [ [[I33:%.*]], [[INNER_LOOP]] ]
; LIMIT-NEXT: [[ITER:%.*]] = phi i32 [ [[C]], [[PREHEADER]] ], [ [[ITER_SUB:%.*]], [[INNER_LOOP]] ]
; LIMIT-NEXT: [[I18:%.*]] = sub i32 14, [[PHI5]]
; LIMIT-NEXT: [[I19:%.*]] = mul i32 [[I18]], [[C]]
; LIMIT-NEXT: [[FACTOR_PROL:%.*]] = shl i32 [[PHI5]], 1
-; LIMIT-NEXT: [[TMP6:%.*]] = add i32 [[LSR_IV1]], [[I19]]
-; LIMIT-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], [[FACTOR_PROL]]
-; LIMIT-NEXT: [[TMP8:%.*]] = shl i32 [[TMP7]], 1
-; LIMIT-NEXT: [[TMP9:%.*]] = add i32 [[LSR_IV]], [[TMP8]]
-; LIMIT-NEXT: [[TMP10:%.*]] = sub i32 [[LSR_IV3]], [[I19]]
-; LIMIT-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], [[FACTOR_PROL]]
-; LIMIT-NEXT: [[TMP12:%.*]] = mul i32 [[C]], [[TMP11]]
-; LIMIT-NEXT: [[TMP13:%.*]] = add i32 [[LSR_IV1]], [[I19]]
-; LIMIT-NEXT: [[TMP14:%.*]] = add i32 [[TMP13]], [[FACTOR_PROL]]
-; LIMIT-NEXT: [[TMP15:%.*]] = shl i32 [[TMP14]], 1
-; LIMIT-NEXT: [[TMP16:%.*]] = add i32 [[TMP12]], [[TMP15]]
-; LIMIT-NEXT: [[TMP17:%.*]] = add i32 [[LSR_IV]], [[TMP16]]
-; LIMIT-NEXT: [[I29:%.*]] = mul i32 [[TMP9]], [[C]]
-; LIMIT-NEXT: [[FACTOR_2_PROL:%.*]] = shl i32 [[TMP17]], 1
+; LIMIT-NEXT: [[TMP8:%.*]] = add i32 [[LSR_IV1]], [[I19]]
+; LIMIT-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], [[FACTOR_PROL]]
+; LIMIT-NEXT: [[TMP10:%.*]] = shl i32 [[TMP9]], 1
+; LIMIT-NEXT: [[TMP11:%.*]] = add i32 [[LSR_IV]], [[TMP10]]
+; LIMIT-NEXT: [[TMP12:%.*]] = sub i32 [[LSR_IV3]], [[I19]]
+; LIMIT-NEXT: [[TMP13:%.*]] = sub i32 [[TMP12]], [[FACTOR_PROL]]
+; LIMIT-NEXT: [[TMP14:%.*]] = mul i32 [[C]], [[TMP13]]
+; LIMIT-NEXT: [[TMP15:%.*]] = add i32 [[LSR_IV1]], [[I19]]
+; LIMIT-NEXT: [[TMP16:%.*]] = add i32 [[TMP15]], [[FACTOR_PROL]]
+; LIMIT-NEXT: [[TMP17:%.*]] = shl i32 [[TMP16]], 1
+; LIMIT-NEXT: [[TMP18:%.*]] = add i32 [[TMP14]], [[TMP17]]
+; LIMIT-NEXT: [[TMP19:%.*]] = add i32 [[LSR_IV]], [[TMP18]]
+; LIMIT-NEXT: [[I29:%.*]] = mul i32 [[TMP11]], [[C]]
+; LIMIT-NEXT: [[FACTOR_2_PROL:%.*]] = shl i32 [[TMP19]], 1
; LIMIT-NEXT: [[I30]] = add i32 [[I17]], [[FACTOR_2_PROL]]
; LIMIT-NEXT: [[I33]] = add i32 [[PHI6]], -3
; LIMIT-NEXT: [[ITER_SUB]] = add i32 [[ITER]], -1
; LIMIT: outer_tail.loopexit:
; LIMIT-NEXT: br label [[OUTER_TAIL]]
; LIMIT: outer_tail:
-; LIMIT-NEXT: [[PHI7:%.*]] = phi i32 [ [[PHI2]], [[GUARD]] ], [ [[I33]], [[OUTER_TAIL_LOOPEXIT]] ]
+; LIMIT-NEXT: [[PHI7:%.*]] = phi i32 [ [[PHI2]], [[GUARD]] ], [ [[TMP7]], [[OUTER_TAIL_LOOPEXIT]] ]
; LIMIT-NEXT: [[I35]] = sub i32 [[A]], [[PHI7]]
; LIMIT-NEXT: [[CMP:%.*]] = icmp sgt i32 [[I35]], 9876
; LIMIT-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[OUTER_LOOP]]
; CHECK-NEXT: [[LSR_IV_NEXT17]] = add i64 [[LSR_IV16]], 1
; CHECK-NEXT: br i1 [[TMP0]], label [[DO_BODY]], label [[DO_END:%.*]]
; CHECK: do.end:
+; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi [33 x i16]* [ [[TMP2]], [[DO_BODY]] ]
; CHECK-NEXT: [[XAP_0:%.*]] = inttoptr i64 [[LSR_IV_NEXT17]] to i1*
; CHECK-NEXT: [[CAP_0:%.*]] = ptrtoint i1* [[XAP_0]] to i64
; CHECK-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]]
; CHECK-NEXT: [[SCEVGEP1:%.*]] = bitcast i16* [[SCEVGEP]] to i8*
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
-; CHECK-NEXT: [[LSR_IV9:%.*]] = phi [33 x i16]* [ [[TMP3:%.*]], [[FOR_BODY]] ], [ [[TMP2]], [[FOR_BODY_LR_PH]] ]
+; CHECK-NEXT: [[LSR_IV9:%.*]] = phi [33 x i16]* [ [[TMP3:%.*]], [[FOR_BODY]] ], [ [[DOTLCSSA]], [[FOR_BODY_LR_PH]] ]
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_LR_PH]] ]
; CHECK-NEXT: [[LSR_IV911:%.*]] = bitcast [33 x i16]* [[LSR_IV9]] to i16*
; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, i8* [[SCEVGEP1]], i64 [[LSR_IV]]
; RUN: opt -S -loop-reduce %s | FileCheck --check-prefixes=LEGACYPM %s
; RUN: opt -S -passes=loop-reduce %s | FileCheck --check-prefixes=NEWPM %s
-; REQUIRES: asserts
-; XFAIL: *
-
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; LEGACYPM-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; LEGACYPM-NEXT: br i1 [[VAL_I1_22]], label [[LOOP_4]], label [[BE_6_LOOP_EXIT_7_CRIT_EDGE:%.*]]
; LEGACYPM: loop_exit_7split:
+; LEGACYPM-NEXT: [[LSR_IV_LCSSA:%.*]] = phi i32 [ [[LSR_IV]], [[LOOP_4]] ]
; LEGACYPM-NEXT: br label [[LOOP_EXIT_7:%.*]]
; LEGACYPM: be_6.loop_exit_7_crit_edge:
+; LEGACYPM-NEXT: [[LSR_IV_LCSSA1:%.*]] = phi i32 [ [[LSR_IV]], [[BE_6]] ]
; LEGACYPM-NEXT: br label [[LOOP_EXIT_7]]
; LEGACYPM: loop_exit_7:
-; LEGACYPM-NEXT: [[VAL_I32_24_LCSSA]] = phi i32 [ [[LSR_IV]], [[BE_6_LOOP_EXIT_7_CRIT_EDGE]] ], [ [[LSR_IV]], [[LOOP_EXIT_7SPLIT]] ]
+; LEGACYPM-NEXT: [[VAL_I32_24_LCSSA]] = phi i32 [ [[LSR_IV_LCSSA1]], [[BE_6_LOOP_EXIT_7_CRIT_EDGE]] ], [ [[LSR_IV_LCSSA]], [[LOOP_EXIT_7SPLIT]] ]
; LEGACYPM-NEXT: br label [[BB_5:%.*]]
;
+; NEWPM-LABEL: define void @function_0
+; NEWPM-SAME: (i32 [[VAL_I32_8:%.*]], i32 [[VAL_I32_9:%.*]]) {
+; NEWPM-NEXT: [[VAL_I1_22:%.*]] = trunc i8 -66 to i1
+; NEWPM-NEXT: br i1 [[VAL_I1_22]], label [[BB_2_PREHEADER:%.*]], label [[BB_2_PREHEADER]]
+; NEWPM: bb_2.preheader:
+; NEWPM-NEXT: br label [[BB_2:%.*]]
+; NEWPM: bb_2:
+; NEWPM-NEXT: br label [[PRHDR_LOOP_3:%.*]]
+; NEWPM: prhdr_loop_3:
+; NEWPM-NEXT: br label [[LOOP_4:%.*]]
+; NEWPM: loop_4:
+; NEWPM-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[BE_6:%.*]] ], [ 7851, [[PRHDR_LOOP_3]] ]
+; NEWPM-NEXT: br i1 [[VAL_I1_22]], label [[BE_6]], label [[LOOP_EXIT_7SPLIT:%.*]]
+; NEWPM: bb_5:
+; NEWPM-NEXT: [[VAL_I32_40:%.*]] = mul i32 [[VAL_I32_9]], [[VAL_I32_24_LCSSA:%.*]]
+; NEWPM-NEXT: br label [[BB_2]]
+; NEWPM: be_6:
+; NEWPM-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
+; NEWPM-NEXT: br i1 [[VAL_I1_22]], label [[LOOP_4]], label [[BE_6_LOOP_EXIT_7_CRIT_EDGE:%.*]]
+; NEWPM: loop_exit_7split:
+; NEWPM-NEXT: [[LSR_IV_LCSSA:%.*]] = phi i32 [ [[LSR_IV]], [[LOOP_4]] ]
+; NEWPM-NEXT: br label [[LOOP_EXIT_7:%.*]]
+; NEWPM: be_6.loop_exit_7_crit_edge:
+; NEWPM-NEXT: [[LSR_IV_LCSSA1:%.*]] = phi i32 [ [[LSR_IV]], [[BE_6]] ]
+; NEWPM-NEXT: br label [[LOOP_EXIT_7]]
+; NEWPM: loop_exit_7:
+; NEWPM-NEXT: [[VAL_I32_24_LCSSA]] = phi i32 [ [[LSR_IV_LCSSA1]], [[BE_6_LOOP_EXIT_7_CRIT_EDGE]] ], [ [[LSR_IV_LCSSA]], [[LOOP_EXIT_7SPLIT]] ]
+; NEWPM-NEXT: br label [[BB_5:%.*]]
+;
%val_i1_22 = trunc i8 -66 to i1
br i1 %val_i1_22, label %bb_2, label %bb_2
; CHECK-NEXT: [[P9_PH:%.*]] = phi i32 [ undef, [[BB5_BB6SPLIT_CRIT_EDGE]] ], [ [[I1]], [[BB6SPLITSPLIT]] ]
; CHECK-NEXT: br label [[BB6:%.*]]
; CHECK: loop1.bb6_crit_edge:
+; CHECK-NEXT: [[I1_LCSSA:%.*]] = phi i32 [ [[I1]], [[LOOP1]] ]
; CHECK-NEXT: br label [[BB6]]
; CHECK: bb6:
; CHECK-NEXT: [[P8:%.*]] = phi i32 [ undef, [[LOOP1_BB6_CRIT_EDGE]] ], [ [[P8_PH]], [[BB6SPLIT]] ]
-; CHECK-NEXT: [[P9:%.*]] = phi i32 [ [[I1]], [[LOOP1_BB6_CRIT_EDGE]] ], [ [[P9_PH]], [[BB6SPLIT]] ]
+; CHECK-NEXT: [[P9:%.*]] = phi i32 [ [[I1_LCSSA]], [[LOOP1_BB6_CRIT_EDGE]] ], [ [[P9_PH]], [[BB6SPLIT]] ]
; CHECK-NEXT: unreachable
;
bb: