if (LI->getLoopFor(Cmp->getParent()) != L)
return false;
- // IV increment may have other users than the IV. We do not want to make
- // dominance queries to analyze the legality of moving it towards the cmp,
- // so just check that there is no other users.
- if (!BO->hasOneUse())
- return false;
- // Ultimately, the insertion point must dominate latch. This should be a
- // cheap check because no CFG changes & dom tree recomputation happens
- // during the transform.
- Function *F = BO->getParent()->getParent();
- return getDT(*F).dominates(Cmp->getParent(), L->getLoopLatch());
+ // Finally, we need to ensure that the insert point will dominate all
+ // existing uses of the increment.
+
+ auto &DT = getDT(*BO->getParent()->getParent());
+ if (DT.dominates(Cmp->getParent(), BO->getParent()))
+ // If we're moving up the dom tree, all uses are trivially dominated.
+ // (This is the common case for code produced by LSR.)
+ return true;
+
+ // Otherwise, special case the single use in the phi recurrence.
+ return BO->hasOneUse() && DT.dominates(Cmp->getParent(), L->getLoopLatch());
};
if (BO->getParent() != Cmp->getParent() && !IsReplacableIVIncrement(BO)) {
// We used to use a dominator tree here to allow multi-block optimization.
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[LEN:%.*]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[COND_1:%.*]] = icmp eq i64 [[IV]], 0
-; CHECK-NEXT: br i1 [[COND_1]], label [[EXIT:%.*]], label [[BACKEDGE]]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[MATH:%.*]], [[BACKEDGE:%.*]] ], [ [[LEN:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[IV]], i64 1)
+; CHECK-NEXT: [[MATH]] = extractvalue { i64, i1 } [[TMP0]], 0
+; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
+; CHECK-NEXT: br i1 [[OV]], label [[EXIT:%.*]], label [[BACKEDGE]]
; CHECK: backedge:
-; CHECK-NEXT: [[SUNKADDR:%.*]] = mul i64 [[IV]], 4
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to i8*
-; CHECK-NEXT: [[SUNKADDR1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 [[SUNKADDR]]
-; CHECK-NEXT: [[SUNKADDR2:%.*]] = getelementptr i8, i8* [[SUNKADDR1]], i64 -4
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[SUNKADDR2]] to i32*
-; CHECK-NEXT: [[LOADED:%.*]] = load atomic i32, i32* [[TMP1]] unordered, align 4
-; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], -1
+; CHECK-NEXT: [[SUNKADDR:%.*]] = mul i64 [[MATH]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[P:%.*]] to i8*
+; CHECK-NEXT: [[SUNKADDR1:%.*]] = getelementptr i8, i8* [[TMP1]], i64 [[SUNKADDR]]
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[SUNKADDR1]] to i32*
+; CHECK-NEXT: [[LOADED:%.*]] = load atomic i32, i32* [[TMP2]] unordered, align 4
; CHECK-NEXT: [[COND_2:%.*]] = icmp eq i32 [[LOADED]], [[X:%.*]]
; CHECK-NEXT: br i1 [[COND_2]], label [[FAILURE:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: ret i32 -1
; CHECK: failure:
-; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[IV_NEXT]] to i32
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[MATH]] to i32
; CHECK-NEXT: ret i32 [[TRUNC]]
;
entry: