}
// Compute:
- // Start + |Step| * Backedge < Start
- // Start - |Step| * Backedge > Start
+ // 1. Start + |Step| * Backedge < Start
+ // 2. Start - |Step| * Backedge > Start
+ //
+ // And select either 1. or 2. depending on whether step is positive or
+ // negative. If Step is known to be positive or negative, only create
+ // either 1. or 2.
Value *Add = nullptr, *Sub = nullptr;
+ bool NeedPosCheck = !SE.isKnownNegative(Step);
+ bool NeedNegCheck = !SE.isKnownPositive(Step);
+
if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARTy)) {
StartValue = InsertNoopCastOfTo(
StartValue, Builder.getInt8PtrTy(ARPtrTy->getAddressSpace()));
Value *NegMulV = Builder.CreateNeg(MulV);
- Add = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, MulV);
- Sub = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, NegMulV);
+ if (NeedPosCheck)
+ Add = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, MulV);
+ if (NeedNegCheck)
+ Sub = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, NegMulV);
} else {
- Add = Builder.CreateAdd(StartValue, MulV);
- Sub = Builder.CreateSub(StartValue, MulV);
+ if (NeedPosCheck)
+ Add = Builder.CreateAdd(StartValue, MulV);
+ if (NeedNegCheck)
+ Sub = Builder.CreateSub(StartValue, MulV);
+ }
+
+ Value *EndCompareLT = nullptr;
+ Value *EndCompareGT = nullptr;
+ Value *EndCheck = nullptr;
+ if (NeedPosCheck)
+ EndCheck = EndCompareLT = Builder.CreateICmp(
+ Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, Add, StartValue);
+ if (NeedNegCheck)
+ EndCheck = EndCompareGT = Builder.CreateICmp(
+ Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
+ if (NeedPosCheck && NeedNegCheck) {
+ // Select the answer based on the sign of Step.
+ EndCheck = Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT);
}
- Value *EndCompareGT = Builder.CreateICmp(
- Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
-
- Value *EndCompareLT = Builder.CreateICmp(
- Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, Add, StartValue);
-
- // Select the answer based on the sign of Step.
- Value *EndCheck =
- Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT);
-
// If the backedge taken count type is larger than the AR type,
// check that we don't drop any bits by truncating it. If we are
// dropping bits, then we have overflow (unless the step is zero).
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
; CHECK-NEXT: [[TMP2:%.*]] = add i32 0, [[MUL_RESULT]]
-; CHECK-NEXT: [[TMP3:%.*]] = sub i32 0, [[MUL_RESULT]]
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[TMP3]], 0
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP2]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
; CHECK-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
; CHECK-NEXT: [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT3]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[A5]], i64 [[MUL_RESULT3]]
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* [[A5]], i64 [[TMP11]]
-; CHECK-NEXT: [[TMP14:%.*]] = icmp ugt i8* [[TMP13]], [[A5]]
; CHECK-NEXT: [[TMP15:%.*]] = icmp ult i8* [[TMP12]], [[A5]]
-; CHECK-NEXT: [[TMP16:%.*]] = select i1 false, i1 [[TMP14]], i1 [[TMP15]]
-; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]]
+; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW4]]
; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP9]], [[TMP17]]
; CHECK-NEXT: br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
; CHECK: for.body.ph.lver.orig:
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
; CHECK-NEXT: [[TMP2:%.*]] = add i32 0, [[MUL_RESULT]]
-; CHECK-NEXT: [[TMP3:%.*]] = sub i32 0, [[MUL_RESULT]]
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[TMP3]], 0
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP2]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
; CHECK-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
; CHECK-NEXT: [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT3]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*), i64 [[MUL_RESULT3]]
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*), i64 [[TMP11]]
-; CHECK-NEXT: [[TMP14:%.*]] = icmp ugt i8* [[TMP13]], bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*)
; CHECK-NEXT: [[TMP15:%.*]] = icmp ult i8* [[TMP12]], bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*)
-; CHECK-NEXT: [[TMP16:%.*]] = select i1 false, i1 [[TMP14]], i1 [[TMP15]]
-; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]]
+; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW4]]
; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP9]], [[TMP17]]
; CHECK-NEXT: br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
; CHECK: for.body.ph.lver.orig:
; VF-TWO-CHECK-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 [[TMP2]])
; VF-TWO-CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
; VF-TWO-CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
-; VF-TWO-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], [[MUL_RESULT]]
; VF-TWO-CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP0]], [[MUL_RESULT]]
; VF-TWO-CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], [[TMP0]]
-; VF-TWO-CHECK-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP3]], [[TMP0]]
-; VF-TWO-CHECK-NEXT: [[TMP7:%.*]] = select i1 true, i1 [[TMP5]], i1 [[TMP6]]
; VF-TWO-CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[TMP1]], 4294967295
-; VF-TWO-CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
+; VF-TWO-CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP5]], [[TMP8]]
; VF-TWO-CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
; VF-TWO-CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; VF-TWO-CHECK: vector.main.loop.iter.check:
; VF-FOUR-CHECK-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 [[TMP2]])
; VF-FOUR-CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
; VF-FOUR-CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
-; VF-FOUR-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], [[MUL_RESULT]]
; VF-FOUR-CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP0]], [[MUL_RESULT]]
; VF-FOUR-CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], [[TMP0]]
-; VF-FOUR-CHECK-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP3]], [[TMP0]]
-; VF-FOUR-CHECK-NEXT: [[TMP7:%.*]] = select i1 true, i1 [[TMP5]], i1 [[TMP6]]
; VF-FOUR-CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[TMP1]], 4294967295
-; VF-FOUR-CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
+; VF-FOUR-CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP5]], [[TMP8]]
; VF-FOUR-CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
; VF-FOUR-CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; VF-FOUR-CHECK: vector.main.loop.iter.check:
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH]], label [[VECTOR_SCEVCHECK:%.*]]
; CHECK: vector.scevcheck:
; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP8]], [[TMP0]]
-; CHECK-NEXT: [[TMP11:%.*]] = sub i32 [[TMP8]], [[TMP0]]
-; CHECK-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]]
; CHECK-NEXT: [[TMP13:%.*]] = icmp slt i32 [[TMP10]], [[TMP8]]
-; CHECK-NEXT: [[TMP14:%.*]] = select i1 false, i1 [[TMP12]], i1 [[TMP13]]
-; CHECK-NEXT: br i1 [[TMP14]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: br i1 [[TMP13]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; CHECK-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 1, i8 [[TMP10]])
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
-; CHECK-NEXT: [[TMP11:%.*]] = add i8 [[TMP7]], [[MUL_RESULT]]
; CHECK-NEXT: [[TMP12:%.*]] = sub i8 [[TMP7]], [[MUL_RESULT]]
-; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i8 [[TMP12]], [[TMP7]]
-; CHECK-NEXT: [[TMP14:%.*]] = icmp ult i8 [[TMP11]], [[TMP7]]
-; CHECK-NEXT: [[TMP15:%.*]] = select i1 true, i1 [[TMP13]], i1 [[TMP14]]
+; CHECK-NEXT: [[TMP14:%.*]] = icmp ugt i8 [[TMP12]], [[TMP7]]
; CHECK-NEXT: [[TMP16:%.*]] = icmp ugt i32 [[TMP9]], 255
-; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]]
+; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP14]], [[TMP16]]
; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW]]
; CHECK-NEXT: br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1)
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1
; CHECK-NEXT: [[TMP1:%.*]] = add i32 0, [[TMP0]]
-; CHECK-NEXT: [[TMP2:%.*]] = sub i32 0, [[TMP0]]
-; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], 0
; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP1]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = select i1 false, i1 [[TMP3]], i1 [[TMP4]]
-; CHECK-NEXT: br i1 [[TMP5]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: br i1 [[TMP4]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[UMAX1]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[UMAX1]], [[N_MOD_VF]]
; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1)
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1
; CHECK-NEXT: [[TMP1:%.*]] = add i32 0, [[TMP0]]
-; CHECK-NEXT: [[TMP2:%.*]] = sub i32 0, [[TMP0]]
-; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], 0
; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP1]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = select i1 false, i1 [[TMP3]], i1 [[TMP4]]
-; CHECK-NEXT: br i1 [[TMP5]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: br i1 [[TMP4]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[UMAX1]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[UMAX1]], [[N_MOD_VF]]
; CHECK: vector.scevcheck:
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[T]], [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = sub i8 [[T]], [[TMP1]]
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i8 [[TMP3]], [[T]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i8 [[TMP2]], [[T]]
-; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255
-; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
; CHECK-NEXT: [[TMP10:%.*]] = trunc i32 [[LEN]] to i8
; CHECK-NEXT: [[TMP11:%.*]] = add i8 [[T]], [[TMP10]]
-; CHECK-NEXT: [[TMP12:%.*]] = sub i8 [[T]], [[TMP10]]
-; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i8 [[TMP12]], [[T]]
; CHECK-NEXT: [[TMP14:%.*]] = icmp slt i8 [[TMP11]], [[T]]
-; CHECK-NEXT: [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]]
; CHECK-NEXT: [[TMP16:%.*]] = icmp ugt i32 [[LEN]], 255
-; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]]
+; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP14]], [[TMP16]]
; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP8]], [[TMP17]]
; CHECK-NEXT: br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; UNROLL-NO-IC: vector.scevcheck:
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i8 [[T]], [[TMP1]]
-; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = sub i8 [[T]], [[TMP1]]
-; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp ugt i8 [[TMP3]], [[T]]
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = icmp ult i8 [[TMP2]], [[T]]
-; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255
-; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
+; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = trunc i32 [[LEN]] to i8
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = add i8 [[T]], [[TMP10]]
-; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = sub i8 [[T]], [[TMP10]]
-; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = icmp sgt i8 [[TMP12]], [[T]]
; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = icmp slt i8 [[TMP11]], [[T]]
-; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]]
; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = icmp ugt i32 [[LEN]], 255
-; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]]
+; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = or i1 [[TMP14]], [[TMP16]]
; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = or i1 [[TMP8]], [[TMP17]]
; UNROLL-NO-IC-NEXT: br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
; CHECK: vector.scevcheck:
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[T]], [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = sub i8 [[T]], [[TMP1]]
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i8 [[TMP3]], [[T]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i8 [[TMP2]], [[T]]
-; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255
-; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
; CHECK-NEXT: [[TMP10:%.*]] = trunc i32 [[LEN]] to i8
; CHECK-NEXT: [[TMP11:%.*]] = add i8 [[T]], [[TMP10]]
-; CHECK-NEXT: [[TMP12:%.*]] = sub i8 [[T]], [[TMP10]]
-; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i8 [[TMP12]], [[T]]
; CHECK-NEXT: [[TMP14:%.*]] = icmp slt i8 [[TMP11]], [[T]]
-; CHECK-NEXT: [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]]
; CHECK-NEXT: [[TMP16:%.*]] = icmp ugt i32 [[LEN]], 255
-; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]]
+; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP14]], [[TMP16]]
; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP8]], [[TMP17]]
; CHECK-NEXT: br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; UNROLL-NO-IC: vector.scevcheck:
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i8 [[T]], [[TMP1]]
-; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = sub i8 [[T]], [[TMP1]]
-; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp ugt i8 [[TMP3]], [[T]]
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = icmp ult i8 [[TMP2]], [[T]]
-; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255
-; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
+; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = trunc i32 [[LEN]] to i8
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = add i8 [[T]], [[TMP10]]
-; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = sub i8 [[T]], [[TMP10]]
-; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = icmp sgt i8 [[TMP12]], [[T]]
; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = icmp slt i8 [[TMP11]], [[T]]
-; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]]
; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = icmp ugt i32 [[LEN]], 255
-; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]]
+; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = or i1 [[TMP14]], [[TMP16]]
; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = or i1 [[TMP8]], [[TMP17]]
; UNROLL-NO-IC-NEXT: br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[K]], -1
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = add i32 0, [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = sub i32 0, [[TMP1]]
-; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], 0
; CHECK-NEXT: [[TMP5:%.*]] = icmp slt i32 [[TMP2]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[K]], 2
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[K]], -1
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i32 0, [[TMP1]]
-; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = sub i32 0, [[TMP1]]
-; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], 0
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = icmp slt i32 [[TMP2]], 0
-; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
+; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
; UNROLL-NO-IC-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[K]], 4
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
; CHECK-NEXT: [[TMP6:%.*]] = add i8 0, [[MUL_RESULT]]
; CHECK-NEXT: [[TMP7:%.*]] = sub i8 0, [[MUL_RESULT]]
-; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i8 [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = icmp slt i8 [[TMP6]], 0
-; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP8]], i1 [[TMP9]]
+; CHECK-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP6]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt i8 [[TMP7]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP9]], i1 [[TMP8]]
; CHECK-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP0]], 255
; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i8 [[TMP1]], 0
; CHECK-NEXT: [[TMP13:%.*]] = and i1 [[TMP11]], [[TMP12]]
; IND-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP4]], i8 [[TMP5]])
; IND-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0
; IND-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
-; IND-NEXT: [[TMP6:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128
-; IND-NEXT: [[TMP7:%.*]] = icmp slt i8 [[MUL_RESULT]], 0
-; IND-NEXT: [[TMP8:%.*]] = select i1 [[TMP3]], i1 [[TMP6]], i1 [[TMP7]]
+; IND-NEXT: [[TMP6:%.*]] = icmp slt i8 [[MUL_RESULT]], 0
+; IND-NEXT: [[TMP7:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128
+; IND-NEXT: [[TMP8:%.*]] = select i1 [[TMP3]], i1 [[TMP7]], i1 [[TMP6]]
; IND-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP0]], 255
; IND-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP1]], 0
; IND-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]]
; UNROLL-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP4]], i8 [[TMP5]])
; UNROLL-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0
; UNROLL-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
-; UNROLL-NEXT: [[TMP6:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128
-; UNROLL-NEXT: [[TMP7:%.*]] = icmp slt i8 [[MUL_RESULT]], 0
-; UNROLL-NEXT: [[TMP8:%.*]] = select i1 [[TMP3]], i1 [[TMP6]], i1 [[TMP7]]
+; UNROLL-NEXT: [[TMP6:%.*]] = icmp slt i8 [[MUL_RESULT]], 0
+; UNROLL-NEXT: [[TMP7:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128
+; UNROLL-NEXT: [[TMP8:%.*]] = select i1 [[TMP3]], i1 [[TMP7]], i1 [[TMP6]]
; UNROLL-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP0]], 255
; UNROLL-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP1]], 0
; UNROLL-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]]
; UNROLL-NO-IC-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i8 0, [[MUL_RESULT]]
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = sub i8 0, [[MUL_RESULT]]
-; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = icmp sgt i8 [[TMP7]], 0
-; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = icmp slt i8 [[TMP6]], 0
-; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP8]], i1 [[TMP9]]
+; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP6]], 0
+; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = icmp sgt i8 [[TMP7]], 0
+; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP9]], i1 [[TMP8]]
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP0]], 255
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = icmp ne i8 [[TMP1]], 0
; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = and i1 [[TMP11]], [[TMP12]]
; INTERLEAVE-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP4]], i8 [[TMP5]])
; INTERLEAVE-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0
; INTERLEAVE-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
-; INTERLEAVE-NEXT: [[TMP6:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128
-; INTERLEAVE-NEXT: [[TMP7:%.*]] = icmp slt i8 [[MUL_RESULT]], 0
-; INTERLEAVE-NEXT: [[TMP8:%.*]] = select i1 [[TMP3]], i1 [[TMP6]], i1 [[TMP7]]
+; INTERLEAVE-NEXT: [[TMP6:%.*]] = icmp slt i8 [[MUL_RESULT]], 0
+; INTERLEAVE-NEXT: [[TMP7:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128
+; INTERLEAVE-NEXT: [[TMP8:%.*]] = select i1 [[TMP3]], i1 [[TMP7]], i1 [[TMP6]]
; INTERLEAVE-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP0]], 255
; INTERLEAVE-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP1]], 0
; INTERLEAVE-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]]
; CHECK-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 [[TMP2]])
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
-; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], [[MUL_RESULT]]
; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP0]], [[MUL_RESULT]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], [[TMP0]]
-; CHECK-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP3]], [[TMP0]]
-; CHECK-NEXT: [[TMP7:%.*]] = select i1 true, i1 [[TMP5]], i1 [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[TMP1]], 4294967295
-; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP5]], [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK: vector.main.loop.iter.check:
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
; CHECK-NEXT: [[TMP6:%.*]] = add i8 0, [[MUL_RESULT]]
; CHECK-NEXT: [[TMP7:%.*]] = sub i8 0, [[MUL_RESULT]]
-; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i8 [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = icmp slt i8 [[TMP6]], 0
-; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP8]], i1 [[TMP9]]
+; CHECK-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP6]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt i8 [[TMP7]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP9]], i1 [[TMP8]]
; CHECK-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP0]], 255
; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i8 [[TMP1]], 0
; CHECK-NEXT: [[TMP13:%.*]] = and i1 [[TMP11]], [[TMP12]]
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
; CHECK-NEXT: [[TMP6:%.*]] = add i8 0, [[MUL_RESULT]]
; CHECK-NEXT: [[TMP7:%.*]] = sub i8 0, [[MUL_RESULT]]
-; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i8 [[TMP7]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = icmp ult i8 [[TMP6]], 0
-; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP8]], i1 [[TMP9]]
+; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i8 [[TMP6]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP7]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP9]], i1 [[TMP8]]
; CHECK-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP0]], 255
; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i8 [[TMP1]], 0
; CHECK-NEXT: [[TMP13:%.*]] = and i1 [[TMP11]], [[TMP12]]
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
; CHECK-NEXT: [[TMP5:%.*]] = add i8 0, [[MUL_RESULT]]
; CHECK-NEXT: [[TMP6:%.*]] = sub i8 0, [[MUL_RESULT]]
-; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt i8 [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP5]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP2]], i1 [[TMP7]], i1 [[TMP8]]
+; CHECK-NEXT: [[TMP7:%.*]] = icmp slt i8 [[TMP5]], 0
+; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i8 [[TMP6]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP2]], i1 [[TMP8]], i1 [[TMP7]]
; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[TMP0]], 255
; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i8 [[CSTEP]], 0
; CHECK-NEXT: [[TMP12:%.*]] = and i1 [[TMP10]], [[TMP11]]
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], [[ARR2]]
; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i8
; CHECK-NEXT: [[TMP6:%.*]] = add i8 1, [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = sub i8 1, [[TMP5]]
-; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i8 [[TMP7]], 1
; CHECK-NEXT: [[TMP9:%.*]] = icmp slt i8 [[TMP6]], 1
-; CHECK-NEXT: [[TMP10:%.*]] = select i1 false, i1 [[TMP8]], i1 [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP4]], 255
-; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP10]], [[TMP11]]
+; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP9]], [[TMP11]]
; CHECK-NEXT: br i1 [[TMP12]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 4
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i2
; CHECK-NEXT: [[TMP2:%.*]] = add i2 0, [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = sub i2 0, [[TMP1]]
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i2 [[TMP3]], 0
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i2 [[TMP2]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3
-; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[N]], -1
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i2
; CHECK-NEXT: [[TMP2:%.*]] = add i2 0, [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = sub i2 0, [[TMP1]]
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i2 [[TMP3]], 0
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i2 [[TMP2]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3
-; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[N]], -1
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i2
; CHECK-NEXT: [[TMP2:%.*]] = add i2 0, [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = sub i2 0, [[TMP1]]
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i2 [[TMP3]], 0
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i2 [[TMP2]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3
-; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 2
; LV-NEXT: [[OFMulResult:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul]], 0
; LV-NEXT: [[OFMulOverflow:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul]], 1
; LV: [[OFNegMulResult:%[^ ]*]] = sub i64 0, [[OFMulResult]]
-; LV-NEXT: [[PosGEP:%[^ ]*]] = getelementptr i8, i8 addrspace(13)* [[Base:%[^ ]*]], i64 [[OFMulResult]]
-; LV-NEXT: [[NegGEP:%[^ ]*]] = getelementptr i8, i8 addrspace(13)* [[Base]], i64 [[OFNegMulResult]]
+; LV-NEXT: [[NegGEP:%[^ ]*]] = getelementptr i8, i8 addrspace(13)* [[Base:%[^ ]*]], i64 [[OFNegMulResult]]
; LV-NEXT: icmp ugt i8 addrspace(13)* [[NegGEP]], [[Base]]
-; LV-NEXT: icmp ult i8 addrspace(13)* [[PosGEP]], [[Base]]
; LV-NOT: inttoptr
; LV-NOT: ptrtoint
top:
; LV-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
; LV-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
; LV-NEXT: [[TMP2:%.*]] = add i32 0, [[MUL_RESULT]]
-; LV-NEXT: [[TMP3:%.*]] = sub i32 0, [[MUL_RESULT]]
-; LV-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[TMP3]], 0
; LV-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP2]], 0
-; LV-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
; LV-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; LV-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
+; LV-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
; LV-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
; LV-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
; LV-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
; LV-NEXT: [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT3]]
; LV-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[A5]], i64 [[MUL_RESULT3]]
-; LV-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* [[A5]], i64 [[TMP11]]
-; LV-NEXT: [[TMP14:%.*]] = icmp ugt i8* [[TMP13]], [[A5]]
; LV-NEXT: [[TMP15:%.*]] = icmp ult i8* [[TMP12]], [[A5]]
-; LV-NEXT: [[TMP16:%.*]] = select i1 false, i1 [[TMP14]], i1 [[TMP15]]
-; LV-NEXT: [[TMP17:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]]
+; LV-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW4]]
; LV-NEXT: [[TMP18:%.*]] = or i1 [[TMP9]], [[TMP17]]
; LV-NEXT: br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
; LV: for.body.ph.lver.orig:
; LV-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP2]])
; LV-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
; LV-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
-; LV-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], [[MUL_RESULT]]
; LV-NEXT: [[TMP4:%.*]] = sub i32 [[TMP1]], [[MUL_RESULT]]
; LV-NEXT: [[TMP5:%.*]] = icmp ugt i32 [[TMP4]], [[TMP1]]
-; LV-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP1]]
-; LV-NEXT: [[TMP7:%.*]] = select i1 true, i1 [[TMP5]], i1 [[TMP6]]
; LV-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
+; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP5]], [[TMP8]]
; LV-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
; LV-NEXT: [[TMP12:%.*]] = trunc i64 [[N]] to i31
; LV-NEXT: [[TMP13:%.*]] = zext i31 [[TMP12]] to i64
; LV-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
; LV-NEXT: [[SCEVGEP5:%.*]] = bitcast i16* [[SCEVGEP]] to i8*
; LV-NEXT: [[TMP15:%.*]] = sub i64 0, [[MUL_RESULT3]]
-; LV-NEXT: [[TMP16:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[MUL_RESULT3]]
; LV-NEXT: [[TMP17:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[TMP15]]
; LV-NEXT: [[TMP18:%.*]] = icmp ugt i8* [[TMP17]], [[SCEVGEP5]]
-; LV-NEXT: [[TMP19:%.*]] = icmp ult i8* [[TMP16]], [[SCEVGEP5]]
-; LV-NEXT: [[TMP20:%.*]] = select i1 true, i1 [[TMP18]], i1 [[TMP19]]
-; LV-NEXT: [[TMP21:%.*]] = or i1 [[TMP20]], [[MUL_OVERFLOW4]]
+; LV-NEXT: [[TMP21:%.*]] = or i1 [[TMP18]], [[MUL_OVERFLOW4]]
; LV-NEXT: [[TMP22:%.*]] = or i1 [[TMP10]], [[TMP21]]
; LV-NEXT: br i1 [[TMP22]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
; LV: for.body.ph.lver.orig:
; LV-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
; LV-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
; LV-NEXT: [[TMP2:%.*]] = add i32 0, [[MUL_RESULT]]
-; LV-NEXT: [[TMP3:%.*]] = sub i32 0, [[MUL_RESULT]]
-; LV-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], 0
; LV-NEXT: [[TMP5:%.*]] = icmp slt i32 [[TMP2]], 0
-; LV-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
; LV-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; LV-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
+; LV-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
; LV-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
; LV-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
; LV-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
; LV-NEXT: [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT3]]
; LV-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[A5]], i64 [[MUL_RESULT3]]
-; LV-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* [[A5]], i64 [[TMP11]]
-; LV-NEXT: [[TMP14:%.*]] = icmp ugt i8* [[TMP13]], [[A5]]
; LV-NEXT: [[TMP15:%.*]] = icmp ult i8* [[TMP12]], [[A5]]
-; LV-NEXT: [[TMP16:%.*]] = select i1 false, i1 [[TMP14]], i1 [[TMP15]]
-; LV-NEXT: [[TMP17:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]]
+; LV-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW4]]
; LV-NEXT: [[TMP18:%.*]] = or i1 [[TMP9]], [[TMP17]]
; LV-NEXT: br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
; LV: for.body.ph.lver.orig:
; LV-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP2]])
; LV-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
; LV-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
-; LV-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], [[MUL_RESULT]]
; LV-NEXT: [[TMP4:%.*]] = sub i32 [[TMP1]], [[MUL_RESULT]]
; LV-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], [[TMP1]]
-; LV-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP3]], [[TMP1]]
-; LV-NEXT: [[TMP7:%.*]] = select i1 true, i1 [[TMP5]], i1 [[TMP6]]
; LV-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
+; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP5]], [[TMP8]]
; LV-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
; LV-NEXT: [[TMP12:%.*]] = sext i32 [[TMP1]] to i64
; LV-NEXT: [[SCEVGEP:%.*]] = getelementptr i16, i16* [[A:%.*]], i64 [[TMP12]]
; LV-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
; LV-NEXT: [[SCEVGEP5:%.*]] = bitcast i16* [[SCEVGEP]] to i8*
; LV-NEXT: [[TMP13:%.*]] = sub i64 0, [[MUL_RESULT3]]
-; LV-NEXT: [[TMP14:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[MUL_RESULT3]]
; LV-NEXT: [[TMP15:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[TMP13]]
; LV-NEXT: [[TMP16:%.*]] = icmp ugt i8* [[TMP15]], [[SCEVGEP5]]
-; LV-NEXT: [[TMP17:%.*]] = icmp ult i8* [[TMP14]], [[SCEVGEP5]]
-; LV-NEXT: [[TMP18:%.*]] = select i1 true, i1 [[TMP16]], i1 [[TMP17]]
-; LV-NEXT: [[TMP19:%.*]] = or i1 [[TMP18]], [[MUL_OVERFLOW4]]
+; LV-NEXT: [[TMP19:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]]
; LV-NEXT: [[TMP20:%.*]] = or i1 [[TMP10]], [[TMP19]]
; LV-NEXT: br i1 [[TMP20]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
; LV: for.body.ph.lver.orig:
; LV-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP2]])
; LV-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
; LV-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
-; LV-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], [[MUL_RESULT]]
; LV-NEXT: [[TMP4:%.*]] = sub i32 [[TMP1]], [[MUL_RESULT]]
; LV-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], [[TMP1]]
-; LV-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP3]], [[TMP1]]
-; LV-NEXT: [[TMP7:%.*]] = select i1 true, i1 [[TMP5]], i1 [[TMP6]]
; LV-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
+; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP5]], [[TMP8]]
; LV-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
; LV-NEXT: [[TMP12:%.*]] = sext i32 [[TMP1]] to i64
; LV-NEXT: [[SCEVGEP:%.*]] = getelementptr i16, i16* [[A:%.*]], i64 [[TMP12]]
; LV-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
; LV-NEXT: [[SCEVGEP5:%.*]] = bitcast i16* [[SCEVGEP]] to i8*
; LV-NEXT: [[TMP13:%.*]] = sub i64 0, [[MUL_RESULT3]]
-; LV-NEXT: [[TMP14:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[MUL_RESULT3]]
; LV-NEXT: [[TMP15:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[TMP13]]
; LV-NEXT: [[TMP16:%.*]] = icmp ugt i8* [[TMP15]], [[SCEVGEP5]]
-; LV-NEXT: [[TMP17:%.*]] = icmp ult i8* [[TMP14]], [[SCEVGEP5]]
-; LV-NEXT: [[TMP18:%.*]] = select i1 true, i1 [[TMP16]], i1 [[TMP17]]
-; LV-NEXT: [[TMP19:%.*]] = or i1 [[TMP18]], [[MUL_OVERFLOW4]]
+; LV-NEXT: [[TMP19:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]]
; LV-NEXT: [[TMP20:%.*]] = or i1 [[TMP10]], [[TMP19]]
; LV-NEXT: br i1 [[TMP20]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
; LV: for.body.ph.lver.orig: