[SCEVExpand] Only create required predicate checks.

author Florian Hahn <flo@fhahn.com>

Fri, 7 Jan 2022 14:44:03 +0000 (14:44 +0000)

committer Florian Hahn <flo@fhahn.com>

Fri, 7 Jan 2022 14:49:02 +0000 (14:49 +0000)
author Florian Hahn <flo@fhahn.com>
Fri, 7 Jan 2022 14:44:03 +0000 (14:44 +0000)
committer Florian Hahn <flo@fhahn.com>
Fri, 7 Jan 2022 14:49:02 +0000 (14:49 +0000)
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp

index 0bb3c8b..136dfae 100644 (file)
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -2510,30 +2510,45 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
    }
  
    // Compute:
-  //   Start + |Step| * Backedge < Start
-  //   Start - |Step| * Backedge > Start
+  //   1. Start + |Step| * Backedge < Start
+  //   2. Start - |Step| * Backedge > Start
+  //
+  // And select either 1. or 2. depending on whether step is positive or
+  // negative. If Step is known to be positive or negative, only create
+  // either 1. or 2.
    Value *Add = nullptr, *Sub = nullptr;
+  bool NeedPosCheck = !SE.isKnownNegative(Step);
+  bool NeedNegCheck = !SE.isKnownPositive(Step);
+
    if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARTy)) {
      StartValue = InsertNoopCastOfTo(
          StartValue, Builder.getInt8PtrTy(ARPtrTy->getAddressSpace()));
      Value *NegMulV = Builder.CreateNeg(MulV);
-    Add = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, MulV);
-    Sub = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, NegMulV);
+    if (NeedPosCheck)
+      Add = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, MulV);
+    if (NeedNegCheck)
+      Sub = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, NegMulV);
    } else {
-    Add = Builder.CreateAdd(StartValue, MulV);
-    Sub = Builder.CreateSub(StartValue, MulV);
+    if (NeedPosCheck)
+      Add = Builder.CreateAdd(StartValue, MulV);
+    if (NeedNegCheck)
+      Sub = Builder.CreateSub(StartValue, MulV);
+  }
+
+  Value *EndCompareLT = nullptr;
+  Value *EndCompareGT = nullptr;
+  Value *EndCheck = nullptr;
+  if (NeedPosCheck)
+    EndCheck = EndCompareLT = Builder.CreateICmp(
+        Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, Add, StartValue);
+  if (NeedNegCheck)
+    EndCheck = EndCompareGT = Builder.CreateICmp(
+        Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
+  if (NeedPosCheck && NeedNegCheck) {
+    // Select the answer based on the sign of Step.
+    EndCheck = Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT);
    }
  
-  Value *EndCompareGT = Builder.CreateICmp(
-      Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
-
-  Value *EndCompareLT = Builder.CreateICmp(
-      Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, Add, StartValue);
-
-  // Select the answer based on the sign of Step.
-  Value *EndCheck =
-      Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT);
-
    // If the backedge taken count type is larger than the AR type,
    // check that we don't drop any bits by truncating it. If we are
    // dropping bits, then we have overflow (unless the step is zero).
diff --git a/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll b/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll

index 2fc49f2..102149e 100644 (file)
--- a/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll
+++ b/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll
@@ -18,23 +18,17 @@ define void @f(i32* noalias %a, i32* noalias %b, i32* noalias %c, i32* noalias %
  ; CHECK-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
  ; CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
  ; CHECK-NEXT:    [[TMP2:%.*]] = add i32 0, [[MUL_RESULT]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub i32 0, [[MUL_RESULT]]
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp ugt i32 [[TMP3]], 0
  ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i32 [[TMP2]], 0
-; CHECK-NEXT:    [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
  ; CHECK-NEXT:    [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; CHECK-NEXT:    [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
+; CHECK-NEXT:    [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
  ; CHECK-NEXT:    [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
  ; CHECK-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
  ; CHECK-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
  ; CHECK-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
  ; CHECK-NEXT:    [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT3]]
  ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i8, i8* [[A5]], i64 [[MUL_RESULT3]]
-; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr i8, i8* [[A5]], i64 [[TMP11]]
-; CHECK-NEXT:    [[TMP14:%.*]] = icmp ugt i8* [[TMP13]], [[A5]]
  ; CHECK-NEXT:    [[TMP15:%.*]] = icmp ult i8* [[TMP12]], [[A5]]
-; CHECK-NEXT:    [[TMP16:%.*]] = select i1 false, i1 [[TMP14]], i1 [[TMP15]]
-; CHECK-NEXT:    [[TMP17:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]]
+; CHECK-NEXT:    [[TMP17:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW4]]
  ; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP9]], [[TMP17]]
  ; CHECK-NEXT:    br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
  ; CHECK:       for.body.ph.lver.orig:
@@ -166,23 +160,17 @@ define void @f_with_offset(i32* noalias %b, i32* noalias %c, i32* noalias %d, i3
  ; CHECK-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
  ; CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
  ; CHECK-NEXT:    [[TMP2:%.*]] = add i32 0, [[MUL_RESULT]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub i32 0, [[MUL_RESULT]]
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp ugt i32 [[TMP3]], 0
  ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i32 [[TMP2]], 0
-; CHECK-NEXT:    [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
  ; CHECK-NEXT:    [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; CHECK-NEXT:    [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
+; CHECK-NEXT:    [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
  ; CHECK-NEXT:    [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
  ; CHECK-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
  ; CHECK-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
  ; CHECK-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
  ; CHECK-NEXT:    [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT3]]
  ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i8, i8* bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*), i64 [[MUL_RESULT3]]
-; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr i8, i8* bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*), i64 [[TMP11]]
-; CHECK-NEXT:    [[TMP14:%.*]] = icmp ugt i8* [[TMP13]], bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*)
  ; CHECK-NEXT:    [[TMP15:%.*]] = icmp ult i8* [[TMP12]], bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*)
-; CHECK-NEXT:    [[TMP16:%.*]] = select i1 false, i1 [[TMP14]], i1 [[TMP15]]
-; CHECK-NEXT:    [[TMP17:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]]
+; CHECK-NEXT:    [[TMP17:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW4]]
  ; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP9]], [[TMP17]]
  ; CHECK-NEXT:    br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
  ; CHECK:       for.body.ph.lver.orig:
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll

index 7c39374..7b044dc 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll
@@ -541,13 +541,10 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe
  ; VF-TWO-CHECK-NEXT:    [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 [[TMP2]])
  ; VF-TWO-CHECK-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
  ; VF-TWO-CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
-; VF-TWO-CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP0]], [[MUL_RESULT]]
  ; VF-TWO-CHECK-NEXT:    [[TMP4:%.*]] = sub i32 [[TMP0]], [[MUL_RESULT]]
  ; VF-TWO-CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], [[TMP0]]
-; VF-TWO-CHECK-NEXT:    [[TMP6:%.*]] = icmp slt i32 [[TMP3]], [[TMP0]]
-; VF-TWO-CHECK-NEXT:    [[TMP7:%.*]] = select i1 true, i1 [[TMP5]], i1 [[TMP6]]
  ; VF-TWO-CHECK-NEXT:    [[TMP8:%.*]] = icmp ugt i64 [[TMP1]], 4294967295
-; VF-TWO-CHECK-NEXT:    [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
+; VF-TWO-CHECK-NEXT:    [[TMP9:%.*]] = or i1 [[TMP5]], [[TMP8]]
  ; VF-TWO-CHECK-NEXT:    [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
  ; VF-TWO-CHECK-NEXT:    br i1 [[TMP10]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
  ; VF-TWO-CHECK:       vector.main.loop.iter.check:
@@ -771,13 +768,10 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe
  ; VF-FOUR-CHECK-NEXT:    [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 [[TMP2]])
  ; VF-FOUR-CHECK-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
  ; VF-FOUR-CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
-; VF-FOUR-CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP0]], [[MUL_RESULT]]
  ; VF-FOUR-CHECK-NEXT:    [[TMP4:%.*]] = sub i32 [[TMP0]], [[MUL_RESULT]]
  ; VF-FOUR-CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], [[TMP0]]
-; VF-FOUR-CHECK-NEXT:    [[TMP6:%.*]] = icmp slt i32 [[TMP3]], [[TMP0]]
-; VF-FOUR-CHECK-NEXT:    [[TMP7:%.*]] = select i1 true, i1 [[TMP5]], i1 [[TMP6]]
  ; VF-FOUR-CHECK-NEXT:    [[TMP8:%.*]] = icmp ugt i64 [[TMP1]], 4294967295
-; VF-FOUR-CHECK-NEXT:    [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
+; VF-FOUR-CHECK-NEXT:    [[TMP9:%.*]] = or i1 [[TMP5]], [[TMP8]]
  ; VF-FOUR-CHECK-NEXT:    [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
  ; VF-FOUR-CHECK-NEXT:    br i1 [[TMP10]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
  ; VF-FOUR-CHECK:       vector.main.loop.iter.check:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll b/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll

index 2153839..d2543a2 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll
@@ -63,11 +63,8 @@ define void @foo(i32* nocapture %a, i32* nocapture %b, i32 %k, i32 %m) #0 {
  ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH]], label [[VECTOR_SCEVCHECK:%.*]]
  ; CHECK:       vector.scevcheck:
  ; CHECK-NEXT:    [[TMP10:%.*]] = add i32 [[TMP8]], [[TMP0]]
-; CHECK-NEXT:    [[TMP11:%.*]] = sub i32 [[TMP8]], [[TMP0]]
-; CHECK-NEXT:    [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]]
  ; CHECK-NEXT:    [[TMP13:%.*]] = icmp slt i32 [[TMP10]], [[TMP8]]
-; CHECK-NEXT:    [[TMP14:%.*]] = select i1 false, i1 [[TMP12]], i1 [[TMP13]]
-; CHECK-NEXT:    br i1 [[TMP14]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT:    br i1 [[TMP13]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
  ; CHECK:       vector.ph:
  ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
  ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll

index f53e8e6..61d703a 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll
@@ -52,13 +52,10 @@ define i32 @main() local_unnamed_addr #0 {
  ; CHECK-NEXT:    [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 1, i8 [[TMP10]])
  ; CHECK-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0
  ; CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
-; CHECK-NEXT:    [[TMP11:%.*]] = add i8 [[TMP7]], [[MUL_RESULT]]
  ; CHECK-NEXT:    [[TMP12:%.*]] = sub i8 [[TMP7]], [[MUL_RESULT]]
-; CHECK-NEXT:    [[TMP13:%.*]] = icmp ugt i8 [[TMP12]], [[TMP7]]
-; CHECK-NEXT:    [[TMP14:%.*]] = icmp ult i8 [[TMP11]], [[TMP7]]
-; CHECK-NEXT:    [[TMP15:%.*]] = select i1 true, i1 [[TMP13]], i1 [[TMP14]]
+; CHECK-NEXT:    [[TMP14:%.*]] = icmp ugt i8 [[TMP12]], [[TMP7]]
  ; CHECK-NEXT:    [[TMP16:%.*]] = icmp ugt i32 [[TMP9]], 255
-; CHECK-NEXT:    [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]]
+; CHECK-NEXT:    [[TMP17:%.*]] = or i1 [[TMP14]], [[TMP16]]
  ; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW]]
  ; CHECK-NEXT:    br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
  ; CHECK:       vector.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll

index e2ea0af..0a8763a 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll
@@ -646,11 +646,8 @@ define void @sink_dominance(i32* %ptr, i32 %N) {
  ; CHECK-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1)
  ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[UMAX]], -1
  ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 0, [[TMP0]]
-; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 0, [[TMP0]]
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], 0
  ; CHECK-NEXT:    [[TMP4:%.*]] = icmp slt i32 [[TMP1]], 0
-; CHECK-NEXT:    [[TMP5:%.*]] = select i1 false, i1 [[TMP3]], i1 [[TMP4]]
-; CHECK-NEXT:    br i1 [[TMP5]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT:    br i1 [[TMP4]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
  ; CHECK:       vector.ph:
  ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[UMAX1]], 4
  ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[UMAX1]], [[N_MOD_VF]]
@@ -734,11 +731,8 @@ define void @sink_dominance_2(i32* %ptr, i32 %N) {
  ; CHECK-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1)
  ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[UMAX]], -1
  ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 0, [[TMP0]]
-; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 0, [[TMP0]]
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], 0
  ; CHECK-NEXT:    [[TMP4:%.*]] = icmp slt i32 [[TMP1]], 0
-; CHECK-NEXT:    [[TMP5:%.*]] = select i1 false, i1 [[TMP3]], i1 [[TMP4]]
-; CHECK-NEXT:    br i1 [[TMP5]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT:    br i1 [[TMP4]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
  ; CHECK:       vector.ph:
  ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[UMAX1]], 4
  ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[UMAX1]], [[N_MOD_VF]]
diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll

index 68b9ce0..a128b44 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction.ll
@@ -3557,20 +3557,14 @@ define void @wrappingindvars1(i8 %t, i32 %len, i32 *%A) {
  ; CHECK:       vector.scevcheck:
  ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
  ; CHECK-NEXT:    [[TMP2:%.*]] = add i8 [[T]], [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub i8 [[T]], [[TMP1]]
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp ugt i8 [[TMP3]], [[T]]
  ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i8 [[TMP2]], [[T]]
-; CHECK-NEXT:    [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
  ; CHECK-NEXT:    [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255
-; CHECK-NEXT:    [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
+; CHECK-NEXT:    [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
  ; CHECK-NEXT:    [[TMP10:%.*]] = trunc i32 [[LEN]] to i8
  ; CHECK-NEXT:    [[TMP11:%.*]] = add i8 [[T]], [[TMP10]]
-; CHECK-NEXT:    [[TMP12:%.*]] = sub i8 [[T]], [[TMP10]]
-; CHECK-NEXT:    [[TMP13:%.*]] = icmp sgt i8 [[TMP12]], [[T]]
  ; CHECK-NEXT:    [[TMP14:%.*]] = icmp slt i8 [[TMP11]], [[T]]
-; CHECK-NEXT:    [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]]
  ; CHECK-NEXT:    [[TMP16:%.*]] = icmp ugt i32 [[LEN]], 255
-; CHECK-NEXT:    [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]]
+; CHECK-NEXT:    [[TMP17:%.*]] = or i1 [[TMP14]], [[TMP16]]
  ; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP8]], [[TMP17]]
  ; CHECK-NEXT:    br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
  ; CHECK:       vector.ph:
@@ -3784,20 +3778,14 @@ define void @wrappingindvars1(i8 %t, i32 %len, i32 *%A) {
  ; UNROLL-NO-IC:       vector.scevcheck:
  ; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
  ; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add i8 [[T]], [[TMP1]]
-; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = sub i8 [[T]], [[TMP1]]
-; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = icmp ugt i8 [[TMP3]], [[T]]
  ; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = icmp ult i8 [[TMP2]], [[T]]
-; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
  ; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255
-; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
+; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
  ; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = trunc i32 [[LEN]] to i8
  ; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = add i8 [[T]], [[TMP10]]
-; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = sub i8 [[T]], [[TMP10]]
-; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = icmp sgt i8 [[TMP12]], [[T]]
  ; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = icmp slt i8 [[TMP11]], [[T]]
-; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]]
  ; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = icmp ugt i32 [[LEN]], 255
-; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]]
+; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = or i1 [[TMP14]], [[TMP16]]
  ; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = or i1 [[TMP8]], [[TMP17]]
  ; UNROLL-NO-IC-NEXT:    br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
  ; UNROLL-NO-IC:       vector.ph:
@@ -3984,20 +3972,14 @@ define void @wrappingindvars2(i8 %t, i32 %len, i32 *%A) {
  ; CHECK:       vector.scevcheck:
  ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
  ; CHECK-NEXT:    [[TMP2:%.*]] = add i8 [[T]], [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub i8 [[T]], [[TMP1]]
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp ugt i8 [[TMP3]], [[T]]
  ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i8 [[TMP2]], [[T]]
-; CHECK-NEXT:    [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
  ; CHECK-NEXT:    [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255
-; CHECK-NEXT:    [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
+; CHECK-NEXT:    [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
  ; CHECK-NEXT:    [[TMP10:%.*]] = trunc i32 [[LEN]] to i8
  ; CHECK-NEXT:    [[TMP11:%.*]] = add i8 [[T]], [[TMP10]]
-; CHECK-NEXT:    [[TMP12:%.*]] = sub i8 [[T]], [[TMP10]]
-; CHECK-NEXT:    [[TMP13:%.*]] = icmp sgt i8 [[TMP12]], [[T]]
  ; CHECK-NEXT:    [[TMP14:%.*]] = icmp slt i8 [[TMP11]], [[T]]
-; CHECK-NEXT:    [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]]
  ; CHECK-NEXT:    [[TMP16:%.*]] = icmp ugt i32 [[LEN]], 255
-; CHECK-NEXT:    [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]]
+; CHECK-NEXT:    [[TMP17:%.*]] = or i1 [[TMP14]], [[TMP16]]
  ; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP8]], [[TMP17]]
  ; CHECK-NEXT:    br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
  ; CHECK:       vector.ph:
@@ -4221,20 +4203,14 @@ define void @wrappingindvars2(i8 %t, i32 %len, i32 *%A) {
  ; UNROLL-NO-IC:       vector.scevcheck:
  ; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
  ; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add i8 [[T]], [[TMP1]]
-; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = sub i8 [[T]], [[TMP1]]
-; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = icmp ugt i8 [[TMP3]], [[T]]
  ; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = icmp ult i8 [[TMP2]], [[T]]
-; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
  ; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255
-; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
+; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
  ; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = trunc i32 [[LEN]] to i8
  ; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = add i8 [[T]], [[TMP10]]
-; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = sub i8 [[T]], [[TMP10]]
-; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = icmp sgt i8 [[TMP12]], [[T]]
  ; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = icmp slt i8 [[TMP11]], [[T]]
-; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]]
  ; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = icmp ugt i32 [[LEN]], 255
-; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]]
+; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = or i1 [[TMP14]], [[TMP16]]
  ; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = or i1 [[TMP8]], [[TMP17]]
  ; UNROLL-NO-IC-NEXT:    br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
  ; UNROLL-NO-IC:       vector.ph:
@@ -4634,12 +4610,9 @@ define void @trunciv(i32* nocapture %a, i32 %start, i64 %k) {
  ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[K]], -1
  ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
  ; CHECK-NEXT:    [[TMP2:%.*]] = add i32 0, [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub i32 0, [[TMP1]]
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], 0
  ; CHECK-NEXT:    [[TMP5:%.*]] = icmp slt i32 [[TMP2]], 0
-; CHECK-NEXT:    [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
  ; CHECK-NEXT:    [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; CHECK-NEXT:    [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
+; CHECK-NEXT:    [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
  ; CHECK-NEXT:    br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
  ; CHECK:       vector.ph:
  ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[K]], 2
@@ -4774,12 +4747,9 @@ define void @trunciv(i32* nocapture %a, i32 %start, i64 %k) {
  ; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i64 [[K]], -1
  ; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
  ; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add i32 0, [[TMP1]]
-; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = sub i32 0, [[TMP1]]
-; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], 0
  ; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = icmp slt i32 [[TMP2]], 0
-; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
  ; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
+; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
  ; UNROLL-NO-IC-NEXT:    br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
  ; UNROLL-NO-IC:       vector.ph:
  ; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[K]], 4
@@ -6549,9 +6519,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
  ; CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
  ; CHECK-NEXT:    [[TMP6:%.*]] = add i8 0, [[MUL_RESULT]]
  ; CHECK-NEXT:    [[TMP7:%.*]] = sub i8 0, [[MUL_RESULT]]
-; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i8 [[TMP7]], 0
-; CHECK-NEXT:    [[TMP9:%.*]] = icmp slt i8 [[TMP6]], 0
-; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP8]], i1 [[TMP9]]
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp slt i8 [[TMP6]], 0
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp sgt i8 [[TMP7]], 0
+; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP9]], i1 [[TMP8]]
  ; CHECK-NEXT:    [[TMP11:%.*]] = icmp ugt i64 [[TMP0]], 255
  ; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne i8 [[TMP1]], 0
  ; CHECK-NEXT:    [[TMP13:%.*]] = and i1 [[TMP11]], [[TMP12]]
@@ -6627,9 +6597,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
  ; IND-NEXT:    [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP4]], i8 [[TMP5]])
  ; IND-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0
  ; IND-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
-; IND-NEXT:    [[TMP6:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128
-; IND-NEXT:    [[TMP7:%.*]] = icmp slt i8 [[MUL_RESULT]], 0
-; IND-NEXT:    [[TMP8:%.*]] = select i1 [[TMP3]], i1 [[TMP6]], i1 [[TMP7]]
+; IND-NEXT:    [[TMP6:%.*]] = icmp slt i8 [[MUL_RESULT]], 0
+; IND-NEXT:    [[TMP7:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128
+; IND-NEXT:    [[TMP8:%.*]] = select i1 [[TMP3]], i1 [[TMP7]], i1 [[TMP6]]
  ; IND-NEXT:    [[TMP9:%.*]] = icmp ugt i64 [[TMP0]], 255
  ; IND-NEXT:    [[TMP10:%.*]] = icmp ne i8 [[TMP1]], 0
  ; IND-NEXT:    [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]]
@@ -6700,9 +6670,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
  ; UNROLL-NEXT:    [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP4]], i8 [[TMP5]])
  ; UNROLL-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0
  ; UNROLL-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
-; UNROLL-NEXT:    [[TMP6:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128
-; UNROLL-NEXT:    [[TMP7:%.*]] = icmp slt i8 [[MUL_RESULT]], 0
-; UNROLL-NEXT:    [[TMP8:%.*]] = select i1 [[TMP3]], i1 [[TMP6]], i1 [[TMP7]]
+; UNROLL-NEXT:    [[TMP6:%.*]] = icmp slt i8 [[MUL_RESULT]], 0
+; UNROLL-NEXT:    [[TMP7:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128
+; UNROLL-NEXT:    [[TMP8:%.*]] = select i1 [[TMP3]], i1 [[TMP7]], i1 [[TMP6]]
  ; UNROLL-NEXT:    [[TMP9:%.*]] = icmp ugt i64 [[TMP0]], 255
  ; UNROLL-NEXT:    [[TMP10:%.*]] = icmp ne i8 [[TMP1]], 0
  ; UNROLL-NEXT:    [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]]
@@ -6780,9 +6750,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
  ; UNROLL-NO-IC-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
  ; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = add i8 0, [[MUL_RESULT]]
  ; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = sub i8 0, [[MUL_RESULT]]
-; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = icmp sgt i8 [[TMP7]], 0
-; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = icmp slt i8 [[TMP6]], 0
-; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP8]], i1 [[TMP9]]
+; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = icmp slt i8 [[TMP6]], 0
+; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = icmp sgt i8 [[TMP7]], 0
+; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP9]], i1 [[TMP8]]
  ; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = icmp ugt i64 [[TMP0]], 255
  ; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = icmp ne i8 [[TMP1]], 0
  ; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = and i1 [[TMP11]], [[TMP12]]
@@ -6865,9 +6835,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
  ; INTERLEAVE-NEXT:    [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP4]], i8 [[TMP5]])
  ; INTERLEAVE-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0
  ; INTERLEAVE-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
-; INTERLEAVE-NEXT:    [[TMP6:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128
-; INTERLEAVE-NEXT:    [[TMP7:%.*]] = icmp slt i8 [[MUL_RESULT]], 0
-; INTERLEAVE-NEXT:    [[TMP8:%.*]] = select i1 [[TMP3]], i1 [[TMP6]], i1 [[TMP7]]
+; INTERLEAVE-NEXT:    [[TMP6:%.*]] = icmp slt i8 [[MUL_RESULT]], 0
+; INTERLEAVE-NEXT:    [[TMP7:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128
+; INTERLEAVE-NEXT:    [[TMP8:%.*]] = select i1 [[TMP3]], i1 [[TMP7]], i1 [[TMP6]]
  ; INTERLEAVE-NEXT:    [[TMP9:%.*]] = icmp ugt i64 [[TMP0]], 255
  ; INTERLEAVE-NEXT:    [[TMP10:%.*]] = icmp ne i8 [[TMP1]], 0
  ; INTERLEAVE-NEXT:    [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]]
diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll

index d191d15..2a49b56 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
@@ -171,13 +171,10 @@ define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signe
  ; CHECK-NEXT:    [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 [[TMP2]])
  ; CHECK-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
  ; CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
-; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP0]], [[MUL_RESULT]]
  ; CHECK-NEXT:    [[TMP4:%.*]] = sub i32 [[TMP0]], [[MUL_RESULT]]
  ; CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], [[TMP0]]
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp slt i32 [[TMP3]], [[TMP0]]
-; CHECK-NEXT:    [[TMP7:%.*]] = select i1 true, i1 [[TMP5]], i1 [[TMP6]]
  ; CHECK-NEXT:    [[TMP8:%.*]] = icmp ugt i64 [[TMP1]], 4294967295
-; CHECK-NEXT:    [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
+; CHECK-NEXT:    [[TMP9:%.*]] = or i1 [[TMP5]], [[TMP8]]
  ; CHECK-NEXT:    [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
  ; CHECK-NEXT:    br i1 [[TMP10]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
  ; CHECK:       vector.main.loop.iter.check:
diff --git a/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll b/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll

index 2e7ad45..814c825 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll
@@ -53,9 +53,9 @@ define void @doit1(i32 %n, i32 %step) local_unnamed_addr {
  ; CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
  ; CHECK-NEXT:    [[TMP6:%.*]] = add i8 0, [[MUL_RESULT]]
  ; CHECK-NEXT:    [[TMP7:%.*]] = sub i8 0, [[MUL_RESULT]]
-; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i8 [[TMP7]], 0
-; CHECK-NEXT:    [[TMP9:%.*]] = icmp slt i8 [[TMP6]], 0
-; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP8]], i1 [[TMP9]]
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp slt i8 [[TMP6]], 0
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp sgt i8 [[TMP7]], 0
+; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP9]], i1 [[TMP8]]
  ; CHECK-NEXT:    [[TMP11:%.*]] = icmp ugt i64 [[TMP0]], 255
  ; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne i8 [[TMP1]], 0
  ; CHECK-NEXT:    [[TMP13:%.*]] = and i1 [[TMP11]], [[TMP12]]
@@ -179,9 +179,9 @@ define void @doit2(i32 %n, i32 %step) local_unnamed_addr  {
  ; CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
  ; CHECK-NEXT:    [[TMP6:%.*]] = add i8 0, [[MUL_RESULT]]
  ; CHECK-NEXT:    [[TMP7:%.*]] = sub i8 0, [[MUL_RESULT]]
-; CHECK-NEXT:    [[TMP8:%.*]] = icmp ugt i8 [[TMP7]], 0
-; CHECK-NEXT:    [[TMP9:%.*]] = icmp ult i8 [[TMP6]], 0
-; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP8]], i1 [[TMP9]]
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp ult i8 [[TMP6]], 0
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp ugt i8 [[TMP7]], 0
+; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP9]], i1 [[TMP8]]
  ; CHECK-NEXT:    [[TMP11:%.*]] = icmp ugt i64 [[TMP0]], 255
  ; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne i8 [[TMP1]], 0
  ; CHECK-NEXT:    [[TMP13:%.*]] = and i1 [[TMP11]], [[TMP12]]
@@ -379,9 +379,9 @@ define void @doit4(i32 %n, i8 signext %cstep) local_unnamed_addr {
  ; CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
  ; CHECK-NEXT:    [[TMP5:%.*]] = add i8 0, [[MUL_RESULT]]
  ; CHECK-NEXT:    [[TMP6:%.*]] = sub i8 0, [[MUL_RESULT]]
-; CHECK-NEXT:    [[TMP7:%.*]] = icmp sgt i8 [[TMP6]], 0
-; CHECK-NEXT:    [[TMP8:%.*]] = icmp slt i8 [[TMP5]], 0
-; CHECK-NEXT:    [[TMP9:%.*]] = select i1 [[TMP2]], i1 [[TMP7]], i1 [[TMP8]]
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp slt i8 [[TMP5]], 0
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i8 [[TMP6]], 0
+; CHECK-NEXT:    [[TMP9:%.*]] = select i1 [[TMP2]], i1 [[TMP8]], i1 [[TMP7]]
  ; CHECK-NEXT:    [[TMP10:%.*]] = icmp ugt i64 [[TMP0]], 255
  ; CHECK-NEXT:    [[TMP11:%.*]] = icmp ne i8 [[CSTEP]], 0
  ; CHECK-NEXT:    [[TMP12:%.*]] = and i1 [[TMP10]], [[TMP11]]
diff --git a/llvm/test/Transforms/LoopVectorize/pr45259.ll b/llvm/test/Transforms/LoopVectorize/pr45259.ll

index a5e8d3b..04cae22 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/pr45259.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr45259.ll
@@ -25,12 +25,9 @@ define i8 @widget(i8* %arr, i8 %t9) {
  ; CHECK-NEXT:    [[TMP4:%.*]] = sub i64 [[TMP3]], [[ARR2]]
  ; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[TMP4]] to i8
  ; CHECK-NEXT:    [[TMP6:%.*]] = add i8 1, [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = sub i8 1, [[TMP5]]
-; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i8 [[TMP7]], 1
  ; CHECK-NEXT:    [[TMP9:%.*]] = icmp slt i8 [[TMP6]], 1
-; CHECK-NEXT:    [[TMP10:%.*]] = select i1 false, i1 [[TMP8]], i1 [[TMP9]]
  ; CHECK-NEXT:    [[TMP11:%.*]] = icmp ugt i64 [[TMP4]], 255
-; CHECK-NEXT:    [[TMP12:%.*]] = or i1 [[TMP10]], [[TMP11]]
+; CHECK-NEXT:    [[TMP12:%.*]] = or i1 [[TMP9]], [[TMP11]]
  ; CHECK-NEXT:    br i1 [[TMP12]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
  ; CHECK:       vector.ph:
  ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 4
diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll

index b29d29b..fb341f8 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll
@@ -21,12 +21,9 @@ define void @load_clamped_index(i32* %A, i32* %B, i32 %N) {
  ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
  ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[TMP0]] to i2
  ; CHECK-NEXT:    [[TMP2:%.*]] = add i2 0, [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub i2 0, [[TMP1]]
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp ugt i2 [[TMP3]], 0
  ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i2 [[TMP2]], 0
-; CHECK-NEXT:    [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
  ; CHECK-NEXT:    [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3
-; CHECK-NEXT:    [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
+; CHECK-NEXT:    [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
  ; CHECK-NEXT:    br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
  ; CHECK:       vector.memcheck:
  ; CHECK-NEXT:    [[TMP10:%.*]] = add i32 [[N]], -1
@@ -111,12 +108,9 @@ define void @store_clamped_index(i32* %A, i32* %B, i32 %N) {
  ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
  ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[TMP0]] to i2
  ; CHECK-NEXT:    [[TMP2:%.*]] = add i2 0, [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub i2 0, [[TMP1]]
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp ugt i2 [[TMP3]], 0
  ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i2 [[TMP2]], 0
-; CHECK-NEXT:    [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
  ; CHECK-NEXT:    [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3
-; CHECK-NEXT:    [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
+; CHECK-NEXT:    [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
  ; CHECK-NEXT:    br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
  ; CHECK:       vector.memcheck:
  ; CHECK-NEXT:    [[TMP10:%.*]] = add i32 [[N]], -1
@@ -280,12 +274,9 @@ define void @clamped_index_equal_dependence(i32* %A, i32* %B, i32 %N) {
  ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
  ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[TMP0]] to i2
  ; CHECK-NEXT:    [[TMP2:%.*]] = add i2 0, [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub i2 0, [[TMP1]]
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp ugt i2 [[TMP3]], 0
  ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i2 [[TMP2]], 0
-; CHECK-NEXT:    [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
  ; CHECK-NEXT:    [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3
-; CHECK-NEXT:    [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
+; CHECK-NEXT:    [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
  ; CHECK-NEXT:    br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
  ; CHECK:       vector.ph:
  ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[N]], 2
diff --git a/llvm/test/Transforms/LoopVersioning/wrapping-pointer-non-integral-addrspace.ll b/llvm/test/Transforms/LoopVersioning/wrapping-pointer-non-integral-addrspace.ll

index 61f5405..1910e62 100644 (file)
--- a/llvm/test/Transforms/LoopVersioning/wrapping-pointer-non-integral-addrspace.ll
+++ b/llvm/test/Transforms/LoopVersioning/wrapping-pointer-non-integral-addrspace.ll
@@ -18,10 +18,8 @@ define void @"japi1_align!_9477"(%jl_value_t addrspace(10)** %arg) {
  ; LV-NEXT: [[OFMulResult:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul]], 0
  ; LV-NEXT: [[OFMulOverflow:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul]], 1
  ; LV: [[OFNegMulResult:%[^ ]*]] = sub i64 0, [[OFMulResult]]
-; LV-NEXT: [[PosGEP:%[^ ]*]] = getelementptr i8, i8 addrspace(13)* [[Base:%[^ ]*]], i64 [[OFMulResult]]
-; LV-NEXT: [[NegGEP:%[^ ]*]] = getelementptr i8, i8 addrspace(13)* [[Base]], i64 [[OFNegMulResult]]
+; LV-NEXT: [[NegGEP:%[^ ]*]] = getelementptr i8, i8 addrspace(13)* [[Base:%[^ ]*]], i64 [[OFNegMulResult]]
  ; LV-NEXT: icmp ugt i8 addrspace(13)* [[NegGEP]], [[Base]]
-; LV-NEXT: icmp ult i8 addrspace(13)* [[PosGEP]], [[Base]]
  ; LV-NOT: inttoptr
  ; LV-NOT: ptrtoint
  top:
diff --git a/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll b/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll

index 503e7ca..5250039 100644 (file)
--- a/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll
+++ b/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll
@@ -35,23 +35,17 @@ define void @f1(i16* noalias %a,
  ; LV-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
  ; LV-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
  ; LV-NEXT:    [[TMP2:%.*]] = add i32 0, [[MUL_RESULT]]
-; LV-NEXT:    [[TMP3:%.*]] = sub i32 0, [[MUL_RESULT]]
-; LV-NEXT:    [[TMP4:%.*]] = icmp ugt i32 [[TMP3]], 0
  ; LV-NEXT:    [[TMP5:%.*]] = icmp ult i32 [[TMP2]], 0
-; LV-NEXT:    [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
  ; LV-NEXT:    [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; LV-NEXT:    [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
+; LV-NEXT:    [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
  ; LV-NEXT:    [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
  ; LV-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
  ; LV-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
  ; LV-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
  ; LV-NEXT:    [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT3]]
  ; LV-NEXT:    [[TMP12:%.*]] = getelementptr i8, i8* [[A5]], i64 [[MUL_RESULT3]]
-; LV-NEXT:    [[TMP13:%.*]] = getelementptr i8, i8* [[A5]], i64 [[TMP11]]
-; LV-NEXT:    [[TMP14:%.*]] = icmp ugt i8* [[TMP13]], [[A5]]
  ; LV-NEXT:    [[TMP15:%.*]] = icmp ult i8* [[TMP12]], [[A5]]
-; LV-NEXT:    [[TMP16:%.*]] = select i1 false, i1 [[TMP14]], i1 [[TMP15]]
-; LV-NEXT:    [[TMP17:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]]
+; LV-NEXT:    [[TMP17:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW4]]
  ; LV-NEXT:    [[TMP18:%.*]] = or i1 [[TMP9]], [[TMP17]]
  ; LV-NEXT:    br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
  ; LV:       for.body.ph.lver.orig:
@@ -160,13 +154,10 @@ define void @f2(i16* noalias %a,
  ; LV-NEXT:    [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP2]])
  ; LV-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
  ; LV-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
-; LV-NEXT:    [[TMP3:%.*]] = add i32 [[TMP1]], [[MUL_RESULT]]
  ; LV-NEXT:    [[TMP4:%.*]] = sub i32 [[TMP1]], [[MUL_RESULT]]
  ; LV-NEXT:    [[TMP5:%.*]] = icmp ugt i32 [[TMP4]], [[TMP1]]
-; LV-NEXT:    [[TMP6:%.*]] = icmp ult i32 [[TMP3]], [[TMP1]]
-; LV-NEXT:    [[TMP7:%.*]] = select i1 true, i1 [[TMP5]], i1 [[TMP6]]
  ; LV-NEXT:    [[TMP8:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; LV-NEXT:    [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
+; LV-NEXT:    [[TMP9:%.*]] = or i1 [[TMP5]], [[TMP8]]
  ; LV-NEXT:    [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
  ; LV-NEXT:    [[TMP12:%.*]] = trunc i64 [[N]] to i31
  ; LV-NEXT:    [[TMP13:%.*]] = zext i31 [[TMP12]] to i64
@@ -177,12 +168,9 @@ define void @f2(i16* noalias %a,
  ; LV-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
  ; LV-NEXT:    [[SCEVGEP5:%.*]] = bitcast i16* [[SCEVGEP]] to i8*
  ; LV-NEXT:    [[TMP15:%.*]] = sub i64 0, [[MUL_RESULT3]]
-; LV-NEXT:    [[TMP16:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[MUL_RESULT3]]
  ; LV-NEXT:    [[TMP17:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[TMP15]]
  ; LV-NEXT:    [[TMP18:%.*]] = icmp ugt i8* [[TMP17]], [[SCEVGEP5]]
-; LV-NEXT:    [[TMP19:%.*]] = icmp ult i8* [[TMP16]], [[SCEVGEP5]]
-; LV-NEXT:    [[TMP20:%.*]] = select i1 true, i1 [[TMP18]], i1 [[TMP19]]
-; LV-NEXT:    [[TMP21:%.*]] = or i1 [[TMP20]], [[MUL_OVERFLOW4]]
+; LV-NEXT:    [[TMP21:%.*]] = or i1 [[TMP18]], [[MUL_OVERFLOW4]]
  ; LV-NEXT:    [[TMP22:%.*]] = or i1 [[TMP10]], [[TMP21]]
  ; LV-NEXT:    br i1 [[TMP22]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
  ; LV:       for.body.ph.lver.orig:
@@ -276,23 +264,17 @@ define void @f3(i16* noalias %a,
  ; LV-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
  ; LV-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
  ; LV-NEXT:    [[TMP2:%.*]] = add i32 0, [[MUL_RESULT]]
-; LV-NEXT:    [[TMP3:%.*]] = sub i32 0, [[MUL_RESULT]]
-; LV-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], 0
  ; LV-NEXT:    [[TMP5:%.*]] = icmp slt i32 [[TMP2]], 0
-; LV-NEXT:    [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
  ; LV-NEXT:    [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; LV-NEXT:    [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
+; LV-NEXT:    [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
  ; LV-NEXT:    [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
  ; LV-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
  ; LV-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
  ; LV-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
  ; LV-NEXT:    [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT3]]
  ; LV-NEXT:    [[TMP12:%.*]] = getelementptr i8, i8* [[A5]], i64 [[MUL_RESULT3]]
-; LV-NEXT:    [[TMP13:%.*]] = getelementptr i8, i8* [[A5]], i64 [[TMP11]]
-; LV-NEXT:    [[TMP14:%.*]] = icmp ugt i8* [[TMP13]], [[A5]]
  ; LV-NEXT:    [[TMP15:%.*]] = icmp ult i8* [[TMP12]], [[A5]]
-; LV-NEXT:    [[TMP16:%.*]] = select i1 false, i1 [[TMP14]], i1 [[TMP15]]
-; LV-NEXT:    [[TMP17:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]]
+; LV-NEXT:    [[TMP17:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW4]]
  ; LV-NEXT:    [[TMP18:%.*]] = or i1 [[TMP9]], [[TMP17]]
  ; LV-NEXT:    br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
  ; LV:       for.body.ph.lver.orig:
@@ -377,13 +359,10 @@ define void @f4(i16* noalias %a,
  ; LV-NEXT:    [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP2]])
  ; LV-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
  ; LV-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
-; LV-NEXT:    [[TMP3:%.*]] = add i32 [[TMP1]], [[MUL_RESULT]]
  ; LV-NEXT:    [[TMP4:%.*]] = sub i32 [[TMP1]], [[MUL_RESULT]]
  ; LV-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], [[TMP1]]
-; LV-NEXT:    [[TMP6:%.*]] = icmp slt i32 [[TMP3]], [[TMP1]]
-; LV-NEXT:    [[TMP7:%.*]] = select i1 true, i1 [[TMP5]], i1 [[TMP6]]
  ; LV-NEXT:    [[TMP8:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; LV-NEXT:    [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
+; LV-NEXT:    [[TMP9:%.*]] = or i1 [[TMP5]], [[TMP8]]
  ; LV-NEXT:    [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
  ; LV-NEXT:    [[TMP12:%.*]] = sext i32 [[TMP1]] to i64
  ; LV-NEXT:    [[SCEVGEP:%.*]] = getelementptr i16, i16* [[A:%.*]], i64 [[TMP12]]
@@ -392,12 +371,9 @@ define void @f4(i16* noalias %a,
  ; LV-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
  ; LV-NEXT:    [[SCEVGEP5:%.*]] = bitcast i16* [[SCEVGEP]] to i8*
  ; LV-NEXT:    [[TMP13:%.*]] = sub i64 0, [[MUL_RESULT3]]
-; LV-NEXT:    [[TMP14:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[MUL_RESULT3]]
  ; LV-NEXT:    [[TMP15:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[TMP13]]
  ; LV-NEXT:    [[TMP16:%.*]] = icmp ugt i8* [[TMP15]], [[SCEVGEP5]]
-; LV-NEXT:    [[TMP17:%.*]] = icmp ult i8* [[TMP14]], [[SCEVGEP5]]
-; LV-NEXT:    [[TMP18:%.*]] = select i1 true, i1 [[TMP16]], i1 [[TMP17]]
-; LV-NEXT:    [[TMP19:%.*]] = or i1 [[TMP18]], [[MUL_OVERFLOW4]]
+; LV-NEXT:    [[TMP19:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]]
  ; LV-NEXT:    [[TMP20:%.*]] = or i1 [[TMP10]], [[TMP19]]
  ; LV-NEXT:    br i1 [[TMP20]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
  ; LV:       for.body.ph.lver.orig:
@@ -490,13 +466,10 @@ define void @f5(i16* noalias %a,
  ; LV-NEXT:    [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP2]])
  ; LV-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
  ; LV-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
-; LV-NEXT:    [[TMP3:%.*]] = add i32 [[TMP1]], [[MUL_RESULT]]
  ; LV-NEXT:    [[TMP4:%.*]] = sub i32 [[TMP1]], [[MUL_RESULT]]
  ; LV-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], [[TMP1]]
-; LV-NEXT:    [[TMP6:%.*]] = icmp slt i32 [[TMP3]], [[TMP1]]
-; LV-NEXT:    [[TMP7:%.*]] = select i1 true, i1 [[TMP5]], i1 [[TMP6]]
  ; LV-NEXT:    [[TMP8:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; LV-NEXT:    [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
+; LV-NEXT:    [[TMP9:%.*]] = or i1 [[TMP5]], [[TMP8]]
  ; LV-NEXT:    [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
  ; LV-NEXT:    [[TMP12:%.*]] = sext i32 [[TMP1]] to i64
  ; LV-NEXT:    [[SCEVGEP:%.*]] = getelementptr i16, i16* [[A:%.*]], i64 [[TMP12]]
@@ -505,12 +478,9 @@ define void @f5(i16* noalias %a,
  ; LV-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
  ; LV-NEXT:    [[SCEVGEP5:%.*]] = bitcast i16* [[SCEVGEP]] to i8*
  ; LV-NEXT:    [[TMP13:%.*]] = sub i64 0, [[MUL_RESULT3]]
-; LV-NEXT:    [[TMP14:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[MUL_RESULT3]]
  ; LV-NEXT:    [[TMP15:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[TMP13]]
  ; LV-NEXT:    [[TMP16:%.*]] = icmp ugt i8* [[TMP15]], [[SCEVGEP5]]
-; LV-NEXT:    [[TMP17:%.*]] = icmp ult i8* [[TMP14]], [[SCEVGEP5]]
-; LV-NEXT:    [[TMP18:%.*]] = select i1 true, i1 [[TMP16]], i1 [[TMP17]]
-; LV-NEXT:    [[TMP19:%.*]] = or i1 [[TMP18]], [[MUL_OVERFLOW4]]
+; LV-NEXT:    [[TMP19:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]]
  ; LV-NEXT:    [[TMP20:%.*]] = or i1 [[TMP10]], [[TMP19]]
  ; LV-NEXT:    br i1 [[TMP20]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
  ; LV:       for.body.ph.lver.orig:
author	Florian Hahn <flo@fhahn.com>
	Fri, 7 Jan 2022 14:44:03 +0000 (14:44 +0000)
committer	Florian Hahn <flo@fhahn.com>
	Fri, 7 Jan 2022 14:49:02 +0000 (14:49 +0000)
llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp		patch \| blob \| history
llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/X86/pr35432.ll		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/induction.ll		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/pr45259.ll		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll		patch \| blob \| history
llvm/test/Transforms/LoopVersioning/wrapping-pointer-non-integral-addrspace.ll		patch \| blob \| history
llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll		patch \| blob \| history