BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
GeneratedRTChecks &Checks)
: InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE,
- EPI, LVL, CM, BFI, PSI, Checks) {}
+ EPI, LVL, CM, BFI, PSI, Checks) {
+ TripCount = EPI.TripCount;
+ }
/// Implements the interface for creating a vectorized skeleton using the
/// *epilogue loop* strategy (ie the second pass of vplan execution).
std::pair<BasicBlock *, Value *>
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
-; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[BLOCKSIZE]], -1
-; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64
-; CHECK-NEXT: [[TMP14:%.*]] = add nuw nsw i64 [[TMP13]], 1
-; CHECK-NEXT: [[N_VEC9:%.*]] = and i64 [[TMP14]], 8589934584
+; CHECK-NEXT: [[N_VEC9:%.*]] = and i64 [[TMP2]], 8589934584
; CHECK-NEXT: [[CAST_CRD:%.*]] = trunc i64 [[N_VEC9]] to i32
; CHECK-NEXT: [[IND_END:%.*]] = sub i32 [[BLOCKSIZE]], [[CAST_CRD]]
; CHECK-NEXT: [[IND_END15:%.*]] = getelementptr i8, i8* [[PSRC]], i64 [[N_VEC9]]
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT11]], [[N_VEC9]]
; CHECK-NEXT: br i1 [[TMP18]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: vec.epilog.middle.block:
-; CHECK-NEXT: [[CMP_N20:%.*]] = icmp eq i64 [[TMP14]], [[N_VEC9]]
+; CHECK-NEXT: [[CMP_N20:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC9]]
; CHECK-NEXT: br i1 [[CMP_N20]], label [[WHILE_END]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END13]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[BLOCKSIZE]], [[ITER_CHECK]] ]
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
-; CHECK-NEXT: [[TMP22:%.*]] = add i32 [[BLOCKSIZE]], -1
-; CHECK-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64
-; CHECK-NEXT: [[TMP24:%.*]] = add nuw nsw i64 [[TMP23]], 1
-; CHECK-NEXT: [[N_VEC19:%.*]] = and i64 [[TMP24]], 8589934584
+; CHECK-NEXT: [[N_VEC19:%.*]] = and i64 [[TMP2]], 8589934584
; CHECK-NEXT: [[CAST_CRD:%.*]] = trunc i64 [[N_VEC19]] to i32
; CHECK-NEXT: [[IND_END:%.*]] = sub i32 [[BLOCKSIZE]], [[CAST_CRD]]
; CHECK-NEXT: [[IND_END25:%.*]] = getelementptr i16, i16* [[PSRC]], i64 [[N_VEC19]]
; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT21]], [[N_VEC19]]
; CHECK-NEXT: br i1 [[TMP28]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK: vec.epilog.middle.block:
-; CHECK-NEXT: [[CMP_N30:%.*]] = icmp eq i64 [[TMP24]], [[N_VEC19]]
+; CHECK-NEXT: [[CMP_N30:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC19]]
; CHECK-NEXT: br i1 [[CMP_N30]], label [[WHILE_END]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END23]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[BLOCKSIZE]], [[ITER_CHECK]] ]
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
-; CHECK-NEXT: [[TMP22:%.*]] = add i32 [[BLOCKSIZE]], -1
-; CHECK-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64
-; CHECK-NEXT: [[TMP24:%.*]] = add nuw nsw i64 [[TMP23]], 1
-; CHECK-NEXT: [[N_VEC19:%.*]] = and i64 [[TMP24]], 8589934576
+; CHECK-NEXT: [[N_VEC19:%.*]] = and i64 [[TMP2]], 8589934576
; CHECK-NEXT: [[CAST_CRD:%.*]] = trunc i64 [[N_VEC19]] to i32
; CHECK-NEXT: [[IND_END:%.*]] = sub i32 [[BLOCKSIZE]], [[CAST_CRD]]
; CHECK-NEXT: [[IND_END25:%.*]] = getelementptr i8, i8* [[PSRC]], i64 [[N_VEC19]]
; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT21]], [[N_VEC19]]
; CHECK-NEXT: br i1 [[TMP28]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: vec.epilog.middle.block:
-; CHECK-NEXT: [[CMP_N30:%.*]] = icmp eq i64 [[TMP24]], [[N_VEC19]]
+; CHECK-NEXT: [[CMP_N30:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC19]]
; CHECK-NEXT: br i1 [[CMP_N30]], label [[WHILE_END]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END23]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[BLOCKSIZE]], [[ITER_CHECK]] ]
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK_NOT_NOT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
-; CHECK-NEXT: [[SMAX11:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
-; CHECK-NEXT: [[N_VEC13:%.*]] = and i64 [[SMAX11]], 9223372036854775800
+; CHECK-NEXT: [[N_VEC13:%.*]] = and i64 [[SMAX6]], 9223372036854775800
; CHECK-NEXT: [[BROADCAST_SPLATINSERT17:%.*]] = insertelement <8 x i32*> poison, i32* [[A]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT18:%.*]] = shufflevector <8 x i32*> [[BROADCAST_SPLATINSERT17]], <8 x i32*> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT19:%.*]] = insertelement <8 x i32> poison, i32 [[NTRUNC]], i64 0
; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <8 x i32*> [[BROADCAST_SPLAT18]], zeroinitializer
; CHECK-NEXT: [[WIDE_MASKED_GATHER21:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> [[BROADCAST_SPLAT18]], i32 4, <8 x i1> [[TMP8]], <8 x i32> undef)
; CHECK-NEXT: [[PREDPHI22:%.*]] = select <8 x i1> [[TMP8]], <8 x i32> [[WIDE_MASKED_GATHER21]], <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1>
-; CHECK-NEXT: [[CMP_N14:%.*]] = icmp eq i64 [[SMAX11]], [[N_VEC13]]
+; CHECK-NEXT: [[CMP_N14:%.*]] = icmp eq i64 [[SMAX6]], [[N_VEC13]]
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[PREDPHI22]], i64 7
; CHECK-NEXT: br i1 [[CMP_N14]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[TMP13]], [[VEC_EPILOG_ITER_CHECK]] ]
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ]
-; CHECK-NEXT: [[SMAX16:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
-; CHECK-NEXT: [[N_VEC18:%.*]] = and i64 [[SMAX16]], 9223372036854775800
+; CHECK-NEXT: [[N_VEC18:%.*]] = and i64 [[SMAX6]], 9223372036854775800
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x i32> <i32 poison, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, i32 [[BC_MERGE_RDX]], i64 0
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: br i1 [[TMP18]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: vec.epilog.middle.block:
; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP17]])
-; CHECK-NEXT: [[CMP_N19:%.*]] = icmp eq i64 [[SMAX16]], [[N_VEC18]]
+; CHECK-NEXT: [[CMP_N19:%.*]] = icmp eq i64 [[SMAX6]], [[N_VEC18]]
; CHECK-NEXT: br i1 [[CMP_N19]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC18]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK_NOT_NOT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
-; CHECK-NEXT: [[SMAX12:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
-; CHECK-NEXT: [[N_VEC14:%.*]] = and i64 [[SMAX12]], 9223372036854775800
+; CHECK-NEXT: [[N_VEC14:%.*]] = and i64 [[SMAX6]], 9223372036854775800
; CHECK-NEXT: [[BROADCAST_SPLATINSERT18:%.*]] = insertelement <8 x i32> poison, i32 [[K]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT19:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT18]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT20:%.*]] = insertelement <8 x i32> poison, i32 [[NTRUNC]], i64 0
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT24]], [[N_VEC14]]
; CHECK-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK: vec.epilog.middle.block:
-; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[SMAX12]], [[N_VEC14]]
+; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[SMAX6]], [[N_VEC14]]
; CHECK-NEXT: br i1 [[CMP_N15]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC14]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK_NOT_NOT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
-; CHECK-NEXT: [[SMAX22:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
-; CHECK-NEXT: [[N_VEC24:%.*]] = and i64 [[SMAX22]], 9223372036854775800
+; CHECK-NEXT: [[N_VEC24:%.*]] = and i64 [[SMAX16]], 9223372036854775800
; CHECK-NEXT: [[BROADCAST_SPLATINSERT28:%.*]] = insertelement <8 x i32> poison, i32 [[K]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT29:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT28]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT30:%.*]] = insertelement <8 x i32> poison, i32 [[NTRUNC]], i64 0
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT35]], [[N_VEC24]]
; CHECK-NEXT: br i1 [[TMP13]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
; CHECK: vec.epilog.middle.block:
-; CHECK-NEXT: [[CMP_N25:%.*]] = icmp eq i64 [[SMAX22]], [[N_VEC24]]
+; CHECK-NEXT: [[CMP_N25:%.*]] = icmp eq i64 [[SMAX16]], [[N_VEC24]]
; CHECK-NEXT: br i1 [[CMP_N25]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC24]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]