const SCEV *ScStart;
const SCEV *ScEnd;
- if (SE->isLoopInvariant(Sc, Lp))
+ if (SE->isLoopInvariant(Sc, Lp)) {
ScStart = ScEnd = Sc;
- else {
+ } else {
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
assert(AR && "Invalid addrec expression");
const SCEV *Ex = PSE.getBackedgeTakenCount();
ScStart = SE->getUMinExpr(ScStart, ScEnd);
ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
}
- // Add the size of the pointed element to ScEnd.
- auto &DL = Lp->getHeader()->getModule()->getDataLayout();
- Type *IdxTy = DL.getIndexType(Ptr->getType());
- const SCEV *EltSizeSCEV =
- SE->getStoreSizeOfExpr(IdxTy, Ptr->getType()->getPointerElementType());
- ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
}
+ // Add the size of the pointed element to ScEnd.
+ auto &DL = Lp->getHeader()->getModule()->getDataLayout();
+ Type *IdxTy = DL.getIndexType(Ptr->getType());
+ const SCEV *EltSizeSCEV =
+ SE->getStoreSizeOfExpr(IdxTy, Ptr->getType()->getPointerElementType());
+ ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
}
static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG,
Loop *TheLoop, Instruction *Loc,
SCEVExpander &Exp) {
- ScalarEvolution *SE = Exp.getSE();
// TODO: Add helper to retrieve pointers to CG.
Value *Ptr = CG->RtCheck.Pointers[CG->Members[0]].PointerValue;
- const SCEV *Sc = SE->getSCEV(Ptr);
unsigned AS = Ptr->getType()->getPointerAddressSpace();
LLVMContext &Ctx = Loc->getContext();
// Use this type for pointer arithmetic.
Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
- if (SE->isLoopInvariant(Sc, TheLoop)) {
- LLVM_DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:"
- << *Ptr << "\n");
- // Ptr could be in the loop body. If so, expand a new one at the correct
- // location.
- Instruction *Inst = dyn_cast<Instruction>(Ptr);
- Value *NewPtr = (Inst && TheLoop->contains(Inst))
- ? Exp.expandCodeFor(Sc, PtrArithTy, Loc)
- : Ptr;
- // We must return a half-open range, which means incrementing Sc.
- const SCEV *ScPlusOne = SE->getAddExpr(Sc, SE->getOne(PtrArithTy));
- Value *NewPtrPlusOne = Exp.expandCodeFor(ScPlusOne, PtrArithTy, Loc);
- return {NewPtr, NewPtrPlusOne};
- } else {
- Value *Start = nullptr, *End = nullptr;
- LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
- Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
- End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
- LLVM_DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High
- << "\n");
- return {Start, End};
- }
+ Value *Start = nullptr, *End = nullptr;
+ LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
+ Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
+ End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
+ LLVM_DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High << "\n");
+ return {Start, End};
}
/// Turns a collection of checks into a collection of expanded upper and
; CHECK-LABEL: @inv_load_conditional(
; CHECK-NEXT: iter.check:
; CHECK-NEXT: [[NTRUNC:%.*]] = trunc i64 [[N:%.*]] to i32
-; CHECK-NEXT: [[SMAX4:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX4]], 8
+; CHECK-NEXT: [[SMAX6:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX6]], 8
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
-; CHECK-NEXT: [[A3:%.*]] = bitcast i32* [[A:%.*]] to i8*
-; CHECK-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
-; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[SMAX]]
-; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[A3]], i64 1
-; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i8* [[UGLYGEP]], [[B1]]
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[SMAX]]
+; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 1
+; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i32* [[SCEVGEP4]], [[B]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[SCEVGEP]], [[A]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK: vector.main.loop.iter.check:
-; CHECK-NEXT: [[MIN_ITERS_CHECK5:%.*]] = icmp ult i64 [[SMAX4]], 16
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK5]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: [[MIN_ITERS_CHECK7:%.*]] = icmp ult i64 [[SMAX6]], 16
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK7]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX4]], 9223372036854775792
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX6]], 9223372036854775792
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32*> poison, i32* [[A]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32*> [[BROADCAST_SPLATINSERT]], <16 x i32*> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <16 x i32> poison, i32 [[NTRUNC]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT6]], <16 x i32> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <16 x i32> poison, i32 [[NTRUNC]], i32 0
+; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT8]], <16 x i32> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <16 x i32*> [[BROADCAST_SPLAT]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP0]] to <16 x i32>*
-; CHECK-NEXT: store <16 x i32> [[BROADCAST_SPLAT7]], <16 x i32>* [[TMP2]], align 4, !alias.scope !0, !noalias !3
+; CHECK-NEXT: store <16 x i32> [[BROADCAST_SPLAT9]], <16 x i32>* [[TMP2]], align 4, !alias.scope !0, !noalias !3
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> [[BROADCAST_SPLAT]], i32 4, <16 x i1> [[TMP1]], <16 x i32> undef), !alias.scope !3
; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP1]], <16 x i32> [[WIDE_MASKED_GATHER]], <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1>
-; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX4]], [[N_VEC]]
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX6]], [[N_VEC]]
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <16 x i32> [[PREDPHI]], i32 15
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:
-; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[SMAX4]], 8
+; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[SMAX6]], 8
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK_NOT_NOT:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK_NOT_NOT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
-; CHECK-NEXT: [[SMAX9:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
-; CHECK-NEXT: [[N_VEC11:%.*]] = and i64 [[SMAX9]], 9223372036854775800
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT16:%.*]] = insertelement <8 x i32*> poison, i32* [[A]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT17:%.*]] = shufflevector <8 x i32*> [[BROADCAST_SPLATINSERT16]], <8 x i32*> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT18:%.*]] = insertelement <8 x i32> poison, i32 [[NTRUNC]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT19:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT18]], <8 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[SMAX11:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
+; CHECK-NEXT: [[N_VEC13:%.*]] = and i64 [[SMAX11]], 9223372036854775800
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT18:%.*]] = insertelement <8 x i32*> poison, i32* [[A]], i32 0
+; CHECK-NEXT: [[BROADCAST_SPLAT19:%.*]] = shufflevector <8 x i32*> [[BROADCAST_SPLATINSERT18]], <8 x i32*> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT20:%.*]] = insertelement <8 x i32> poison, i32 [[NTRUNC]], i32 0
+; CHECK-NEXT: [[BROADCAST_SPLAT21:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT20]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
-; CHECK-NEXT: [[INDEX12:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT13:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX12]]
-; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <8 x i32*> [[BROADCAST_SPLAT17]], zeroinitializer
+; CHECK-NEXT: [[INDEX14:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT15:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX14]]
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <8 x i32*> [[BROADCAST_SPLAT19]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP5]] to <8 x i32>*
-; CHECK-NEXT: store <8 x i32> [[BROADCAST_SPLAT19]], <8 x i32>* [[TMP7]], align 4
-; CHECK-NEXT: [[INDEX_NEXT13]] = add nuw i64 [[INDEX12]], 8
-; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT13]], [[N_VEC11]]
+; CHECK-NEXT: store <8 x i32> [[BROADCAST_SPLAT21]], <8 x i32>* [[TMP7]], align 4
+; CHECK-NEXT: [[INDEX_NEXT15]] = add nuw i64 [[INDEX14]], 8
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT15]], [[N_VEC13]]
; CHECK-NEXT: br i1 [[TMP8]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: vec.epilog.middle.block:
-; CHECK-NEXT: [[WIDE_MASKED_GATHER20:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> [[BROADCAST_SPLAT17]], i32 4, <8 x i1> [[TMP6]], <8 x i32> undef)
-; CHECK-NEXT: [[PREDPHI21:%.*]] = select <8 x i1> [[TMP6]], <8 x i32> [[WIDE_MASKED_GATHER20]], <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1>
-; CHECK-NEXT: [[CMP_N14:%.*]] = icmp eq i64 [[SMAX9]], [[N_VEC11]]
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[PREDPHI21]], i32 7
-; CHECK-NEXT: br i1 [[CMP_N14]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]]
+; CHECK-NEXT: [[WIDE_MASKED_GATHER22:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> [[BROADCAST_SPLAT19]], i32 4, <8 x i1> [[TMP6]], <8 x i32> undef)
+; CHECK-NEXT: [[PREDPHI23:%.*]] = select <8 x i1> [[TMP6]], <8 x i32> [[WIDE_MASKED_GATHER22]], <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1>
+; CHECK-NEXT: [[CMP_N16:%.*]] = icmp eq i64 [[SMAX11]], [[N_VEC13]]
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[PREDPHI23]], i32 7
+; CHECK-NEXT: br i1 [[CMP_N16]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC11]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC13]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[LATCH:%.*]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ]
; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: for.end.loopexit:
-; CHECK-NEXT: [[A_LCSSA_LCSSA8:%.*]] = phi i32 [ [[A_LCSSA]], [[LATCH]] ], [ [[TMP9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[A_LCSSA_LCSSA10:%.*]] = phi i32 [ [[A_LCSSA]], [[LATCH]] ], [ [[TMP9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[FOR_END]]
; CHECK: for.end:
-; CHECK-NEXT: [[A_LCSSA_LCSSA:%.*]] = phi i32 [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[A_LCSSA_LCSSA8]], [[FOR_END_LOOPEXIT]] ]
+; CHECK-NEXT: [[A_LCSSA_LCSSA:%.*]] = phi i32 [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[A_LCSSA_LCSSA10]], [[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT: ret i32 [[A_LCSSA_LCSSA]]
;
entry:
; CHECK-LABEL: @inv_val_store_to_inv_address_with_reduction(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[NTRUNC:%.*]] = trunc i64 [[N:%.*]] to i32
-; CHECK-NEXT: [[SMAX4:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX4]], 64
+; CHECK-NEXT: [[SMAX6:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX6]], 64
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
-; CHECK-NEXT: [[B2:%.*]] = bitcast i32* [[B:%.*]] to i8*
-; CHECK-NEXT: [[A1:%.*]] = bitcast i32* [[A:%.*]] to i8*
-; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[A1]], i64 1
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 1
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
-; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[SMAX]]
-; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i32* [[SCEVGEP]], [[A]]
-; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt i8* [[UGLYGEP]], [[B2]]
+; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[SMAX]]
+; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i32* [[SCEVGEP4]], [[A]]
+; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[SCEVGEP]], [[B]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX4]], 9223372036854775744
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX6]], 9223372036854775744
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI8:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI9:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <16 x i32>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, <16 x i32>* [[TMP1]], align 8, !alias.scope !0
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 16
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <16 x i32>*
-; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <16 x i32>, <16 x i32>* [[TMP3]], align 8, !alias.scope !0
+; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <16 x i32>, <16 x i32>* [[TMP3]], align 8, !alias.scope !0
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 32
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <16 x i32>*
-; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <16 x i32>, <16 x i32>* [[TMP5]], align 8, !alias.scope !0
+; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <16 x i32>, <16 x i32>* [[TMP5]], align 8, !alias.scope !0
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 48
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <16 x i32>*
-; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <16 x i32>, <16 x i32>* [[TMP7]], align 8, !alias.scope !0
+; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <16 x i32>, <16 x i32>* [[TMP7]], align 8, !alias.scope !0
; CHECK-NEXT: [[TMP8]] = add <16 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
-; CHECK-NEXT: [[TMP9]] = add <16 x i32> [[VEC_PHI5]], [[WIDE_LOAD8]]
-; CHECK-NEXT: [[TMP10]] = add <16 x i32> [[VEC_PHI6]], [[WIDE_LOAD9]]
-; CHECK-NEXT: [[TMP11]] = add <16 x i32> [[VEC_PHI7]], [[WIDE_LOAD10]]
+; CHECK-NEXT: [[TMP9]] = add <16 x i32> [[VEC_PHI7]], [[WIDE_LOAD10]]
+; CHECK-NEXT: [[TMP10]] = add <16 x i32> [[VEC_PHI8]], [[WIDE_LOAD11]]
+; CHECK-NEXT: [[TMP11]] = add <16 x i32> [[VEC_PHI9]], [[WIDE_LOAD12]]
; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !3, !noalias !0
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <16 x i32> [[TMP9]], [[TMP8]]
-; CHECK-NEXT: [[BIN_RDX11:%.*]] = add <16 x i32> [[TMP10]], [[BIN_RDX]]
-; CHECK-NEXT: [[BIN_RDX12:%.*]] = add <16 x i32> [[TMP11]], [[BIN_RDX11]]
-; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[BIN_RDX12]])
-; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX4]], [[N_VEC]]
+; CHECK-NEXT: [[BIN_RDX13:%.*]] = add <16 x i32> [[TMP10]], [[BIN_RDX]]
+; CHECK-NEXT: [[BIN_RDX14:%.*]] = add <16 x i32> [[TMP11]], [[BIN_RDX13]]
+; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[BIN_RDX14]])
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX6]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; CHECK-LABEL: @inv_val_store_to_inv_address_conditional(
; CHECK-NEXT: iter.check:
; CHECK-NEXT: [[NTRUNC:%.*]] = trunc i64 [[N:%.*]] to i32
-; CHECK-NEXT: [[SMAX4:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX4]], 8
+; CHECK-NEXT: [[SMAX6:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX6]], 8
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
-; CHECK-NEXT: [[A3:%.*]] = bitcast i32* [[A:%.*]] to i8*
-; CHECK-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
-; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[SMAX]]
-; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[A3]], i64 1
-; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i8* [[UGLYGEP]], [[B1]]
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[SMAX]]
+; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 1
+; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i32* [[SCEVGEP4]], [[B]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[SCEVGEP]], [[A]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK: vector.main.loop.iter.check:
-; CHECK-NEXT: [[MIN_ITERS_CHECK5:%.*]] = icmp ult i64 [[SMAX4]], 16
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK5]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: [[MIN_ITERS_CHECK7:%.*]] = icmp ult i64 [[SMAX6]], 16
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK7]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX4]], 9223372036854775792
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX6]], 9223372036854775792
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[K:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <16 x i32> poison, i32 [[NTRUNC]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT6]], <16 x i32> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <16 x i32*> poison, i32* [[A]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <16 x i32*> [[BROADCAST_SPLATINSERT8]], <16 x i32*> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <16 x i32> poison, i32 [[NTRUNC]], i32 0
+; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT8]], <16 x i32> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <16 x i32*> poison, i32* [[A]], i32 0
+; CHECK-NEXT: [[BROADCAST_SPLAT11:%.*]] = shufflevector <16 x i32*> [[BROADCAST_SPLATINSERT10]], <16 x i32*> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, <16 x i32>* [[TMP1]], align 8, !alias.scope !8, !noalias !11
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <16 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP0]] to <16 x i32>*
-; CHECK-NEXT: store <16 x i32> [[BROADCAST_SPLAT7]], <16 x i32>* [[TMP3]], align 4, !alias.scope !8, !noalias !11
-; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> [[BROADCAST_SPLAT7]], <16 x i32*> [[BROADCAST_SPLAT9]], i32 4, <16 x i1> [[TMP2]]), !alias.scope !11
+; CHECK-NEXT: store <16 x i32> [[BROADCAST_SPLAT9]], <16 x i32>* [[TMP3]], align 4, !alias.scope !8, !noalias !11
+; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> [[BROADCAST_SPLAT9]], <16 x i32*> [[BROADCAST_SPLAT11]], i32 4, <16 x i1> [[TMP2]]), !alias.scope !11
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX4]], [[N_VEC]]
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX6]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:
-; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[SMAX4]], 8
+; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[SMAX6]], 8
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK_NOT_NOT:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK_NOT_NOT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
-; CHECK-NEXT: [[SMAX10:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
-; CHECK-NEXT: [[N_VEC12:%.*]] = and i64 [[SMAX10]], 9223372036854775800
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT17:%.*]] = insertelement <8 x i32> poison, i32 [[K]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT18:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT17]], <8 x i32> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT19:%.*]] = insertelement <8 x i32> poison, i32 [[NTRUNC]], i32 0
+; CHECK-NEXT: [[SMAX12:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
+; CHECK-NEXT: [[N_VEC14:%.*]] = and i64 [[SMAX12]], 9223372036854775800
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT19:%.*]] = insertelement <8 x i32> poison, i32 [[K]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT20:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT19]], <8 x i32> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT21:%.*]] = insertelement <8 x i32*> poison, i32* [[A]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT22:%.*]] = shufflevector <8 x i32*> [[BROADCAST_SPLATINSERT21]], <8 x i32*> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT21:%.*]] = insertelement <8 x i32> poison, i32 [[NTRUNC]], i32 0
+; CHECK-NEXT: [[BROADCAST_SPLAT22:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT21]], <8 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT23:%.*]] = insertelement <8 x i32*> poison, i32* [[A]], i32 0
+; CHECK-NEXT: [[BROADCAST_SPLAT24:%.*]] = shufflevector <8 x i32*> [[BROADCAST_SPLATINSERT23]], <8 x i32*> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
-; CHECK-NEXT: [[INDEX13:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT14:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX13]]
+; CHECK-NEXT: [[INDEX15:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT16:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX15]]
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <8 x i32>*
-; CHECK-NEXT: [[WIDE_LOAD16:%.*]] = load <8 x i32>, <8 x i32>* [[TMP6]], align 8
-; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD16]], [[BROADCAST_SPLAT18]]
+; CHECK-NEXT: [[WIDE_LOAD18:%.*]] = load <8 x i32>, <8 x i32>* [[TMP6]], align 8
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD18]], [[BROADCAST_SPLAT20]]
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP5]] to <8 x i32>*
-; CHECK-NEXT: store <8 x i32> [[BROADCAST_SPLAT20]], <8 x i32>* [[TMP8]], align 4
-; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> [[BROADCAST_SPLAT20]], <8 x i32*> [[BROADCAST_SPLAT22]], i32 4, <8 x i1> [[TMP7]])
-; CHECK-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX13]], 8
-; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT14]], [[N_VEC12]]
+; CHECK-NEXT: store <8 x i32> [[BROADCAST_SPLAT22]], <8 x i32>* [[TMP8]], align 4
+; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> [[BROADCAST_SPLAT22]], <8 x i32*> [[BROADCAST_SPLAT24]], i32 4, <8 x i1> [[TMP7]])
+; CHECK-NEXT: [[INDEX_NEXT16]] = add nuw i64 [[INDEX15]], 8
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT16]], [[N_VEC14]]
; CHECK-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK: vec.epilog.middle.block:
-; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[SMAX10]], [[N_VEC12]]
-; CHECK-NEXT: br i1 [[CMP_N15]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]]
+; CHECK-NEXT: [[CMP_N17:%.*]] = icmp eq i64 [[SMAX12]], [[N_VEC14]]
+; CHECK-NEXT: br i1 [[CMP_N17]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC12]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC14]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[LATCH:%.*]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ]
; CHECK-LABEL: @variant_val_store_to_inv_address_conditional(
; CHECK-NEXT: iter.check:
; CHECK-NEXT: [[NTRUNC:%.*]] = trunc i64 [[N:%.*]] to i32
-; CHECK-NEXT: [[SMAX15:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX15]], 8
+; CHECK-NEXT: [[SMAX16:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX16]], 8
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
-; CHECK-NEXT: [[C4:%.*]] = bitcast i32* [[C:%.*]] to i8*
-; CHECK-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
-; CHECK-NEXT: [[A3:%.*]] = bitcast i32* [[A:%.*]] to i8*
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
-; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[SMAX]]
-; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[A3]], i64 1
-; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i32, i32* [[C]], i64 [[SMAX]]
-; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i8* [[UGLYGEP]], [[B1]]
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[SMAX]]
+; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 1
+; CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr i32, i32* [[C:%.*]], i64 [[SMAX]]
+; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i32* [[SCEVGEP4]], [[B]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[SCEVGEP]], [[A]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
-; CHECK-NEXT: [[BOUND07:%.*]] = icmp ugt i32* [[SCEVGEP5]], [[B]]
-; CHECK-NEXT: [[BOUND18:%.*]] = icmp ugt i32* [[SCEVGEP]], [[C]]
-; CHECK-NEXT: [[FOUND_CONFLICT9:%.*]] = and i1 [[BOUND07]], [[BOUND18]]
-; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT9]]
-; CHECK-NEXT: [[BOUND011:%.*]] = icmp ugt i32* [[SCEVGEP5]], [[A]]
-; CHECK-NEXT: [[BOUND112:%.*]] = icmp ugt i8* [[UGLYGEP]], [[C4]]
-; CHECK-NEXT: [[FOUND_CONFLICT13:%.*]] = and i1 [[BOUND011]], [[BOUND112]]
-; CHECK-NEXT: [[CONFLICT_RDX14:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT13]]
-; CHECK-NEXT: br i1 [[CONFLICT_RDX14]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
+; CHECK-NEXT: [[BOUND09:%.*]] = icmp ugt i32* [[SCEVGEP7]], [[B]]
+; CHECK-NEXT: [[BOUND110:%.*]] = icmp ugt i32* [[SCEVGEP]], [[C]]
+; CHECK-NEXT: [[FOUND_CONFLICT11:%.*]] = and i1 [[BOUND09]], [[BOUND110]]
+; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT11]]
+; CHECK-NEXT: [[BOUND012:%.*]] = icmp ugt i32* [[SCEVGEP7]], [[A]]
+; CHECK-NEXT: [[BOUND113:%.*]] = icmp ugt i32* [[SCEVGEP4]], [[C]]
+; CHECK-NEXT: [[FOUND_CONFLICT14:%.*]] = and i1 [[BOUND012]], [[BOUND113]]
+; CHECK-NEXT: [[CONFLICT_RDX15:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT14]]
+; CHECK-NEXT: br i1 [[CONFLICT_RDX15]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK: vector.main.loop.iter.check:
-; CHECK-NEXT: [[MIN_ITERS_CHECK16:%.*]] = icmp ult i64 [[SMAX15]], 16
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK16]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: [[MIN_ITERS_CHECK17:%.*]] = icmp ult i64 [[SMAX16]], 16
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK17]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX15]], 9223372036854775792
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX16]], 9223372036854775792
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[K:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT17:%.*]] = insertelement <16 x i32> poison, i32 [[NTRUNC]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT18:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT17]], <16 x i32> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT19:%.*]] = insertelement <16 x i32*> poison, i32* [[A]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT20:%.*]] = shufflevector <16 x i32*> [[BROADCAST_SPLATINSERT19]], <16 x i32*> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT18:%.*]] = insertelement <16 x i32> poison, i32 [[NTRUNC]], i32 0
+; CHECK-NEXT: [[BROADCAST_SPLAT19:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT18]], <16 x i32> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT20:%.*]] = insertelement <16 x i32*> poison, i32* [[A]], i32 0
+; CHECK-NEXT: [[BROADCAST_SPLAT21:%.*]] = shufflevector <16 x i32*> [[BROADCAST_SPLATINSERT20]], <16 x i32*> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, <16 x i32>* [[TMP1]], align 8, !alias.scope !17, !noalias !20
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <16 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP0]] to <16 x i32>*
-; CHECK-NEXT: store <16 x i32> [[BROADCAST_SPLAT18]], <16 x i32>* [[TMP3]], align 4, !alias.scope !17, !noalias !20
+; CHECK-NEXT: store <16 x i32> [[BROADCAST_SPLAT19]], <16 x i32>* [[TMP3]], align 4, !alias.scope !17, !noalias !20
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <16 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP5]], i32 8, <16 x i1> [[TMP2]], <16 x i32> poison), !alias.scope !23
-; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> [[WIDE_MASKED_LOAD]], <16 x i32*> [[BROADCAST_SPLAT20]], i32 4, <16 x i1> [[TMP2]]), !alias.scope !24, !noalias !23
+; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> [[WIDE_MASKED_LOAD]], <16 x i32*> [[BROADCAST_SPLAT21]], i32 4, <16 x i1> [[TMP2]]), !alias.scope !24, !noalias !23
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX15]], [[N_VEC]]
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX16]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:
-; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[SMAX15]], 8
+; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[SMAX16]], 8
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK_NOT_NOT:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK_NOT_NOT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
-; CHECK-NEXT: [[SMAX21:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
-; CHECK-NEXT: [[N_VEC23:%.*]] = and i64 [[SMAX21]], 9223372036854775800
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT28:%.*]] = insertelement <8 x i32> poison, i32 [[K]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT29:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT28]], <8 x i32> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT30:%.*]] = insertelement <8 x i32> poison, i32 [[NTRUNC]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT31:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT30]], <8 x i32> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT33:%.*]] = insertelement <8 x i32*> poison, i32* [[A]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT34:%.*]] = shufflevector <8 x i32*> [[BROADCAST_SPLATINSERT33]], <8 x i32*> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[SMAX22:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
+; CHECK-NEXT: [[N_VEC24:%.*]] = and i64 [[SMAX22]], 9223372036854775800
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT29:%.*]] = insertelement <8 x i32> poison, i32 [[K]], i32 0
+; CHECK-NEXT: [[BROADCAST_SPLAT30:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT29]], <8 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT31:%.*]] = insertelement <8 x i32> poison, i32 [[NTRUNC]], i32 0
+; CHECK-NEXT: [[BROADCAST_SPLAT32:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT31]], <8 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT34:%.*]] = insertelement <8 x i32*> poison, i32* [[A]], i32 0
+; CHECK-NEXT: [[BROADCAST_SPLAT35:%.*]] = shufflevector <8 x i32*> [[BROADCAST_SPLATINSERT34]], <8 x i32*> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
-; CHECK-NEXT: [[INDEX24:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT25:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX24]]
+; CHECK-NEXT: [[INDEX25:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT26:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX25]]
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <8 x i32>*
-; CHECK-NEXT: [[WIDE_LOAD27:%.*]] = load <8 x i32>, <8 x i32>* [[TMP8]], align 8
-; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD27]], [[BROADCAST_SPLAT29]]
+; CHECK-NEXT: [[WIDE_LOAD28:%.*]] = load <8 x i32>, <8 x i32>* [[TMP8]], align 8
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD28]], [[BROADCAST_SPLAT30]]
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP7]] to <8 x i32>*
-; CHECK-NEXT: store <8 x i32> [[BROADCAST_SPLAT31]], <8 x i32>* [[TMP10]], align 4
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDEX24]]
+; CHECK-NEXT: store <8 x i32> [[BROADCAST_SPLAT32]], <8 x i32>* [[TMP10]], align 4
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDEX25]]
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <8 x i32>*
-; CHECK-NEXT: [[WIDE_MASKED_LOAD32:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP12]], i32 8, <8 x i1> [[TMP9]], <8 x i32> poison)
-; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> [[WIDE_MASKED_LOAD32]], <8 x i32*> [[BROADCAST_SPLAT34]], i32 4, <8 x i1> [[TMP9]])
-; CHECK-NEXT: [[INDEX_NEXT25]] = add nuw i64 [[INDEX24]], 8
-; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT25]], [[N_VEC23]]
+; CHECK-NEXT: [[WIDE_MASKED_LOAD33:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP12]], i32 8, <8 x i1> [[TMP9]], <8 x i32> poison)
+; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> [[WIDE_MASKED_LOAD33]], <8 x i32*> [[BROADCAST_SPLAT35]], i32 4, <8 x i1> [[TMP9]])
+; CHECK-NEXT: [[INDEX_NEXT26]] = add nuw i64 [[INDEX25]], 8
+; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT26]], [[N_VEC24]]
; CHECK-NEXT: br i1 [[TMP13]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; CHECK: vec.epilog.middle.block:
-; CHECK-NEXT: [[CMP_N26:%.*]] = icmp eq i64 [[SMAX21]], [[N_VEC23]]
-; CHECK-NEXT: br i1 [[CMP_N26]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]]
+; CHECK-NEXT: [[CMP_N27:%.*]] = icmp eq i64 [[SMAX22]], [[N_VEC24]]
+; CHECK-NEXT: br i1 [[CMP_N27]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC23]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC24]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[LATCH:%.*]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ]
; CHECK-LABEL: @uniform_copy(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
-; CHECK-NEXT: [[A2:%.*]] = bitcast i32* [[A:%.*]] to i8*
+; CHECK-NEXT: [[A3:%.*]] = bitcast i32* [[A:%.*]] to i8*
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
-; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[B1]], i64 1
-; CHECK-NEXT: [[UGLYGEP3:%.*]] = getelementptr i8, i8* [[A2]], i64 1
-; CHECK-NEXT: [[BC:%.*]] = bitcast i32* [[B]] to i8*
-; CHECK-NEXT: [[BC4:%.*]] = bitcast i32* [[A]] to i8*
-; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[BC]], [[UGLYGEP3]]
-; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[BC4]], [[UGLYGEP]]
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 1
+; CHECK-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
+; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[A]], i64 1
+; CHECK-NEXT: [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8*
+; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[B1]], [[SCEVGEP45]]
+; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[A3]], [[SCEVGEP2]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[FOUND_CONFLICT]], true
; CHECK-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK-LABEL: @inv_val_store_to_inv_address_conditional_diff_values_ic(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[NTRUNC:%.*]] = trunc i64 [[N:%.*]] to i32
-; CHECK-NEXT: [[SMAX4:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX4]], 4
+; CHECK-NEXT: [[SMAX6:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX6]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
-; CHECK-NEXT: [[A3:%.*]] = bitcast i32* [[A:%.*]] to i8*
-; CHECK-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
-; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[SMAX]]
-; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[A3]], i64 1
-; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i8* [[UGLYGEP]], [[B1]]
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[SMAX]]
+; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 1
+; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i32* [[SCEVGEP4]], [[B]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[SCEVGEP]], [[A]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX4]], 9223372036854775804
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX6]], 9223372036854775804
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[K:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <4 x i32> poison, i32 [[NTRUNC]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT5]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <4 x i32> poison, i32 [[NTRUNC]], i32 0
+; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT7]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 8, !alias.scope !0, !noalias !3
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
-; CHECK-NEXT: store <4 x i32> [[BROADCAST_SPLAT6]], <4 x i32>* [[TMP2]], align 4, !alias.scope !0, !noalias !3
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[DOTNOT]], <4 x i32> [[BROADCAST_SPLAT6]], <4 x i32> [[BROADCAST_SPLAT]]
+; CHECK-NEXT: store <4 x i32> [[BROADCAST_SPLAT8]], <4 x i32>* [[TMP2]], align 4, !alias.scope !0, !noalias !3
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[DOTNOT]], <4 x i32> [[BROADCAST_SPLAT8]], <4 x i32> [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[PREDPHI]], i32 3
; CHECK-NEXT: store i32 [[TMP3]], i32* [[A]], align 4, !alias.scope !3
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX4]], [[N_VEC]]
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX6]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: entry:
; CHECK-NEXT: [[NTRUNC:%.*]] = trunc i64 [[N:%.*]] to i32
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[NTRUNC]], [[K:%.*]]
-; CHECK-NEXT: [[SMAX4:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX4]], 4
+; CHECK-NEXT: [[SMAX6:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX6]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
-; CHECK-NEXT: [[A3:%.*]] = bitcast i32* [[A:%.*]] to i8*
-; CHECK-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
-; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[SMAX]]
-; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[A3]], i64 1
-; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i8* [[UGLYGEP]], [[B1]]
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[SMAX]]
+; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 1
+; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i32* [[SCEVGEP4]], [[B]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[SCEVGEP]], [[A]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX4]], 9223372036854775804
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX6]], 9223372036854775804
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[NTRUNC]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i1> undef, i1 [[CMP]], i32 3
-; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = insertelement <4 x i32> poison, i32 [[K]], i32 3
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> [[BROADCAST_SPLAT8]]
+; CHECK-NEXT: [[BROADCAST_SPLAT10:%.*]] = insertelement <4 x i32> poison, i32 [[K]], i32 3
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> [[BROADCAST_SPLAT10]]
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[PREDPHI]], i32 3
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX4]], [[N_VEC]]
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX6]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
define i32 @variant_val_store_to_inv_address(i32* %a, i64 %n, i32* %b, i32 %k) {
; CHECK-LABEL: @variant_val_store_to_inv_address(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[SMAX4:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX4]], 4
+; CHECK-NEXT: [[SMAX6:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX6]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
-; CHECK-NEXT: [[B2:%.*]] = bitcast i32* [[B:%.*]] to i8*
-; CHECK-NEXT: [[A1:%.*]] = bitcast i32* [[A:%.*]] to i8*
-; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[A1]], i64 1
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 1
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
-; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[SMAX]]
-; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i32* [[SCEVGEP]], [[A]]
-; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt i8* [[UGLYGEP]], [[B2]]
+; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[SMAX]]
+; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i32* [[SCEVGEP4]], [[A]]
+; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[SCEVGEP]], [[B]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX4]], 9223372036854775804
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX6]], 9223372036854775804
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK: middle.block:
; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi <4 x i32> [ [[TMP3]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[DOTLCSSA]])
-; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX4]], [[N_VEC]]
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX6]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: [[SCEVGEP23:%.*]] = bitcast i32* [[SCEVGEP2]] to i8*
; CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr [[STRUCT_S]], %struct.s* [[OBJ]], i64 0, i32 1, i64 [[I]]
; CHECK-NEXT: [[SCEVGEP78:%.*]] = bitcast i32* [[SCEVGEP7]] to i8*
-; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[SCEVGEP78]], i64 1
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.s* [[OBJ]], i64 0, i32 1, i64 [[I]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[I]], 1
+; CHECK-NEXT: [[SCEVGEP9:%.*]] = getelementptr [[STRUCT_S]], %struct.s* [[OBJ]], i64 0, i32 1, i64 [[TMP1]]
+; CHECK-NEXT: [[SCEVGEP910:%.*]] = bitcast i32* [[SCEVGEP9]] to i8*
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.s* [[OBJ]], i64 0, i32 1, i64 [[I]]
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[Z]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP1]], [[SCEVGEP56]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[OBJ4]], [[SCEVGEP23]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
-; CHECK-NEXT: [[BC:%.*]] = bitcast i32* [[TMP1]] to i8*
-; CHECK-NEXT: [[BOUND09:%.*]] = icmp ult i8* [[SCEVGEP1]], [[UGLYGEP]]
-; CHECK-NEXT: [[BOUND110:%.*]] = icmp ult i8* [[BC]], [[SCEVGEP23]]
-; CHECK-NEXT: [[FOUND_CONFLICT11:%.*]] = and i1 [[BOUND09]], [[BOUND110]]
-; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT11]]
+; CHECK-NEXT: [[BOUND011:%.*]] = icmp ult i8* [[SCEVGEP1]], [[SCEVGEP910]]
+; CHECK-NEXT: [[BOUND112:%.*]] = icmp ult i8* [[SCEVGEP78]], [[SCEVGEP23]]
+; CHECK-NEXT: [[FOUND_CONFLICT13:%.*]] = and i1 [[BOUND011]], [[BOUND112]]
+; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT13]]
; CHECK-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[CONFLICT_RDX]], true
; CHECK-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.s* [[OBJ]], i64 0, i32 0, i64 [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <4 x i32>*
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP5]], align 4, !alias.scope !0
-; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], align 4, !alias.scope !3
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.s* [[OBJ]], i64 0, i32 0, i64 [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>*
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4, !alias.scope !0
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4, !alias.scope !3
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[BROADCAST_SPLAT]], [[WIDE_LOAD]]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.s* [[OBJ]], i64 0, i32 2, i64 [[I]], i64 [[TMP2]]
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 0
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>*
-; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4, !alias.scope !5, !noalias !7
-; CHECK-NEXT: [[TMP11:%.*]] = add nsw <4 x i32> [[TMP7]], [[WIDE_LOAD12]]
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>*
-; CHECK-NEXT: store <4 x i32> [[TMP11]], <4 x i32>* [[TMP12]], align 4, !alias.scope !5, !noalias !7
+; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[BROADCAST_SPLAT]], [[WIDE_LOAD]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.s* [[OBJ]], i64 0, i32 2, i64 [[I]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP9]], i32 0
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <4 x i32>*
+; CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP11]], align 4, !alias.scope !5, !noalias !7
+; CHECK-NEXT: [[TMP12:%.*]] = add nsw <4 x i32> [[TMP8]], [[WIDE_LOAD14]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP10]] to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> [[TMP12]], <4 x i32>* [[TMP13]], align 4, !alias.scope !5, !noalias !7
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[Z]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[DOTOUTER]], label [[SCALAR_PH]]
; CHECK-NEXT: br i1 [[EXITCOND_OUTER]], label [[DOTEXIT:%.*]], label [[DOTOUTER_PREHEADER]]
; CHECK: .inner:
; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[J_NEXT:%.*]], [[DOTINNER]] ]
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.s* [[OBJ]], i64 0, i32 0, i64 [[J]]
-; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4
-; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP1]], align 4
-; CHECK-NEXT: [[TMP17:%.*]] = add nsw i32 [[TMP16]], [[TMP15]]
-; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.s* [[OBJ]], i64 0, i32 2, i64 [[I]], i64 [[J]]
-; CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4
-; CHECK-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP17]], [[TMP19]]
-; CHECK-NEXT: store i32 [[TMP20]], i32* [[TMP18]], align 4
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.s* [[OBJ]], i64 0, i32 0, i64 [[J]]
+; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4
+; CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT: [[TMP18:%.*]] = add nsw i32 [[TMP17]], [[TMP16]]
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.s* [[OBJ]], i64 0, i32 2, i64 [[I]], i64 [[J]]
+; CHECK-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4
+; CHECK-NEXT: [[TMP21:%.*]] = add nsw i32 [[TMP18]], [[TMP20]]
+; CHECK-NEXT: store i32 [[TMP21]], i32* [[TMP19]], align 4
; CHECK-NEXT: [[J_NEXT]] = add nuw nsw i64 [[J]], 1
; CHECK-NEXT: [[EXITCOND_INNER:%.*]] = icmp eq i64 [[J_NEXT]], [[Z]]
; CHECK-NEXT: br i1 [[EXITCOND_INNER]], label [[DOTOUTER]], label [[DOTINNER]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[TMP1]], i64 1
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* bitcast (i32* @f.e to i8*), [[SCEVGEP]]
-; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[TMP0]], getelementptr (i8, i8* bitcast (i32* @f.e to i8*), i64 1)
+; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[TMP1]], bitcast (i32* getelementptr inbounds (i32, i32* @f.e, i64 1) to i8*)
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[FOUND_CONFLICT]], true
; CHECK-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S < %s | FileCheck %s
-@k = common dso_local local_unnamed_addr global i32 0, align 4
-
define void @m(i32* nocapture %p, i32* nocapture %p2, i32 %q) {
; CHECK-LABEL: @m(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[P1:%.*]] = bitcast i32* [[P:%.*]] to i8*
-; CHECK-NEXT: [[ARRAYIDX9_1:%.*]] = getelementptr inbounds i32, i32* [[P2:%.*]], i64 1
+; CHECK-NEXT: [[P23:%.*]] = bitcast i32* [[P2:%.*]] to i8*
+; CHECK-NEXT: [[ARRAYIDX9_1:%.*]] = getelementptr inbounds i32, i32* [[P2]], i64 1
; CHECK-NEXT: [[ARRAYIDX9_2:%.*]] = getelementptr inbounds i32, i32* [[P2]], i64 2
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[P]], i64 63
; CHECK-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
-; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i32, i32* [[P2]], i64 2
-; CHECK-NEXT: [[SCEVGEP34:%.*]] = bitcast i32* [[SCEVGEP3]] to i8*
-; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[SCEVGEP34]], i64 1
-; CHECK-NEXT: [[BC:%.*]] = bitcast i32* [[ARRAYIDX9_2]] to i8*
-; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[P1]], [[UGLYGEP]]
-; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[BC]], [[SCEVGEP2]]
+; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[P2]], i64 3
+; CHECK-NEXT: [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8*
+; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[P1]], [[SCEVGEP45]]
+; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[P23]], [[SCEVGEP2]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[FOUND_CONFLICT]], true
; CHECK-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = sub nsw <4 x i32> zeroinitializer, [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX9_1]], align 4, !alias.scope !0
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT5]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <4 x i32> [[TMP2]], [[BROADCAST_SPLAT6]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0
+; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT6]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <4 x i32> [[TMP2]], [[BROADCAST_SPLAT7]]
; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX9_2]], align 4, !alias.scope !0
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT7]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <4 x i32> [[TMP4]], [[BROADCAST_SPLAT8]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i32 0
+; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT8]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <4 x i32> [[TMP4]], [[BROADCAST_SPLAT9]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP7]], i32 0
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>*
; CHECK-NEXT: [[C_3:%.*]] = call i1 @cond()
; CHECK-NEXT: br i1 [[C_3]], label [[LOOP_3_PREHEADER:%.*]], label [[INNER_LATCH:%.*]]
; CHECK: loop.3.preheader:
-; CHECK-NEXT: [[L_1_LCSSA8:%.*]] = phi i16* [ [[L_1]], [[INNER_BB]] ]
+; CHECK-NEXT: [[L_1_LCSSA10:%.*]] = phi i16* [ [[L_1]], [[INNER_BB]] ]
; CHECK-NEXT: [[L_1_LCSSA:%.*]] = phi i16* [ [[L_1]], [[INNER_BB]] ]
; CHECK-NEXT: [[L_2_LCSSA:%.*]] = phi i16* [ [[L_2]], [[INNER_BB]] ]
; CHECK-NEXT: [[L_2_LCSSA3:%.*]] = bitcast i16* [[L_2_LCSSA]] to i8*
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
-; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[L_2_LCSSA3]], i64 1
-; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i16, i16* [[L_1_LCSSA]], i64 1
-; CHECK-NEXT: [[SCEVGEP6:%.*]] = bitcast i16* [[SCEVGEP]] to i8*
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i16, i16* [[L_2_LCSSA]], i64 1
+; CHECK-NEXT: [[SCEVGEP4:%.*]] = bitcast i16* [[SCEVGEP]] to i8*
+; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i16, i16* [[L_1_LCSSA]], i64 1
+; CHECK-NEXT: [[SCEVGEP58:%.*]] = bitcast i16* [[SCEVGEP5]] to i8*
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[N]], 2
-; CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr i16, i16* [[L_1_LCSSA8]], i64 [[TMP1]]
-; CHECK-NEXT: [[SCEVGEP710:%.*]] = bitcast i16* [[SCEVGEP7]] to i8*
-; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[L_2_LCSSA3]], [[SCEVGEP710]]
-; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP6]], [[UGLYGEP]]
+; CHECK-NEXT: [[SCEVGEP9:%.*]] = getelementptr i16, i16* [[L_1_LCSSA10]], i64 [[TMP1]]
+; CHECK-NEXT: [[SCEVGEP912:%.*]] = bitcast i16* [[SCEVGEP9]] to i8*
+; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[L_2_LCSSA3]], [[SCEVGEP912]]
+; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP58]], [[SCEVGEP4]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[FOUND_CONFLICT]], true
; CHECK-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: exit.loopexit:
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: exit.loopexit1:
-; CHECK-NEXT: [[L_1_LCSSA4:%.*]] = phi i16* [ [[L_1]], [[INNER_LATCH]] ]
+; CHECK-NEXT: [[L_1_LCSSA6:%.*]] = phi i16* [ [[L_1]], [[INNER_LATCH]] ]
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
-; CHECK-NEXT: [[L_15:%.*]] = phi i16* [ [[L_1_LCSSA4]], [[EXIT_LOOPEXIT1]] ], [ [[L_1_LCSSA]], [[EXIT_LOOPEXIT]] ]
-; CHECK-NEXT: [[L_3:%.*]] = load i16, i16* [[L_15]], align 2
+; CHECK-NEXT: [[L_17:%.*]] = phi i16* [ [[L_1_LCSSA6]], [[EXIT_LOOPEXIT1]] ], [ [[L_1_LCSSA]], [[EXIT_LOOPEXIT]] ]
+; CHECK-NEXT: [[L_3:%.*]] = load i16, i16* [[L_17]], align 2
; CHECK-NEXT: ret i16 [[L_3]]
;
entry:
; CHECK: define void @f1
; CHECK: for.end.loopexit:
; CHECK-NEXT: %t2.lver.ph = phi i16 [ %t2.lver.orig, %for.body.lver.orig ]
-; CHECK: for.end.loopexit2:
-; CHECK-NEXT: %t2.lver.ph3 = phi i16 [ %t2, %for.body ]
+; CHECK: for.end.loopexit3:
+; CHECK-NEXT: %t2.lver.ph4 = phi i16 [ %t2, %for.body ]
; CHECK: for.end:
-; CHECK-NEXT: %t2.lver = phi i16 [ %t2.lver.ph, %for.end.loopexit ], [ %t2.lver.ph3, %for.end.loopexit2 ]
+; CHECK-NEXT: %t2.lver = phi i16 [ %t2.lver.ph, %for.end.loopexit ], [ %t2.lver.ph4, %for.end.loopexit3 ]
; CHECK-NEXT: %tobool = icmp eq i16 %t2.lver, 0
; CHECK: if.then:
; CHECK-NEXT: store i16 %t2.lver
; CHECK-NEXT: %add8 = add nsw i32 %[[induction]], %add
; CHECK-NEXT: %inc = add nuw i32 %j.113, 1
; CHECK-NEXT: %cmp2 = icmp ult i32 %inc, %itr
-; CHECK-NEXT: br i1 %cmp2, label %for.body3, label %for.inc11.loopexit.loopexit7, !llvm.loop !5
+; CHECK-NEXT: br i1 %cmp2, label %for.body3, label %for.inc11.loopexit.loopexit9, !llvm.loop !5
define i32 @foo(i32* nocapture %var1, i32* nocapture readnone %var2, i32* nocapture %var3, i32 %itr) #0 {
entry:
%cmp14 = icmp eq i32 %itr, 0
; CHECK: Loop: Loop at depth 2 containing: %for.body3.us<header><latch><exiting>
; CHECK-NEXT: Loop Versioning found to be beneficial
;
-; CHECK: for.cond1.for.inc17_crit_edge.us.loopexit7: ; preds = %for.body3.us
+; CHECK: for.cond1.for.inc17_crit_edge.us.loopexit9: ; preds = %for.body3.us
; CHECK-NEXT: %add14.us.lcssa = phi float [ %add14.us, %for.body3.us ]
; CHECK-NEXT: store float %add14.us.lcssa, float* %arrayidx.us, align 4, !alias.scope !0, !noalias !0
; CHECK-NEXT: br label %for.cond1.for.inc17_crit_edge.us