VPValue *Step = vputils::getOrCreateVPValueForSCEVExpr(Plan, II->getStep(),
*PSE.getSE());
assert(isa<SCEVConstant>(II->getStep()));
- return new VPWidenPointerInductionRecipe(Phi, Operands[0], Step, *II);
+ return new VPWidenPointerInductionRecipe(
+ Phi, Operands[0], Step, *II,
+ LoopVectorizationPlanner::getDecisionAndClampRange(
+ [&](ElementCount VF) {
+ return !VF.isScalable() && CM.isScalarAfterVectorization(Phi, VF);
+ },
+ Range));
}
return nullptr;
}
auto *IVR = getParent()->getPlan()->getCanonicalIV();
PHINode *CanonicalIV = cast<PHINode>(State.get(IVR, 0));
- if (onlyScalarsGenerated(State.VF)) {
+ if (onlyScalarsGenerated()) {
// This is the normalized GEP that starts counting at zero.
Value *PtrInd = State.Builder.CreateSExtOrTrunc(
CanonicalIV, IndDesc.getStep()->getType());
auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
// TODO: Split off the case that all users of a pointer phi are scalar
// from the VPWidenPointerInductionRecipe.
- if (WidenPhi->onlyScalarsGenerated(State->VF))
+ if (WidenPhi->onlyScalarsGenerated())
continue;
auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi, 0));
class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe {
const InductionDescriptor &IndDesc;
+ bool IsScalarAfterVectorization;
+
public:
/// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
/// Start.
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step,
- const InductionDescriptor &IndDesc)
+ const InductionDescriptor &IndDesc,
+ bool IsScalarAfterVectorization)
: VPHeaderPHIRecipe(VPVWidenPointerInductionSC, VPWidenPointerInductionSC,
Phi),
- IndDesc(IndDesc) {
+ IndDesc(IndDesc),
+ IsScalarAfterVectorization(IsScalarAfterVectorization) {
addOperand(Start);
addOperand(Step);
}
void execute(VPTransformState &State) override;
/// Returns true if only scalar values will be generated.
- bool onlyScalarsGenerated(ElementCount VF);
+ bool onlyScalarsGenerated();
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
}
#endif
-bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(ElementCount VF) {
- bool IsUniform = vputils::onlyFirstLaneUsed(this);
- return all_of(users(),
- [&](const VPUser *U) { return U->usesScalars(this); }) &&
- (IsUniform || !VF.isScalable());
+bool VPWidenPointerInductionRecipe::onlyScalarsGenerated() {
+ return IsScalarAfterVectorization;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[PTR_START_1:%.*]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <2 x i64> <i64 0, i64 1>
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <2 x i64> <i64 2, i64 3>
-; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x ptr> [[TMP0]], zeroinitializer
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x ptr> [[TMP1]], zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
-; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP4]])
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
-; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP5]])
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
-; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP6]])
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
-; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP7]])
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x ptr> [[TMP0]], i32 0
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i32 0
-; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP9]], align 1
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP8]], i32 2
-; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP10]], align 1
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR_START_1:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x ptr> [[TMP2]], ptr [[NEXT_GEP1]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP2]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x ptr> [[TMP6]], ptr [[NEXT_GEP3]], i32 1
+; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <2 x ptr> [[TMP3]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <2 x ptr> [[TMP7]], zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP10]])
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]])
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP12]])
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP9]], i32 1
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP13]])
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
+; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP14]], align 1
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 2
+; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP15]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 4
-; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
-; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
+; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 10001, 10000
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:
-; CHECK-NEXT: [[IND_END1:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000
+; CHECK-NEXT: [[IND_END4:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000
; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
-; CHECK-NEXT: [[POINTER_PHI5:%.*]] = phi ptr [ [[PTR_START_1]], [[VEC_EPILOG_PH]] ], [ [[PTR_IND6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT7:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[POINTER_PHI5]], <2 x i64> <i64 0, i64 1>
-; CHECK-NEXT: [[TMP13:%.*]] = icmp ne <2 x ptr> [[TMP12]], zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP13]], i32 0
-; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP14]])
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP13]], i32 1
-; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP15]])
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x ptr> [[TMP12]], i32 0
-; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP16]], i32 0
-; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP17]], align 1
-; CHECK-NEXT: [[INDEX_NEXT7]] = add nuw i64 [[INDEX4]], 2
-; CHECK-NEXT: [[PTR_IND6]] = getelementptr i8, ptr [[POINTER_PHI5]], i64 2
-; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT7]], 10000
-; CHECK-NEXT: br i1 [[TMP18]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT10:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX7]], 0
+; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP17]]
+; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[INDEX7]], 1
+; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP18]]
+; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP8]], i32 0
+; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x ptr> [[TMP19]], ptr [[NEXT_GEP9]], i32 1
+; CHECK-NEXT: [[TMP21:%.*]] = icmp ne <2 x ptr> [[TMP20]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i1> [[TMP21]], i32 0
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP22]])
+; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP21]], i32 1
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP23]])
+; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[NEXT_GEP8]], i32 0
+; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP24]], align 1
+; CHECK-NEXT: [[INDEX_NEXT10]] = add nuw i64 [[INDEX7]], 2
+; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT10]], 10000
+; CHECK-NEXT: br i1 [[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK: vec.epilog.middle.block:
-; CHECK-NEXT: [[CMP_N3:%.*]] = icmp eq i64 10001, 10000
-; CHECK-NEXT: br i1 [[CMP_N3]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
+; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 10001, 10000
+; CHECK-NEXT: br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END1]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START_1]], [[ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START_1]], [[ITER_CHECK]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL2]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL5]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = icmp ne ptr [[PTR_IV]], null
; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I_I]])
; CHECK-NEXT: store i8 0, ptr [[PTR_IV]], align 1
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START_1]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[POINTER_PHI8:%.*]] = phi ptr [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND9:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2
; CHECK-NEXT: [[TMP20:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT6]], [[TMP19]]
; CHECK-NEXT: [[VECTOR_GEP7:%.*]] = mul <vscale x 2 x i64> [[TMP20]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 8, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP7]]
-; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 8
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_2]], i64 [[TMP23]]
-; CHECK-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP25:%.*]] = mul i64 [[TMP24]], 2
-; CHECK-NEXT: [[TMP26:%.*]] = add i64 [[TMP25]], 0
-; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[INDEX]], [[TMP26]]
-; CHECK-NEXT: [[TMP28:%.*]] = mul i64 [[TMP27]], 8
-; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[START_2]], i64 [[TMP28]]
-; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
-; CHECK-NEXT: store <vscale x 2 x i64> zeroinitializer, ptr [[TMP29]], align 4
-; CHECK-NEXT: [[TMP30:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[TMP31:%.*]] = mul i32 [[TMP30]], 2
-; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 [[TMP31]]
-; CHECK-NEXT: store <vscale x 2 x i64> zeroinitializer, ptr [[TMP32]], align 4
-; CHECK-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP34:%.*]] = mul i64 [[TMP33]], 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP34]]
+; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 2
+; CHECK-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 2
+; CHECK-NEXT: [[TMP25:%.*]] = mul i64 8, [[TMP24]]
+; CHECK-NEXT: [[TMP26:%.*]] = mul i64 [[TMP23]], 0
+; CHECK-NEXT: [[DOTSPLATINSERT10:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP26]], i32 0
+; CHECK-NEXT: [[DOTSPLAT11:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT10]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP27:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+; CHECK-NEXT: [[TMP28:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT11]], [[TMP27]]
+; CHECK-NEXT: [[VECTOR_GEP12:%.*]] = mul <vscale x 2 x i64> [[TMP28]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 8, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[POINTER_PHI8]], <vscale x 2 x i64> [[VECTOR_GEP12]]
+; CHECK-NEXT: [[TMP30:%.*]] = mul i64 [[TMP23]], 1
+; CHECK-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP30]], i32 0
+; CHECK-NEXT: [[DOTSPLAT14:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT13]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP31:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+; CHECK-NEXT: [[TMP32:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT14]], [[TMP31]]
+; CHECK-NEXT: [[VECTOR_GEP15:%.*]] = mul <vscale x 2 x i64> [[TMP32]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 8, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[POINTER_PHI8]], <vscale x 2 x i64> [[VECTOR_GEP15]]
+; CHECK-NEXT: [[TMP34:%.*]] = extractelement <vscale x 2 x ptr> [[TMP29]], i32 0
+; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i64, ptr [[TMP34]], i32 0
+; CHECK-NEXT: store <vscale x 2 x i64> zeroinitializer, ptr [[TMP35]], align 4
+; CHECK-NEXT: [[TMP36:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP37:%.*]] = mul i32 [[TMP36]], 2
+; CHECK-NEXT: [[TMP38:%.*]] = getelementptr i64, ptr [[TMP34]], i32 [[TMP37]]
+; CHECK-NEXT: store <vscale x 2 x i64> zeroinitializer, ptr [[TMP38]], align 4
+; CHECK-NEXT: [[TMP39:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP40:%.*]] = mul i64 [[TMP39]], 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP40]]
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP13]]
-; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP35]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[PTR_IND9]] = getelementptr i8, ptr [[POINTER_PHI8]], i64 [[TMP25]]
+; CHECK-NEXT: [[TMP41:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP41]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; CHECK-NEXT: [[CAST_CMO:%.*]] = sub i64 [[N_VEC]], 1
-; CHECK-NEXT: [[TMP36:%.*]] = mul i64 [[CAST_CMO]], 8
-; CHECK-NEXT: [[IND_ESCAPE:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[TMP36]]
+; CHECK-NEXT: [[TMP42:%.*]] = mul i64 [[CAST_CMO]], 8
+; CHECK-NEXT: [[IND_ESCAPE:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[TMP42]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START_1]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, i8* [[START_2:%.*]], i64 [[N_VEC]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i8* [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i8** [ [[START_1]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[POINTER_PHI4:%.*]] = phi i8* [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND5:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8*, i8** [[START_1]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
-; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 1
-; CHECK-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP7]]
-; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP6]], 0
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
+; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 1
+; CHECK-NEXT: [[TMP7:%.*]] = mul i64 1, [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP5]], 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP8]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
-; CHECK-NEXT: [[TMP11:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP10]]
-; CHECK-NEXT: [[VECTOR_GEP:%.*]] = mul <vscale x 2 x i64> [[TMP11]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP]]
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, <vscale x 2 x i8*> [[TMP12]], i64 1
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8*, i8** [[NEXT_GEP]], i32 0
-; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to <vscale x 2 x i8*>*
-; CHECK-NEXT: store <vscale x 2 x i8*> [[TMP13]], <vscale x 2 x i8*>* [[TMP15]], align 8
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <vscale x 2 x i8*> [[TMP12]], i32 0
-; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, i8* [[TMP16]], i32 0
-; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to <vscale x 2 x i8>*
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP18]], align 1
-; CHECK-NEXT: [[TMP19:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i32 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP17]] to <vscale x 2 x i8>*
-; CHECK-NEXT: store <vscale x 2 x i8> [[TMP19]], <vscale x 2 x i8>* [[TMP20]], align 1
-; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 2
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP22]]
-; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, i8* [[POINTER_PHI]], i64 [[TMP8]]
-; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+; CHECK-NEXT: [[TMP10:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP9]]
+; CHECK-NEXT: [[VECTOR_GEP:%.*]] = mul <vscale x 2 x i64> [[TMP10]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8*, i8** [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP]]
+; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2
+; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 1
+; CHECK-NEXT: [[TMP15:%.*]] = mul i64 1, [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP13]], 0
+; CHECK-NEXT: [[DOTSPLATINSERT6:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP16]], i32 0
+; CHECK-NEXT: [[DOTSPLAT7:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT6]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+; CHECK-NEXT: [[TMP18:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT7]], [[TMP17]]
+; CHECK-NEXT: [[VECTOR_GEP8:%.*]] = mul <vscale x 2 x i64> [[TMP18]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, i8* [[POINTER_PHI4]], <vscale x 2 x i64> [[VECTOR_GEP8]]
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, <vscale x 2 x i8*> [[TMP19]], i64 1
+; CHECK-NEXT: [[TMP21:%.*]] = extractelement <vscale x 2 x i8**> [[TMP11]], i32 0
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8*, i8** [[TMP21]], i32 0
+; CHECK-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to <vscale x 2 x i8*>*
+; CHECK-NEXT: store <vscale x 2 x i8*> [[TMP20]], <vscale x 2 x i8*>* [[TMP23]], align 8
+; CHECK-NEXT: [[TMP24:%.*]] = extractelement <vscale x 2 x i8*> [[TMP19]], i32 0
+; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i8, i8* [[TMP24]], i32 0
+; CHECK-NEXT: [[TMP26:%.*]] = bitcast i8* [[TMP25]] to <vscale x 2 x i8>*
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP26]], align 1
+; CHECK-NEXT: [[TMP27:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i32 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP28:%.*]] = bitcast i8* [[TMP25]] to <vscale x 2 x i8>*
+; CHECK-NEXT: store <vscale x 2 x i8> [[TMP27]], <vscale x 2 x i8>* [[TMP28]], align 1
+; CHECK-NEXT: [[TMP29:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP30:%.*]] = mul i64 [[TMP29]], 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP30]]
+; CHECK-NEXT: [[PTR_IND]] = getelementptr i8*, i8** [[POINTER_PHI]], i64 [[TMP7]]
+; CHECK-NEXT: [[PTR_IND5]] = getelementptr i8, i8* [[POINTER_PHI4]], i64 [[TMP15]]
+; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, i8* [[START:%.*]], i64 [[N_VEC]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
+; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i8* [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX2]], 0
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[START]], i64 [[TMP5]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[NEXT_GEP]], i32 0
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to <vscale x 2 x i8>*
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP7]], align 1
-; CHECK-NEXT: [[TMP8:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i32 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP6]] to <vscale x 2 x i8>*
-; CHECK-NEXT: store <vscale x 2 x i8> [[TMP8]], <vscale x 2 x i8>* [[TMP9]], align 1
-; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX2]], [[TMP11]]
-; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
+; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 1
+; CHECK-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP6]], 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i32 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+; CHECK-NEXT: [[TMP11:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP10]]
+; CHECK-NEXT: [[VECTOR_GEP:%.*]] = mul <vscale x 2 x i64> [[TMP11]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP]]
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <vscale x 2 x i8*> [[TMP12]], i32 0
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, i8* [[TMP13]], i32 0
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to <vscale x 2 x i8>*
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP15]], align 1
+; CHECK-NEXT: [[TMP16:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i32 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP14]] to <vscale x 2 x i8>*
+; CHECK-NEXT: store <vscale x 2 x i8> [[TMP16]], <vscale x 2 x i8>* [[TMP17]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX2]], [[TMP19]]
+; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, i8* [[POINTER_PHI]], i64 [[TMP8]]
+; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]]
; CHECK-NEXT: [[PTR_PHI:%.*]] = phi i8* [ [[PTR_PHI_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[INDEX_NXT]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[PTR_PHI]], align 1
-; CHECK-NEXT: [[ADD:%.*]] = add i8 [[TMP13]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = load i8, i8* [[PTR_PHI]], align 1
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[TMP21]], 1
; CHECK-NEXT: store i8 [[ADD]], i8* [[PTR_PHI]], align 1
; CHECK-NEXT: [[PTR_PHI_NEXT]] = getelementptr inbounds i8, i8* [[PTR_PHI]], i64 1
; CHECK-NEXT: [[CMP_I_NOT:%.*]] = icmp eq i8* [[PTR_PHI_NEXT]], [[START]]
; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i32, i32* [[DST:%.*]], i64 [[N_VEC]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
+; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i32* [ [[SRC]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[POINTER_PHI7:%.*]] = phi i32* [ [[DST]], [[VECTOR_PH]] ], [ [[PTR_IND8:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i32, i32* [[SRC]], i64 [[INDEX]]
-; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i32, i32* [[DST]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[NEXT_GEP]] to <vscale x 4 x i32>*
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP4]], align 4
-; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i32 [[TMP5]], 2
-; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[NEXT_GEP]], i64 [[TMP7]]
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <vscale x 4 x i32>*
-; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP9]], align 4
-; CHECK-NEXT: [[TMP10:%.*]] = shl nsw <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP11:%.*]] = shl nsw <vscale x 4 x i32> [[WIDE_LOAD7]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[NEXT_GEP5]] to <vscale x 4 x i32>*
-; CHECK-NEXT: store <vscale x 4 x i32> [[TMP10]], <vscale x 4 x i32>* [[TMP12]], align 4
-; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[TMP14:%.*]] = shl nuw nsw i32 [[TMP13]], 2
-; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, i32* [[NEXT_GEP5]], i64 [[TMP15]]
-; CHECK-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <vscale x 4 x i32>*
-; CHECK-NEXT: store <vscale x 4 x i32> [[TMP11]], <vscale x 4 x i32>* [[TMP17]], align 4
-; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP19:%.*]] = shl nuw nsw i64 [[TMP18]], 3
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]]
-; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 3
+; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 3
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[POINTER_PHI]] to <vscale x 4 x i32>*
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP8]], align 4
+; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw i32 [[TMP9]], 2
+; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[POINTER_PHI]], i64 [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <vscale x 4 x i32>*
+; CHECK-NEXT: [[WIDE_LOAD15:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP13]], align 4
+; CHECK-NEXT: [[TMP14:%.*]] = shl nsw <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP15:%.*]] = shl nsw <vscale x 4 x i32> [[WIDE_LOAD15]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32* [[POINTER_PHI7]] to <vscale x 4 x i32>*
+; CHECK-NEXT: store <vscale x 4 x i32> [[TMP14]], <vscale x 4 x i32>* [[TMP16]], align 4
+; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP18:%.*]] = shl nuw nsw i32 [[TMP17]], 2
+; CHECK-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i32, i32* [[POINTER_PHI7]], i64 [[TMP19]]
+; CHECK-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP20]] to <vscale x 4 x i32>*
+; CHECK-NEXT: store <vscale x 4 x i32> [[TMP15]], <vscale x 4 x i32>* [[TMP21]], align 4
+; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP23:%.*]] = shl nuw nsw i64 [[TMP22]], 3
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP23]]
+; CHECK-NEXT: [[PTR_IND]] = getelementptr i32, i32* [[POINTER_PHI]], i64 [[TMP5]]
+; CHECK-NEXT: [[PTR_IND8]] = getelementptr i32, i32* [[POINTER_PHI7]], i64 [[TMP7]]
+; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; CHECK-NEXT: [[I_011:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[S_010:%.*]] = phi i32* [ [[INCDEC_PTR1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[D_09:%.*]] = phi i32* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
-; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[S_010]], align 4
-; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP21]], 1
+; CHECK-NEXT: [[TMP25:%.*]] = load i32, i32* [[S_010]], align 4
+; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP25]], 1
; CHECK-NEXT: store i32 [[MUL]], i32* [[D_09]], align 4
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i32, i32* [[D_09]], i64 1
; CHECK-NEXT: [[INCDEC_PTR1]] = getelementptr inbounds i32, i32* [[S_010]], i64 1
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i32* [ [[A]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[POINTER_PHI4:%.*]] = phi i32** [ [[B]], [[VECTOR_PH]] ], [ [[PTR_IND5:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 1
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[POINTER_PHI]], <vscale x 2 x i64> [[TMP6]]
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i32*, i32** [[B]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP8]], 1
; CHECK-NEXT: [[BC:%.*]] = bitcast <vscale x 2 x i32*> [[TMP7]] to <vscale x 2 x <vscale x 2 x i32>*>
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <vscale x 2 x <vscale x 2 x i32>*> [[BC]], i64 0
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP8]], align 8
-; CHECK-NEXT: [[TMP9]] = add <vscale x 2 x i32> [[WIDE_LOAD]], [[VEC_PHI]]
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32** [[NEXT_GEP]] to <vscale x 2 x i32*>*
-; CHECK-NEXT: store <vscale x 2 x i32*> [[TMP7]], <vscale x 2 x i32*>* [[TMP10]], align 8
-; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i64 [[TMP11]], 1
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]]
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <vscale x 2 x <vscale x 2 x i32>*> [[BC]], i64 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP10]], align 8
+; CHECK-NEXT: [[TMP11]] = add <vscale x 2 x i32> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32** [[POINTER_PHI4]] to <vscale x 2 x i32*>*
+; CHECK-NEXT: store <vscale x 2 x i32*> [[TMP7]], <vscale x 2 x i32*>* [[TMP12]], align 8
+; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP14:%.*]] = shl nuw nsw i64 [[TMP13]], 1
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP14]]
; CHECK-NEXT: [[PTR_IND]] = getelementptr i32, i32* [[POINTER_PHI]], i64 [[TMP5]]
-; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT: [[PTR_IND5]] = getelementptr i32*, i32** [[POINTER_PHI4]], i64 [[TMP9]]
+; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> [[TMP9]])
+; CHECK-NEXT: [[TMP16:%.*]] = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> [[TMP11]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[A]], [[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32** [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP16]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: for.end:
-; CHECK-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR2]], [[FOR_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR2]], [[FOR_BODY]] ], [ [[TMP16]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[VAR5]]
;
entry:
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S %s | FileCheck %s
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
define void @test_pr55375_interleave_opaque_ptr(ptr %start, ptr %end) {
; CHECK-LABEL: @test_pr55375_interleave_opaque_ptr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[START2:%.*]] = ptrtoint ptr [[START:%.*]] to i64
+; CHECK-NEXT: [[END1:%.*]] = ptrtoint ptr [[END:%.*]] to i64
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -16
+; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 2
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 2
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 16
+; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ %start, %vector.ph ], [ [[PTR_IND:%.*]], %vector.body ]
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <2 x i64> <i64 0, i64 16>
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x ptr> [[TMP5]], i32 0
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr ptr, ptr [[TMP7]], i32 0
-; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x ptr> zeroinitializer, <2 x ptr> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x ptr> [[TMP11]], <4 x ptr> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
-; CHECK-NEXT: store <4 x ptr> [[INTERLEAVED_VEC]], ptr [[TMP8]], align 8
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 16
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP6]]
+; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 16
+; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP8]]
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x ptr> [[TMP9]], ptr [[NEXT_GEP3]], i32 1
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr ptr, ptr [[NEXT_GEP]], i32 0
+; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x ptr> zeroinitializer, <2 x ptr> [[TMP10]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x ptr> [[TMP12]], <4 x ptr> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+; CHECK-NEXT: store <4 x ptr> [[INTERLEAVED_VEC]], ptr [[TMP11]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 32
-; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], %n.vec
-; CHECK-NEXT: br i1 [[TMP13]], label %middle.block, label %vector.body
+; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[IV_1:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[IV]], i64 0, i32 1
+; CHECK-NEXT: store ptr [[IV]], ptr [[IV_1]], align 8
+; CHECK-NEXT: store ptr null, ptr [[IV]], align 8
+; CHECK-NEXT: [[IV_NEXT]] = getelementptr inbounds [[PAIR]], ptr [[IV]], i64 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[IV_NEXT]], [[END]]
+; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
;
entry:
br label %loop
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <2 x i64> <i64 0, i64 8>
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x ptr> [[TMP5]], i32 0
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr ptr, ptr [[TMP6]], i32 0
-; CHECK-NEXT: store <2 x ptr> [[TMP5]], ptr [[TMP7]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP6]]
+; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 8
+; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP8]]
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x ptr> [[TMP9]], ptr [[NEXT_GEP3]], i32 1
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr ptr, ptr [[NEXT_GEP]], i32 0
+; CHECK-NEXT: store <2 x ptr> [[TMP10]], ptr [[TMP11]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 16
-; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]