From: Nikita Popov
Date: Thu, 6 Apr 2023 07:30:42 +0000 (+0200)
Subject: [LoopVectorize] Convert some tests to opaque pointers (NFC)
X-Git-Tag: upstream/17.0.6~12471
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=53f7f857036b818786ca5c3cc86a4155b8a29617;p=platform%2Fupstream%2Fllvm.git

[LoopVectorize] Convert some tests to opaque pointers (NFC)
---

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll
index c609380..275a26b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll
@@ -1,9 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -opaque-pointers=0 -passes=loop-vectorize -S < %s | FileCheck %s
+; RUN: opt -passes=loop-vectorize -S < %s | FileCheck %s

 target triple = "aarch64-unknown-linux-gnu"

-define void @trip1025_i64(i64* noalias nocapture noundef %dst, i64* noalias nocapture noundef readonly %src) #0 {
+define void @trip1025_i64(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 {
 ; CHECK-LABEL: @trip1025_i64(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -22,25 +22,22 @@ define void @trip1025_i64(i64* noalias nocapture noundef %dst, i64* noalias noca
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, i64* [[SRC:%.*]], i64 [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, i64* [[TMP6]], i32 0
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64* [[TMP7]] to <vscale x 2 x i64>*
-; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* [[TMP8]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison)
-; CHECK-NEXT: [[TMP9:%.*]] = shl nsw <vscale x 2 x i64> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, i64* [[DST:%.*]], i64 [[TMP5]]
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, i64* [[TMP10]], i32 0
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast i64* [[TMP11]] to <vscale x 2 x i64>*
-; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* [[TMP12]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison)
-; CHECK-NEXT: [[TMP13:%.*]] = add nsw <vscale x 2 x i64> [[WIDE_MASKED_LOAD1]], [[TMP9]]
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64* [[TMP11]] to <vscale x 2 x i64>*
-; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0nxv2i64(<vscale x 2 x i64> [[TMP13]], <vscale x 2 x i64>* [[TMP14]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
-; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 2
-; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP16]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[SRC:%.*]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0
+; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison)
+; CHECK-NEXT: [[TMP8:%.*]] = shl nsw <vscale x 2 x i64> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST:%.*]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP10:%.*]] =
getelementptr inbounds i64, ptr [[TMP9]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP10]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP11:%.*]] = add nsw [[WIDE_MASKED_LOAD1]], [[TMP8]] +; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP11]], ptr [[TMP10]], i32 8, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP13]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; CHECK-NEXT: [[TMP17:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP18:%.*]] = extractelement [[TMP17]], i32 0 -; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP14:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP15:%.*]] = extractelement [[TMP14]], i32 0 +; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -48,13 +45,13 @@ define void @trip1025_i64(i64* noalias nocapture noundef %dst, i64* noalias noca ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_06:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[SRC]], i64 [[I_06]] -; CHECK-NEXT: [[TMP19:%.*]] = load i64, i64* [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[MUL:%.*]] = shl nsw i64 [[TMP19]], 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[DST]], i64 [[I_06]] -; CHECK-NEXT: [[TMP20:%.*]] = load i64, i64* [[ARRAYIDX1]], align 8 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP20]], [[MUL]] -; CHECK-NEXT: store i64 [[ADD]], i64* [[ARRAYIDX1]], align 8 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[I_06]] +; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[MUL:%.*]] = shl nsw i64 [[TMP16]], 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[I_06]] +; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP17]], [[MUL]] +; CHECK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX1]], align 8 ; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_06]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 1025 ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] @@ -66,13 +63,13 @@ entry: for.body: ; preds = %entry, %for.body %i.06 = phi i64 [ 0, %entry ], [ %inc, %for.body ] - %arrayidx = getelementptr inbounds i64, i64* %src, i64 %i.06 - %0 = load i64, i64* %arrayidx, align 8 + %arrayidx = getelementptr inbounds i64, ptr %src, i64 %i.06 + %0 = load i64, ptr %arrayidx, align 8 %mul = shl nsw i64 %0, 1 - %arrayidx1 = getelementptr inbounds i64, i64* %dst, i64 %i.06 - %1 = load i64, i64* %arrayidx1, align 8 + %arrayidx1 = getelementptr inbounds i64, ptr %dst, i64 %i.06 + %1 = load i64, ptr %arrayidx1, align 8 %add = add nsw i64 %1, %mul - store i64 %add, i64* %arrayidx1, align 8 + store i64 %add, ptr %arrayidx1, align 8 %inc = add nuw nsw i64 
%i.06, 1 %exitcond.not = icmp eq i64 %inc, 1025 br i1 %exitcond.not, label %for.end, label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll index 5f4c755..bd8dbbb 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -opaque-pointers=0 -S -hints-allow-reordering=false -passes=loop-vectorize -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \ +; RUN: opt -S -hints-allow-reordering=false -passes=loop-vectorize -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \ ; RUN: < %s | FileCheck %s --check-prefix=CHECK -; RUN: opt -opaque-pointers=0 -S -hints-allow-reordering=false -passes=loop-vectorize -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \ +; RUN: opt -S -hints-allow-reordering=false -passes=loop-vectorize -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \ ; RUN: -prefer-inloop-reductions < %s | FileCheck %s --check-prefix=CHECK-IN-LOOP target triple = "aarch64-unknown-linux-gnu" -define i32 @add_reduction_i32(i32* %ptr, i64 %n) #0 { +define i32 @add_reduction_i32(ptr %ptr, i64 %n) #0 { ; CHECK-LABEL: @add_reduction_i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1) @@ -30,39 +30,38 @@ define i32 @add_reduction_i32(i32* %ptr, i64 %n) #0 { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[TMP11]], i32 0 -; CHECK-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to * -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0nxv4i32(* [[TMP13]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP14:%.*]] = add [[VEC_PHI]], [[WIDE_MASKED_LOAD]] -; CHECK-NEXT: [[TMP15]] = select [[ACTIVE_LANE_MASK]], [[TMP14]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP11]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP12]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP13:%.*]] = add [[VEC_PHI]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[TMP14]] = select [[ACTIVE_LANE_MASK]], [[TMP13]], [[VEC_PHI]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 4 -; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP17]] -; CHECK-NEXT: [[TMP18:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP19:%.*]] = extractelement [[TMP18]], i32 
0 -; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 +; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP18:%.*]] = extractelement [[TMP17]], i32 0 +; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP20:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP15]]) +; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP14]]) ; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP20]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP19]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK: while.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[RED:%.*]] = phi i32 [ [[RED_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[PTR]], i64 [[INDEX]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[GEP]], align 4 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[INDEX]] +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[GEP]], align 4 ; CHECK-NEXT: [[RED_NEXT]] = add i32 [[RED]], [[VAL]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1 ; CHECK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: while.end.loopexit: -; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i32 [ [[RED_NEXT]], [[WHILE_BODY]] ], [ [[TMP20]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i32 [ [[RED_NEXT]], [[WHILE_BODY]] ], [ [[TMP19]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[RED_NEXT_LCSSA]] ; ; CHECK-IN-LOOP-LABEL: @add_reduction_i32( @@ -88,39 +87,38 @@ define i32 @add_reduction_i32(i32* %ptr, i64 %n) #0 { ; CHECK-IN-LOOP: vector.body: ; CHECK-IN-LOOP-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] ; CHECK-IN-LOOP-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-IN-LOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] +; CHECK-IN-LOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] ; CHECK-IN-LOOP-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], 0 -; CHECK-IN-LOOP-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i64 [[TMP10]] -; CHECK-IN-LOOP-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[TMP11]], i32 0 -; CHECK-IN-LOOP-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to * -; CHECK-IN-LOOP-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0nxv4i32(* [[TMP13]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-IN-LOOP-NEXT: [[TMP14:%.*]] = select [[ACTIVE_LANE_MASK]], [[WIDE_MASKED_LOAD]], zeroinitializer -; CHECK-IN-LOOP-NEXT: [[TMP15:%.*]] = call i32 
@llvm.vector.reduce.add.nxv4i32( [[TMP14]]) -; CHECK-IN-LOOP-NEXT: [[TMP16]] = add i32 [[TMP15]], [[VEC_PHI]] +; CHECK-IN-LOOP-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP10]] +; CHECK-IN-LOOP-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP11]], i32 0 +; CHECK-IN-LOOP-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP12]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-IN-LOOP-NEXT: [[TMP13:%.*]] = select [[ACTIVE_LANE_MASK]], [[WIDE_MASKED_LOAD]], zeroinitializer +; CHECK-IN-LOOP-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP13]]) +; CHECK-IN-LOOP-NEXT: [[TMP15]] = add i32 [[TMP14]], [[VEC_PHI]] ; CHECK-IN-LOOP-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) -; CHECK-IN-LOOP-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-IN-LOOP-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 4 -; CHECK-IN-LOOP-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP18]] -; CHECK-IN-LOOP-NEXT: [[TMP19:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-IN-LOOP-NEXT: [[TMP20:%.*]] = extractelement [[TMP19]], i32 0 -; CHECK-IN-LOOP-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-IN-LOOP-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-IN-LOOP-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 4 +; CHECK-IN-LOOP-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP17]] +; CHECK-IN-LOOP-NEXT: [[TMP18:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; CHECK-IN-LOOP-NEXT: [[TMP19:%.*]] = extractelement [[TMP18]], i32 0 +; CHECK-IN-LOOP-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-IN-LOOP: middle.block: ; CHECK-IN-LOOP-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK-IN-LOOP: scalar.ph: ; CHECK-IN-LOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-IN-LOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP16]], [[MIDDLE_BLOCK]] ] +; CHECK-IN-LOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ] ; CHECK-IN-LOOP-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK-IN-LOOP: while.body: ; CHECK-IN-LOOP-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-IN-LOOP-NEXT: [[RED:%.*]] = phi i32 [ [[RED_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; CHECK-IN-LOOP-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[PTR]], i64 [[INDEX]] -; CHECK-IN-LOOP-NEXT: [[VAL:%.*]] = load i32, i32* [[GEP]], align 4 +; CHECK-IN-LOOP-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[INDEX]] +; CHECK-IN-LOOP-NEXT: [[VAL:%.*]] = load i32, ptr [[GEP]], align 4 ; CHECK-IN-LOOP-NEXT: [[RED_NEXT]] = add i32 [[RED]], [[VAL]] ; CHECK-IN-LOOP-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1 ; CHECK-IN-LOOP-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]] ; CHECK-IN-LOOP-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK-IN-LOOP: while.end.loopexit: -; CHECK-IN-LOOP-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i32 [ [[RED_NEXT]], [[WHILE_BODY]] ], [ [[TMP16]], [[MIDDLE_BLOCK]] ] +; CHECK-IN-LOOP-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i32 [ [[RED_NEXT]], [[WHILE_BODY]] ], [ [[TMP15]], 
[[MIDDLE_BLOCK]] ] ; CHECK-IN-LOOP-NEXT: ret i32 [[RED_NEXT_LCSSA]] ; entry: @@ -129,8 +127,8 @@ entry: while.body: ; preds = %while.body, %entry %index = phi i64 [ %index.next, %while.body ], [ 0, %entry ] %red = phi i32 [ %red.next, %while.body ], [ 0, %entry ] - %gep = getelementptr i32, i32* %ptr, i64 %index - %val = load i32, i32* %gep + %gep = getelementptr i32, ptr %ptr, i64 %index + %val = load i32, ptr %gep %red.next = add i32 %red, %val %index.next = add nsw i64 %index, 1 %cmp10 = icmp ult i64 %index.next, %n @@ -140,7 +138,7 @@ while.end.loopexit: ; preds = %while.body ret i32 %red.next } -define float @add_reduction_f32(float* %ptr, i64 %n) #0 { +define float @add_reduction_f32(ptr %ptr, i64 %n) #0 { ; CHECK-LABEL: @add_reduction_f32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1) @@ -164,38 +162,37 @@ define float @add_reduction_f32(float* %ptr, i64 %n) #0 { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr float, float* [[PTR:%.*]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr float, float* [[TMP11]], i32 0 -; CHECK-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP12]] to * -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4f32.p0nxv4f32(* [[TMP13]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP14:%.*]] = select [[ACTIVE_LANE_MASK]], [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP15]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP14]]) +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[PTR:%.*]], i64 [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr float, ptr [[TMP11]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr [[TMP12]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP13:%.*]] = select [[ACTIVE_LANE_MASK]], [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP14]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP13]]) ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 4 -; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP17]] -; CHECK-NEXT: [[TMP18:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP19:%.*]] = extractelement [[TMP18]], i32 0 -; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 +; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 
true, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP18:%.*]] = extractelement [[TMP17]], i32 0 +; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK: while.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[RED:%.*]] = phi float [ [[RED_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr float, float* [[PTR]], i64 [[INDEX]] -; CHECK-NEXT: [[VAL:%.*]] = load float, float* [[GEP]], align 4 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr float, ptr [[PTR]], i64 [[INDEX]] +; CHECK-NEXT: [[VAL:%.*]] = load float, ptr [[GEP]], align 4 ; CHECK-NEXT: [[RED_NEXT]] = fadd float [[RED]], [[VAL]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1 ; CHECK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: while.end.loopexit: -; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi float [ [[RED_NEXT]], [[WHILE_BODY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi float [ [[RED_NEXT]], [[WHILE_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret float [[RED_NEXT_LCSSA]] ; ; CHECK-IN-LOOP-LABEL: @add_reduction_f32( @@ -221,38 +218,37 @@ define float @add_reduction_f32(float* %ptr, i64 %n) #0 { ; CHECK-IN-LOOP: vector.body: ; CHECK-IN-LOOP-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] ; CHECK-IN-LOOP-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-IN-LOOP-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] +; CHECK-IN-LOOP-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] ; CHECK-IN-LOOP-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], 0 -; CHECK-IN-LOOP-NEXT: [[TMP11:%.*]] = getelementptr float, float* [[PTR:%.*]], i64 [[TMP10]] -; CHECK-IN-LOOP-NEXT: [[TMP12:%.*]] = getelementptr float, float* [[TMP11]], i32 0 -; CHECK-IN-LOOP-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP12]] to * -; CHECK-IN-LOOP-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4f32.p0nxv4f32(* [[TMP13]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-IN-LOOP-NEXT: [[TMP14:%.*]] = select [[ACTIVE_LANE_MASK]], [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) -; CHECK-IN-LOOP-NEXT: [[TMP15]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP14]]) +; CHECK-IN-LOOP-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[PTR:%.*]], i64 [[TMP10]] +; CHECK-IN-LOOP-NEXT: [[TMP12:%.*]] = getelementptr float, ptr [[TMP11]], i32 0 +; CHECK-IN-LOOP-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr [[TMP12]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-IN-LOOP-NEXT: 
[[TMP13:%.*]] = select [[ACTIVE_LANE_MASK]], [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-IN-LOOP-NEXT: [[TMP14]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP13]]) ; CHECK-IN-LOOP-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) -; CHECK-IN-LOOP-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-IN-LOOP-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 4 -; CHECK-IN-LOOP-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP17]] -; CHECK-IN-LOOP-NEXT: [[TMP18:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-IN-LOOP-NEXT: [[TMP19:%.*]] = extractelement [[TMP18]], i32 0 -; CHECK-IN-LOOP-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-IN-LOOP-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-IN-LOOP-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 +; CHECK-IN-LOOP-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP16]] +; CHECK-IN-LOOP-NEXT: [[TMP17:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; CHECK-IN-LOOP-NEXT: [[TMP18:%.*]] = extractelement [[TMP17]], i32 0 +; CHECK-IN-LOOP-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-IN-LOOP: middle.block: ; CHECK-IN-LOOP-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK-IN-LOOP: scalar.ph: ; CHECK-IN-LOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-IN-LOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ] +; CHECK-IN-LOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ] ; CHECK-IN-LOOP-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK-IN-LOOP: while.body: ; CHECK-IN-LOOP-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-IN-LOOP-NEXT: [[RED:%.*]] = phi float [ [[RED_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; CHECK-IN-LOOP-NEXT: [[GEP:%.*]] = getelementptr float, float* [[PTR]], i64 [[INDEX]] -; CHECK-IN-LOOP-NEXT: [[VAL:%.*]] = load float, float* [[GEP]], align 4 +; CHECK-IN-LOOP-NEXT: [[GEP:%.*]] = getelementptr float, ptr [[PTR]], i64 [[INDEX]] +; CHECK-IN-LOOP-NEXT: [[VAL:%.*]] = load float, ptr [[GEP]], align 4 ; CHECK-IN-LOOP-NEXT: [[RED_NEXT]] = fadd float [[RED]], [[VAL]] ; CHECK-IN-LOOP-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1 ; CHECK-IN-LOOP-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]] ; CHECK-IN-LOOP-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK-IN-LOOP: while.end.loopexit: -; CHECK-IN-LOOP-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi float [ [[RED_NEXT]], [[WHILE_BODY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ] +; CHECK-IN-LOOP-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi float [ [[RED_NEXT]], [[WHILE_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ] ; CHECK-IN-LOOP-NEXT: ret float [[RED_NEXT_LCSSA]] ; entry: @@ -261,8 +257,8 @@ entry: while.body: ; preds = %while.body, %entry %index = phi i64 [ %index.next, %while.body ], [ 0, %entry ] %red = phi float [ %red.next, %while.body ], [ 0.000000, %entry ] - %gep = getelementptr float, float* %ptr, i64 %index - %val = load float, float* %gep + 
%gep = getelementptr float, ptr %ptr, i64 %index + %val = load float, ptr %gep %red.next = fadd float %red, %val %index.next = add nsw i64 %index, 1 %cmp10 = icmp ult i64 %index.next, %n @@ -272,7 +268,7 @@ while.end.loopexit: ; preds = %while.body ret float %red.next } -define i32 @cond_xor_reduction(i32* noalias %a, i32* noalias %cond, i64 %N) #0 { +define i32 @cond_xor_reduction(ptr noalias %a, ptr noalias %cond, i64 %N) #0 { ; CHECK-LABEL: @cond_xor_reduction( ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] @@ -295,48 +291,46 @@ define i32 @cond_xor_reduction(i32* noalias %a, i32* noalias %cond, i64 %N) #0 { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( zeroinitializer, i32 7, i32 0), [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( zeroinitializer, i32 7, i32 0), [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[COND:%.*]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 0 -; CHECK-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to * -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0nxv4i32(* [[TMP13]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP14:%.*]] = icmp eq [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, i32 5, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP16:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP14]], zeroinitializer -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i32, i32* [[TMP15]], i32 0 -; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP17]] to * -; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call @llvm.masked.load.nxv4i32.p0nxv4i32(* [[TMP18]], i32 4, [[TMP16]], poison) -; CHECK-NEXT: [[TMP19:%.*]] = xor [[VEC_PHI]], [[WIDE_MASKED_LOAD1]] -; CHECK-NEXT: [[TMP20:%.*]] = xor [[TMP14]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP21:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP20]], zeroinitializer -; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP16]], [[TMP19]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP22]] = select [[ACTIVE_LANE_MASK]], [[PREDPHI]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[COND:%.*]], i64 [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP12]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, i32 5, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[TMP10]] +; CHECK-NEXT: [[TMP15:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP14]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP16]], i32 4, [[TMP15]], poison) +; CHECK-NEXT: [[TMP17:%.*]] = xor [[VEC_PHI]], [[WIDE_MASKED_LOAD1]] +; CHECK-NEXT: [[TMP18:%.*]] = xor [[TMP13]], shufflevector 
( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP19:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP18]], zeroinitializer +; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP15]], [[TMP17]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP20]] = select [[ACTIVE_LANE_MASK]], [[PREDPHI]], [[VEC_PHI]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP24]] -; CHECK-NEXT: [[TMP25:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP26:%.*]] = extractelement [[TMP25]], i32 0 -; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP22]] +; CHECK-NEXT: [[TMP23:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP24:%.*]] = extractelement [[TMP23]], i32 0 +; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.xor.nxv4i32( [[TMP22]]) +; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.vector.reduce.xor.nxv4i32( [[TMP20]]) ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 7, [[ENTRY]] ], [ [[TMP27]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 7, [[ENTRY]] ], [ [[TMP25]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] ; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[RES:%.*]], [[FOR_INC]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[COND]], i64 [[IV]] -; CHECK-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TMP28]], 5 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[COND]], i64 [[IV]] +; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TMP26]], 5 ; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; CHECK: if.then: -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[IV]] -; CHECK-NEXT: [[TMP29:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[RDX]], [[TMP29]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[RDX]], [[TMP27]] ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[RES]] = phi i32 [ [[RDX]], [[FOR_BODY]] ], [ [[XOR]], [[IF_THEN]] ] @@ -344,7 +338,7 @@ define i32 @cond_xor_reduction(i32* noalias %a, i32* noalias %cond, i64 %N) #0 { ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label 
[[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i32 [ [[RES]], [[FOR_INC]] ], [ [[TMP27]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i32 [ [[RES]], [[FOR_INC]] ], [ [[TMP25]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[RES_LCSSA]] ; ; CHECK-IN-LOOP-LABEL: @cond_xor_reduction( @@ -369,45 +363,43 @@ define i32 @cond_xor_reduction(i32* noalias %a, i32* noalias %cond, i64 %N) #0 { ; CHECK-IN-LOOP: vector.body: ; CHECK-IN-LOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-IN-LOOP-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-IN-LOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 7, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] +; CHECK-IN-LOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 7, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] ; CHECK-IN-LOOP-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 -; CHECK-IN-LOOP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[COND:%.*]], i64 [[TMP10]] -; CHECK-IN-LOOP-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 0 -; CHECK-IN-LOOP-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to * -; CHECK-IN-LOOP-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0nxv4i32(* [[TMP13]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-IN-LOOP-NEXT: [[TMP14:%.*]] = icmp eq [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, i32 5, i64 0), poison, zeroinitializer) -; CHECK-IN-LOOP-NEXT: [[TMP15:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP10]] -; CHECK-IN-LOOP-NEXT: [[TMP16:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP14]], zeroinitializer -; CHECK-IN-LOOP-NEXT: [[TMP17:%.*]] = getelementptr i32, i32* [[TMP15]], i32 0 -; CHECK-IN-LOOP-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP17]] to * -; CHECK-IN-LOOP-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call @llvm.masked.load.nxv4i32.p0nxv4i32(* [[TMP18]], i32 4, [[TMP16]], poison) -; CHECK-IN-LOOP-NEXT: [[TMP19:%.*]] = select [[TMP16]], [[WIDE_MASKED_LOAD1]], zeroinitializer -; CHECK-IN-LOOP-NEXT: [[TMP20:%.*]] = call i32 @llvm.vector.reduce.xor.nxv4i32( [[TMP19]]) -; CHECK-IN-LOOP-NEXT: [[TMP21]] = xor i32 [[TMP20]], [[VEC_PHI]] +; CHECK-IN-LOOP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[COND:%.*]], i64 [[TMP10]] +; CHECK-IN-LOOP-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0 +; CHECK-IN-LOOP-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP12]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-IN-LOOP-NEXT: [[TMP13:%.*]] = icmp eq [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, i32 5, i64 0), poison, zeroinitializer) +; CHECK-IN-LOOP-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[TMP10]] +; CHECK-IN-LOOP-NEXT: [[TMP15:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP13]], zeroinitializer +; CHECK-IN-LOOP-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP14]], i32 0 +; CHECK-IN-LOOP-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP16]], i32 4, [[TMP15]], poison) +; CHECK-IN-LOOP-NEXT: [[TMP17:%.*]] = select [[TMP15]], [[WIDE_MASKED_LOAD1]], zeroinitializer +; CHECK-IN-LOOP-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.xor.nxv4i32( [[TMP17]]) +; CHECK-IN-LOOP-NEXT: [[TMP19]] = xor i32 [[TMP18]], [[VEC_PHI]] ; CHECK-IN-LOOP-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) -; CHECK-IN-LOOP-NEXT: [[TMP22:%.*]] = call i64 
@llvm.vscale.i64() -; CHECK-IN-LOOP-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 4 -; CHECK-IN-LOOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP23]] -; CHECK-IN-LOOP-NEXT: [[TMP24:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-IN-LOOP-NEXT: [[TMP25:%.*]] = extractelement [[TMP24]], i32 0 -; CHECK-IN-LOOP-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-IN-LOOP-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-IN-LOOP-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 4 +; CHECK-IN-LOOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP21]] +; CHECK-IN-LOOP-NEXT: [[TMP22:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; CHECK-IN-LOOP-NEXT: [[TMP23:%.*]] = extractelement [[TMP22]], i32 0 +; CHECK-IN-LOOP-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK-IN-LOOP: middle.block: ; CHECK-IN-LOOP-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-IN-LOOP: scalar.ph: ; CHECK-IN-LOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-IN-LOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 7, [[ENTRY]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ] +; CHECK-IN-LOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 7, [[ENTRY]] ], [ [[TMP19]], [[MIDDLE_BLOCK]] ] ; CHECK-IN-LOOP-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-IN-LOOP: for.body: ; CHECK-IN-LOOP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] ; CHECK-IN-LOOP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[RES:%.*]], [[FOR_INC]] ] -; CHECK-IN-LOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[COND]], i64 [[IV]] -; CHECK-IN-LOOP-NEXT: [[TMP26:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-IN-LOOP-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TMP26]], 5 +; CHECK-IN-LOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[COND]], i64 [[IV]] +; CHECK-IN-LOOP-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-IN-LOOP-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TMP24]], 5 ; CHECK-IN-LOOP-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; CHECK-IN-LOOP: if.then: -; CHECK-IN-LOOP-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[IV]] -; CHECK-IN-LOOP-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 -; CHECK-IN-LOOP-NEXT: [[XOR:%.*]] = xor i32 [[RDX]], [[TMP27]] +; CHECK-IN-LOOP-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] +; CHECK-IN-LOOP-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; CHECK-IN-LOOP-NEXT: [[XOR:%.*]] = xor i32 [[RDX]], [[TMP25]] ; CHECK-IN-LOOP-NEXT: br label [[FOR_INC]] ; CHECK-IN-LOOP: for.inc: ; CHECK-IN-LOOP-NEXT: [[RES]] = phi i32 [ [[RDX]], [[FOR_BODY]] ], [ [[XOR]], [[IF_THEN]] ] @@ -415,7 +407,7 @@ define i32 @cond_xor_reduction(i32* noalias %a, i32* noalias %cond, i64 %N) #0 { ; CHECK-IN-LOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-IN-LOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK-IN-LOOP: for.end: -; CHECK-IN-LOOP-NEXT: [[RES_LCSSA:%.*]] = phi i32 [ [[RES]], [[FOR_INC]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ] +; CHECK-IN-LOOP-NEXT: [[RES_LCSSA:%.*]] = phi i32 [ [[RES]], [[FOR_INC]] ], [ [[TMP19]], [[MIDDLE_BLOCK]] ] ; CHECK-IN-LOOP-NEXT: ret 
i32 [[RES_LCSSA]] ; entry: @@ -424,14 +416,14 @@ entry: for.body: %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ] %rdx = phi i32 [ 7, %entry ], [ %res, %for.inc ] - %arrayidx = getelementptr inbounds i32, i32* %cond, i64 %iv - %0 = load i32, i32* %arrayidx + %arrayidx = getelementptr inbounds i32, ptr %cond, i64 %iv + %0 = load i32, ptr %arrayidx %tobool = icmp eq i32 %0, 5 br i1 %tobool, label %if.then, label %for.inc if.then: - %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %iv - %1 = load i32, i32* %arrayidx2 + %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %iv + %1 = load i32, ptr %arrayidx2 %xor = xor i32 %rdx, %1 br label %for.inc diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll index 8cf181c..134e40b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -opaque-pointers=0 -S -passes=loop-vectorize -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue -force-vector-interleave=4 -force-vector-width=4 < %s | FileCheck %s +; RUN: opt -S -passes=loop-vectorize -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue -force-vector-interleave=4 -force-vector-width=4 < %s | FileCheck %s target triple = "aarch64-unknown-linux-gnu" -define void @simple_memset(i32 %val, i32* %ptr, i64 %n) #0 { +define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-LABEL: @simple_memset( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1) @@ -82,50 +82,46 @@ define void @simple_memset(i32 %val, i32* %ptr, i64 %n) #0 { ; CHECK-NEXT: [[TMP44:%.*]] = add i64 [[TMP43]], 0 ; CHECK-NEXT: [[TMP45:%.*]] = mul i64 [[TMP44]], 1 ; CHECK-NEXT: [[TMP46:%.*]] = add i64 [[INDEX6]], [[TMP45]] -; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i64 [[TMP31]] -; CHECK-NEXT: [[TMP48:%.*]] = getelementptr i32, i32* [[PTR]], i64 [[TMP36]] -; CHECK-NEXT: [[TMP49:%.*]] = getelementptr i32, i32* [[PTR]], i64 [[TMP41]] -; CHECK-NEXT: [[TMP50:%.*]] = getelementptr i32, i32* [[PTR]], i64 [[TMP46]] -; CHECK-NEXT: [[TMP51:%.*]] = getelementptr i32, i32* [[TMP47]], i32 0 -; CHECK-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to * -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32( [[BROADCAST_SPLAT]], * [[TMP52]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP53:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP54:%.*]] = mul i64 [[TMP53]], 4 -; CHECK-NEXT: [[TMP55:%.*]] = getelementptr i32, i32* [[TMP47]], i64 [[TMP54]] -; CHECK-NEXT: [[TMP56:%.*]] = bitcast i32* [[TMP55]] to * -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32( [[BROADCAST_SPLAT11]], * [[TMP56]], i32 4, [[ACTIVE_LANE_MASK7]]) -; CHECK-NEXT: [[TMP57:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP58:%.*]] = mul i64 [[TMP57]], 8 -; CHECK-NEXT: [[TMP59:%.*]] = getelementptr i32, i32* [[TMP47]], i64 [[TMP58]] -; CHECK-NEXT: [[TMP60:%.*]] = bitcast i32* [[TMP59]] to * -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32( [[BROADCAST_SPLAT13]], * [[TMP60]], i32 4, [[ACTIVE_LANE_MASK8]]) +; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP31]] +; CHECK-NEXT: [[TMP48:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[TMP36]] +; CHECK-NEXT: [[TMP49:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[TMP41]] 
+; CHECK-NEXT: [[TMP50:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[TMP46]] +; CHECK-NEXT: [[TMP51:%.*]] = getelementptr i32, ptr [[TMP47]], i32 0 +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP51]], i32 4, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[TMP52:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP53:%.*]] = mul i64 [[TMP52]], 4 +; CHECK-NEXT: [[TMP54:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP53]] +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT11]], ptr [[TMP54]], i32 4, [[ACTIVE_LANE_MASK7]]) +; CHECK-NEXT: [[TMP55:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP56:%.*]] = mul i64 [[TMP55]], 8 +; CHECK-NEXT: [[TMP57:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP56]] +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT13]], ptr [[TMP57]], i32 4, [[ACTIVE_LANE_MASK8]]) +; CHECK-NEXT: [[TMP58:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP59:%.*]] = mul i64 [[TMP58]], 12 +; CHECK-NEXT: [[TMP60:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP59]] +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT15]], ptr [[TMP60]], i32 4, [[ACTIVE_LANE_MASK9]]) ; CHECK-NEXT: [[TMP61:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP62:%.*]] = mul i64 [[TMP61]], 12 -; CHECK-NEXT: [[TMP63:%.*]] = getelementptr i32, i32* [[TMP47]], i64 [[TMP62]] -; CHECK-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to * -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32( [[BROADCAST_SPLAT15]], * [[TMP64]], i32 4, [[ACTIVE_LANE_MASK9]]) -; CHECK-NEXT: [[TMP65:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP66:%.*]] = mul i64 [[TMP65]], 4 -; CHECK-NEXT: [[TMP67:%.*]] = add i64 [[INDEX6]], [[TMP66]] -; CHECK-NEXT: [[TMP68:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP69:%.*]] = mul i64 [[TMP68]], 8 -; CHECK-NEXT: [[TMP70:%.*]] = add i64 [[INDEX6]], [[TMP69]] -; CHECK-NEXT: [[TMP71:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP72:%.*]] = mul i64 [[TMP71]], 12 -; CHECK-NEXT: [[TMP73:%.*]] = add i64 [[INDEX6]], [[TMP72]] +; CHECK-NEXT: [[TMP62:%.*]] = mul i64 [[TMP61]], 4 +; CHECK-NEXT: [[TMP63:%.*]] = add i64 [[INDEX6]], [[TMP62]] +; CHECK-NEXT: [[TMP64:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP65:%.*]] = mul i64 [[TMP64]], 8 +; CHECK-NEXT: [[TMP66:%.*]] = add i64 [[INDEX6]], [[TMP65]] +; CHECK-NEXT: [[TMP67:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP68:%.*]] = mul i64 [[TMP67]], 12 +; CHECK-NEXT: [[TMP69:%.*]] = add i64 [[INDEX6]], [[TMP68]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX6]], i64 [[TMP15]]) -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT16]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP67]], i64 [[TMP20]]) -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT17]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP70]], i64 [[TMP25]]) -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT18]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP73]], i64 [[TMP30]]) -; CHECK-NEXT: [[TMP74:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP75:%.*]] = mul i64 [[TMP74]], 16 -; CHECK-NEXT: [[INDEX_NEXT19]] = add i64 [[INDEX6]], [[TMP75]] -; CHECK-NEXT: [[TMP76:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP77:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT16]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP78:%.*]] = xor 
[[ACTIVE_LANE_MASK_NEXT17]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP79:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT18]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP80:%.*]] = extractelement [[TMP76]], i32 0 -; CHECK-NEXT: br i1 [[TMP80]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT16]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP63]], i64 [[TMP20]]) +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT17]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP66]], i64 [[TMP25]]) +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT18]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP69]], i64 [[TMP30]]) +; CHECK-NEXT: [[TMP70:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP71:%.*]] = mul i64 [[TMP70]], 16 +; CHECK-NEXT: [[INDEX_NEXT19]] = add i64 [[INDEX6]], [[TMP71]] +; CHECK-NEXT: [[TMP72:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP73:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT16]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP74:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT17]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP75:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT18]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP76:%.*]] = extractelement [[TMP72]], i32 0 +; CHECK-NEXT: br i1 [[TMP76]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -133,8 +129,8 @@ define void @simple_memset(i32 %val, i32* %ptr, i64 %n) #0 { ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK: while.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[PTR]], i64 [[INDEX]] -; CHECK-NEXT: store i32 [[VAL]], i32* [[GEP]], align 4 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[INDEX]] +; CHECK-NEXT: store i32 [[VAL]], ptr [[GEP]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1 ; CHECK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]] @@ -146,8 +142,8 @@ entry: while.body: ; preds = %while.body, %entry %index = phi i64 [ %index.next, %while.body ], [ 0, %entry ] - %gep = getelementptr i32, i32* %ptr, i64 %index - store i32 %val, i32* %gep + %gep = getelementptr i32, ptr %ptr, i64 %index + store i32 %val, ptr %gep %index.next = add nsw i64 %index, 1 %cmp10 = icmp ult i64 %index.next, %n br i1 %cmp10, label %while.body, label %while.end.loopexit, !llvm.loop !0 @@ -156,7 +152,7 @@ while.end.loopexit: ; preds = %while.body ret void } -define void @cond_memset(i32 %val, i32* noalias readonly %cond_ptr, i32* noalias %ptr, i64 %n) #0 { +define void @cond_memset(i32 %val, ptr noalias readonly %cond_ptr, ptr noalias %ptr, i64 %n) #0 { ; CHECK-LABEL: @cond_memset( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1) @@ -234,80 +230,72 @@ define void @cond_memset(i32 %val, i32* noalias readonly %cond_ptr, i32* noalias ; CHECK-NEXT: 
[[TMP44:%.*]] = add i64 [[TMP43]], 0
; CHECK-NEXT: [[TMP45:%.*]] = mul i64 [[TMP44]], 1
; CHECK-NEXT: [[TMP46:%.*]] = add i64 [[INDEX6]], [[TMP45]]
-; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i32, i32* [[COND_PTR:%.*]], i64 [[TMP31]]
-; CHECK-NEXT: [[TMP48:%.*]] = getelementptr i32, i32* [[COND_PTR]], i64 [[TMP36]]
-; CHECK-NEXT: [[TMP49:%.*]] = getelementptr i32, i32* [[COND_PTR]], i64 [[TMP41]]
-; CHECK-NEXT: [[TMP50:%.*]] = getelementptr i32, i32* [[COND_PTR]], i64 [[TMP46]]
-; CHECK-NEXT: [[TMP51:%.*]] = getelementptr i32, i32* [[TMP47]], i32 0
-; CHECK-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to *
-; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0nxv4i32(* [[TMP52]], i32 4, [[ACTIVE_LANE_MASK]], poison)
-; CHECK-NEXT: [[TMP53:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP54:%.*]] = mul i64 [[TMP53]], 4
-; CHECK-NEXT: [[TMP55:%.*]] = getelementptr i32, i32* [[TMP47]], i64 [[TMP54]]
-; CHECK-NEXT: [[TMP56:%.*]] = bitcast i32* [[TMP55]] to *
-; CHECK-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call @llvm.masked.load.nxv4i32.p0nxv4i32(* [[TMP56]], i32 4, [[ACTIVE_LANE_MASK7]], poison)
-; CHECK-NEXT: [[TMP57:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP58:%.*]] = mul i64 [[TMP57]], 8
-; CHECK-NEXT: [[TMP59:%.*]] = getelementptr i32, i32* [[TMP47]], i64 [[TMP58]]
-; CHECK-NEXT: [[TMP60:%.*]] = bitcast i32* [[TMP59]] to *
-; CHECK-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call @llvm.masked.load.nxv4i32.p0nxv4i32(* [[TMP60]], i32 4, [[ACTIVE_LANE_MASK8]], poison)
-; CHECK-NEXT: [[TMP61:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP62:%.*]] = mul i64 [[TMP61]], 12
-; CHECK-NEXT: [[TMP63:%.*]] = getelementptr i32, i32* [[TMP47]], i64 [[TMP62]]
-; CHECK-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to *
-; CHECK-NEXT: [[WIDE_MASKED_LOAD12:%.*]] = call @llvm.masked.load.nxv4i32.p0nxv4i32(* [[TMP64]], i32 4, [[ACTIVE_LANE_MASK9]], poison)
-; CHECK-NEXT: [[TMP65:%.*]] = icmp ne [[WIDE_MASKED_LOAD]], zeroinitializer
-; CHECK-NEXT: [[TMP66:%.*]] = icmp ne [[WIDE_MASKED_LOAD10]], zeroinitializer
-; CHECK-NEXT: [[TMP67:%.*]] = icmp ne [[WIDE_MASKED_LOAD11]], zeroinitializer
-; CHECK-NEXT: [[TMP68:%.*]] = icmp ne [[WIDE_MASKED_LOAD12]], zeroinitializer
-; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i64 [[TMP31]]
-; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, i32* [[PTR]], i64 [[TMP36]]
-; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, i32* [[PTR]], i64 [[TMP41]]
-; CHECK-NEXT: [[TMP72:%.*]] = getelementptr i32, i32* [[PTR]], i64 [[TMP46]]
-; CHECK-NEXT: [[TMP73:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP65]], zeroinitializer
-; CHECK-NEXT: [[TMP74:%.*]] = select [[ACTIVE_LANE_MASK7]], [[TMP66]], zeroinitializer
-; CHECK-NEXT: [[TMP75:%.*]] = select [[ACTIVE_LANE_MASK8]], [[TMP67]], zeroinitializer
-; CHECK-NEXT: [[TMP76:%.*]] = select [[ACTIVE_LANE_MASK9]], [[TMP68]], zeroinitializer
-; CHECK-NEXT: [[TMP77:%.*]] = getelementptr i32, i32* [[TMP69]], i32 0
-; CHECK-NEXT: [[TMP78:%.*]] = bitcast i32* [[TMP77]] to *
-; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32( [[BROADCAST_SPLAT]], * [[TMP78]], i32 4, [[TMP73]])
-; CHECK-NEXT: [[TMP79:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP80:%.*]] = mul i64 [[TMP79]], 4
-; CHECK-NEXT: [[TMP81:%.*]] = getelementptr i32, i32* [[TMP69]], i64 [[TMP80]]
-; CHECK-NEXT: [[TMP82:%.*]] = bitcast i32* [[TMP81]] to *
-; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32( [[BROADCAST_SPLAT14]], * [[TMP82]], i32 4, [[TMP74]])
+; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i32, ptr [[COND_PTR:%.*]], i64 [[TMP31]]
+; CHECK-NEXT: [[TMP48:%.*]] = getelementptr i32, ptr [[COND_PTR]], i64 [[TMP36]]
+; CHECK-NEXT: [[TMP49:%.*]] = getelementptr i32, ptr [[COND_PTR]], i64 [[TMP41]]
+; CHECK-NEXT: [[TMP50:%.*]] = getelementptr i32, ptr [[COND_PTR]], i64 [[TMP46]]
+; CHECK-NEXT: [[TMP51:%.*]] = getelementptr i32, ptr [[TMP47]], i32 0
+; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP51]], i32 4, [[ACTIVE_LANE_MASK]], poison)
+; CHECK-NEXT: [[TMP52:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP53:%.*]] = mul i64 [[TMP52]], 4
+; CHECK-NEXT: [[TMP54:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP53]]
+; CHECK-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP54]], i32 4, [[ACTIVE_LANE_MASK7]], poison)
+; CHECK-NEXT: [[TMP55:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP56:%.*]] = mul i64 [[TMP55]], 8
+; CHECK-NEXT: [[TMP57:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP56]]
+; CHECK-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP57]], i32 4, [[ACTIVE_LANE_MASK8]], poison)
+; CHECK-NEXT: [[TMP58:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP59:%.*]] = mul i64 [[TMP58]], 12
+; CHECK-NEXT: [[TMP60:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP59]]
+; CHECK-NEXT: [[WIDE_MASKED_LOAD12:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP60]], i32 4, [[ACTIVE_LANE_MASK9]], poison)
+; CHECK-NEXT: [[TMP61:%.*]] = icmp ne [[WIDE_MASKED_LOAD]], zeroinitializer
+; CHECK-NEXT: [[TMP62:%.*]] = icmp ne [[WIDE_MASKED_LOAD10]], zeroinitializer
+; CHECK-NEXT: [[TMP63:%.*]] = icmp ne [[WIDE_MASKED_LOAD11]], zeroinitializer
+; CHECK-NEXT: [[TMP64:%.*]] = icmp ne [[WIDE_MASKED_LOAD12]], zeroinitializer
+; CHECK-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP31]]
+; CHECK-NEXT: [[TMP66:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[TMP36]]
+; CHECK-NEXT: [[TMP67:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[TMP41]]
+; CHECK-NEXT: [[TMP68:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[TMP46]]
+; CHECK-NEXT: [[TMP69:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP61]], zeroinitializer
+; CHECK-NEXT: [[TMP70:%.*]] = select [[ACTIVE_LANE_MASK7]], [[TMP62]], zeroinitializer
+; CHECK-NEXT: [[TMP71:%.*]] = select [[ACTIVE_LANE_MASK8]], [[TMP63]], zeroinitializer
+; CHECK-NEXT: [[TMP72:%.*]] = select [[ACTIVE_LANE_MASK9]], [[TMP64]], zeroinitializer
+; CHECK-NEXT: [[TMP73:%.*]] = getelementptr i32, ptr [[TMP65]], i32 0
+; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP73]], i32 4, [[TMP69]])
+; CHECK-NEXT: [[TMP74:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP75:%.*]] = mul i64 [[TMP74]], 4
+; CHECK-NEXT: [[TMP76:%.*]] = getelementptr i32, ptr [[TMP65]], i64 [[TMP75]]
+; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT14]], ptr [[TMP76]], i32 4, [[TMP70]])
+; CHECK-NEXT: [[TMP77:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP78:%.*]] = mul i64 [[TMP77]], 8
+; CHECK-NEXT: [[TMP79:%.*]] = getelementptr i32, ptr [[TMP65]], i64 [[TMP78]]
+; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT16]], ptr [[TMP79]], i32 4, [[TMP71]])
+; CHECK-NEXT: [[TMP80:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP81:%.*]] = mul i64 [[TMP80]], 12
+; CHECK-NEXT: [[TMP82:%.*]] = getelementptr i32, ptr [[TMP65]], i64 [[TMP81]]
+; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT18]], ptr [[TMP82]], i32 4, [[TMP72]])
; CHECK-NEXT: [[TMP83:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP84:%.*]] = mul i64 [[TMP83]], 8
-; CHECK-NEXT: [[TMP85:%.*]] = getelementptr i32, i32* [[TMP69]], i64 [[TMP84]]
-; CHECK-NEXT: [[TMP86:%.*]] = bitcast i32* [[TMP85]] to *
-; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32( [[BROADCAST_SPLAT16]], * [[TMP86]], i32 4, [[TMP75]])
-; CHECK-NEXT: [[TMP87:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP88:%.*]] = mul i64 [[TMP87]], 12
-; CHECK-NEXT: [[TMP89:%.*]] = getelementptr i32, i32* [[TMP69]], i64 [[TMP88]]
-; CHECK-NEXT: [[TMP90:%.*]] = bitcast i32* [[TMP89]] to *
-; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32( [[BROADCAST_SPLAT18]], * [[TMP90]], i32 4, [[TMP76]])
-; CHECK-NEXT: [[TMP91:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP92:%.*]] = mul i64 [[TMP91]], 4
-; CHECK-NEXT: [[TMP93:%.*]] = add i64 [[INDEX6]], [[TMP92]]
-; CHECK-NEXT: [[TMP94:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP95:%.*]] = mul i64 [[TMP94]], 8
-; CHECK-NEXT: [[TMP96:%.*]] = add i64 [[INDEX6]], [[TMP95]]
-; CHECK-NEXT: [[TMP97:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP98:%.*]] = mul i64 [[TMP97]], 12
-; CHECK-NEXT: [[TMP99:%.*]] = add i64 [[INDEX6]], [[TMP98]]
+; CHECK-NEXT: [[TMP84:%.*]] = mul i64 [[TMP83]], 4
+; CHECK-NEXT: [[TMP85:%.*]] = add i64 [[INDEX6]], [[TMP84]]
+; CHECK-NEXT: [[TMP86:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP87:%.*]] = mul i64 [[TMP86]], 8
+; CHECK-NEXT: [[TMP88:%.*]] = add i64 [[INDEX6]], [[TMP87]]
+; CHECK-NEXT: [[TMP89:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP90:%.*]] = mul i64 [[TMP89]], 12
+; CHECK-NEXT: [[TMP91:%.*]] = add i64 [[INDEX6]], [[TMP90]]
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX6]], i64 [[TMP15]])
-; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT19]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP93]], i64 [[TMP20]])
-; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT20]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP96]], i64 [[TMP25]])
-; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT21]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP99]], i64 [[TMP30]])
-; CHECK-NEXT: [[TMP100:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP101:%.*]] = mul i64 [[TMP100]], 16
-; CHECK-NEXT: [[INDEX_NEXT22]] = add i64 [[INDEX6]], [[TMP101]]
-; CHECK-NEXT: [[TMP102:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)
-; CHECK-NEXT: [[TMP103:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT19]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)
-; CHECK-NEXT: [[TMP104:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT20]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)
-; CHECK-NEXT: [[TMP105:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT21]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)
-; CHECK-NEXT: [[TMP106:%.*]] = extractelement [[TMP102]], i32 0
-; CHECK-NEXT: br i1 [[TMP106]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT19]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP85]], i64 [[TMP20]])
+; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT20]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP88]], i64 [[TMP25]])
+; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT21]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP91]], i64 [[TMP30]])
+; CHECK-NEXT: [[TMP92:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP93:%.*]] = mul i64 [[TMP92]], 16
+; CHECK-NEXT: [[INDEX_NEXT22]] = add i64 [[INDEX6]], [[TMP93]]
+; CHECK-NEXT: [[TMP94:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)
+; CHECK-NEXT: [[TMP95:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT19]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)
+; CHECK-NEXT: [[TMP96:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT20]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)
+; CHECK-NEXT: [[TMP97:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT21]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)
+; CHECK-NEXT: [[TMP98:%.*]] = extractelement [[TMP94]], i32 0
+; CHECK-NEXT: br i1 [[TMP98]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
@@ -315,13 +303,13 @@ define void @cond_memset(i32 %val, i32* noalias readonly %cond_ptr, i32* noalias
; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
; CHECK: while.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_END:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT: [[COND_GEP:%.*]] = getelementptr i32, i32* [[COND_PTR]], i64 [[INDEX]]
-; CHECK-NEXT: [[COND_I32:%.*]] = load i32, i32* [[COND_GEP]], align 4
+; CHECK-NEXT: [[COND_GEP:%.*]] = getelementptr i32, ptr [[COND_PTR]], i64 [[INDEX]]
+; CHECK-NEXT: [[COND_I32:%.*]] = load i32, ptr [[COND_GEP]], align 4
; CHECK-NEXT: [[COND_I1:%.*]] = icmp ne i32 [[COND_I32]], 0
; CHECK-NEXT: br i1 [[COND_I1]], label [[DO_STORE:%.*]], label [[WHILE_END]]
; CHECK: do.store:
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[PTR]], i64 [[INDEX]]
-; CHECK-NEXT: store i32 [[VAL]], i32* [[GEP]], align 4
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[INDEX]]
+; CHECK-NEXT: store i32 [[VAL]], ptr [[GEP]], align 4
; CHECK-NEXT: br label [[WHILE_END]]
; CHECK: while.end:
; CHECK-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1
@@ -335,14 +323,14 @@ entry:
while.body: ; preds = %while.body, %entry
%index = phi i64 [ %index.next, %while.end ], [ 0, %entry ]
- %cond_gep = getelementptr i32, i32* %cond_ptr, i64 %index
- %cond_i32 = load i32, i32* %cond_gep
+ %cond_gep = getelementptr i32, ptr %cond_ptr, i64 %index
+ %cond_i32 = load i32, ptr %cond_gep
%cond_i1 = icmp ne i32 %cond_i32, 0
br i1 %cond_i1, label %do.store, label %while.end
do.store:
- %gep = getelementptr i32, i32* %ptr, i64 %index
- store i32 %val, i32* %gep
+ %gep = getelementptr i32, ptr %ptr, i64 %index
+ store i32 %val, ptr %gep
br label %while.end
while.end:
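For orientation, the conditional-store loop exercised by this cond_memset test corresponds to roughly the C pattern below. This is a sketch inferred from the IR source in the hunk above; the signature and the signed index type are illustrative, not part of the test:

    /* Each lane loads cond_ptr[i] and compares it against zero; that
       compare, ANDed with the active-lane mask, predicates the store of
       the splatted value into ptr[i]. */
    void cond_memset(int val, const int *cond_ptr, int *ptr, long n) {
      for (long i = 0; i < n; i++)
        if (cond_ptr[i] != 0)
          ptr[i] = val;
    }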
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
index cfa85d6..fd0bf10 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
@@ -1,12 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; REQUIRES: asserts
-; RUN: opt -opaque-pointers=0 -passes=loop-vectorize -S -mtriple=aarch64 -mattr=+sve -debug-only=loop-vectorize \
+; RUN: opt -passes=loop-vectorize -S -mtriple=aarch64 -mattr=+sve -debug-only=loop-vectorize \
; RUN: -prefer-predicate-over-epilogue=scalar-epilogue < %s 2>&1 | FileCheck %s
target triple = "aarch64-unknown-linux-gnu"
; CHECK-LABEL: LV: Checking a loop in 'pointer_induction_used_as_vector'
-; CHECK-NOT: LV: Found {{.*}} scalar instruction: %ptr.iv.2.next = getelementptr inbounds i8, i8* %ptr.iv.2, i64 1
+; CHECK-NOT: LV: Found {{.*}} scalar instruction: %ptr.iv.2.next = getelementptr inbounds i8, ptr %ptr.iv.2, i64 1
;
; CHECK: VPlan 'Initial VPlan for VF={vscale x 2},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
@@ -17,7 +17,7 @@ target triple = "aarch64-unknown-linux-gnu"
; CHECK-NEXT: vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
-; CHECK-NEXT: EMIT ir<%ptr.iv.1> = WIDEN-POINTER-INDUCTION ir<%start.1>, 1
+; CHECK-NEXT: EMIT ir<%ptr.iv.1> = WIDEN-POINTER-INDUCTION ir<%start.1>, 8
; CHECK-NEXT: EMIT ir<%ptr.iv.2> = WIDEN-POINTER-INDUCTION ir<%start.2>, 1
; CHECK-NEXT: WIDEN-GEP Var[Inv] ir<%ptr.iv.2.next> = getelementptr ir<%ptr.iv.2>, ir<1>
; CHECK-NEXT: WIDEN store ir<%ptr.iv.1>, ir<%ptr.iv.2.next>
@@ -33,7 +33,7 @@ target triple = "aarch64-unknown-linux-gnu"
; 1. As a uniform address for the load, and
; 2. Non-uniform use by the getelementptr which is stored. This requires the
; vector value.
-define void @pointer_induction_used_as_vector(i8** noalias %start.1, i8* noalias %start.2, i64 %N) {
+define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias %start.2, i64 %N) {
; CHECK-LABEL: @pointer_induction_used_as_vector(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
@@ -45,63 +45,62 @@ define void @pointer_induction_used_as_vector(i8** noalias %start.1, i8* noalias
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8*, i8** [[START_1:%.*]], i64 [[N_VEC]]
-; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, i8* [[START_2:%.*]], i64 [[N_VEC]]
+; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
+; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START_1:%.*]], i64 [[TMP4]]
+; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, ptr [[START_2:%.*]], i64 [[N_VEC]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i8* [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8*, i8** [[START_1]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
-; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 1
-; CHECK-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP7]]
-; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP6]], 0
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[TMP6]]
+; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2
+; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 1
+; CHECK-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP8]], 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = call @llvm.experimental.stepvector.nxv2i64()
-; CHECK-NEXT: [[TMP11:%.*]] = add [[DOTSPLAT]], [[TMP10]]
-; CHECK-NEXT: [[VECTOR_GEP:%.*]] = mul [[TMP11]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer)
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[POINTER_PHI]], [[VECTOR_GEP]]
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, [[TMP12]], i64 1
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8*, i8** [[NEXT_GEP]], i32 0
-; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to *
-; CHECK-NEXT: store [[TMP13]], * [[TMP15]], align 8
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement [[TMP12]], i32 0
-; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, i8* [[TMP16]], i32 0
-; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to *
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , * [[TMP18]], align 1
+; CHECK-NEXT: [[TMP12:%.*]] = call @llvm.experimental.stepvector.nxv2i64()
+; CHECK-NEXT: [[TMP13:%.*]] = add [[DOTSPLAT]], [[TMP12]]
+; CHECK-NEXT: [[VECTOR_GEP:%.*]] = mul [[TMP13]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer)
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[VECTOR_GEP]]
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, [[TMP14]], i64 1
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr ptr, ptr [[NEXT_GEP]], i32 0
+; CHECK-NEXT: store [[TMP15]], ptr [[TMP16]], align 8
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement [[TMP14]], i32 0
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP17]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP18]], align 1
; CHECK-NEXT: [[TMP19:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer)
-; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP17]] to *
-; CHECK-NEXT: store [[TMP19]], * [[TMP20]], align 1
-; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 2
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP22]]
-; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, i8* [[POINTER_PHI]], i64 [[TMP8]]
-; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: store [[TMP19]], ptr [[TMP18]], align 1
+; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP21]]
+; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP10]]
+; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i8** [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START_1]], [[ENTRY]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i8* [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[START_2]], [[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START_1]], [[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi ptr [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[START_2]], [[ENTRY]] ]
; CHECK-NEXT: br label [[LOOP_BODY:%.*]]
; CHECK: loop.body:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_BODY]] ]
-; CHECK-NEXT: [[PTR_IV_1:%.*]] = phi i8** [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[PTR_IV_1_NEXT:%.*]], [[LOOP_BODY]] ]
-; CHECK-NEXT: [[PTR_IV_2:%.*]] = phi i8* [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ], [ [[PTR_IV_2_NEXT:%.*]], [[LOOP_BODY]] ]
-; CHECK-NEXT: [[PTR_IV_1_NEXT]] = getelementptr inbounds i8*, i8** [[PTR_IV_1]], i64 1
-; CHECK-NEXT: [[PTR_IV_2_NEXT]] = getelementptr inbounds i8, i8* [[PTR_IV_2]], i64 1
-; CHECK-NEXT: store i8* [[PTR_IV_2_NEXT]], i8** [[PTR_IV_1]], align 8
-; CHECK-NEXT: [[LV:%.*]] = load i8, i8* [[PTR_IV_2]], align 1
+; CHECK-NEXT: [[PTR_IV_1:%.*]] = phi ptr [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[PTR_IV_1_NEXT:%.*]], [[LOOP_BODY]] ]
+; CHECK-NEXT: [[PTR_IV_2:%.*]] = phi ptr [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ], [ [[PTR_IV_2_NEXT:%.*]], [[LOOP_BODY]] ]
+; CHECK-NEXT: [[PTR_IV_1_NEXT]] = getelementptr inbounds ptr, ptr [[PTR_IV_1]], i64 1
+; CHECK-NEXT: [[PTR_IV_2_NEXT]] = getelementptr inbounds i8, ptr [[PTR_IV_2]], i64 1
+; CHECK-NEXT: store ptr [[PTR_IV_2_NEXT]], ptr [[PTR_IV_1]], align 8
+; CHECK-NEXT: [[LV:%.*]] = load i8, ptr [[PTR_IV_2]], align 1
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LV]], 1
-; CHECK-NEXT: store i8 [[ADD]], i8* [[PTR_IV_2]], align 1
+; CHECK-NEXT: store i8 [[ADD]], ptr [[PTR_IV_2]], align 1
; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 1
; CHECK-NEXT: [[C:%.*]] = icmp ne i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[C]], label [[LOOP_BODY]], label [[EXIT]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK-NEXT: br i1 [[C]], label [[LOOP_BODY]], label [[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
@@ -112,14 +111,14 @@ entry:
loop.body: ; preds = %loop.body, %entry
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.body ]
- %ptr.iv.1 = phi i8** [ %start.1, %entry ], [ %ptr.iv.1.next, %loop.body ]
- %ptr.iv.2 = phi i8* [ %start.2, %entry ], [ %ptr.iv.2.next, %loop.body ]
- %ptr.iv.1.next = getelementptr inbounds i8*, i8** %ptr.iv.1, i64 1
- %ptr.iv.2.next = getelementptr inbounds i8, i8* %ptr.iv.2, i64 1
- store i8* %ptr.iv.2.next, i8** %ptr.iv.1, align 8
- %lv = load i8, i8* %ptr.iv.2, align 1
+ %ptr.iv.1 = phi ptr [ %start.1, %entry ], [ %ptr.iv.1.next, %loop.body ]
+ %ptr.iv.2 = phi ptr [ %start.2, %entry ], [ %ptr.iv.2.next, %loop.body ]
+ %ptr.iv.1.next = getelementptr inbounds ptr, ptr %ptr.iv.1, i64 1
+ %ptr.iv.2.next = getelementptr inbounds i8, ptr %ptr.iv.2, i64 1
+ store ptr %ptr.iv.2.next, ptr %ptr.iv.1, align 8
+ %lv = load i8, ptr %ptr.iv.2, align 1
%add = add i8 %lv, 1
- store i8 %add, i8* %ptr.iv.2, align 1
+ store i8 %add, ptr %ptr.iv.2, align 1
%iv.next = add nuw i64 %iv, 1
%c = icmp ne i64 %iv.next, %N
br i1 %c, label %loop.body, label %exit, !llvm.loop !0
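Loosely, the pointer_induction_used_as_vector loop above is the following C sketch (inferred from the IR source; the function name and char-based types are illustrative). It shows the two uses the comment describes:

    /* p2 is used two ways: its lane-0 value supplies the address for the
       load/store (uniform use), while the incremented pointer values
       themselves are stored through p1 (non-uniform use, which needs the
       full vector of GEP results). */
    void f(char **p1, char *p2, long n) {
      for (long i = 0; i < n; i++) {
        *p1++ = p2 + 1;
        *p2 += 1;
        p2++;
      }
    }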
@@ -128,7 +127,7 @@ exit: ; preds = %loop.body
ret void
}
-define void @pointer_induction(i8* noalias %start, i64 %N) {
+define void @pointer_induction(ptr noalias %start, i64 %N) {
; CHECK-LABEL: @pointer_induction(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], 1
@@ -141,10 +140,10 @@ define void @pointer_induction(i8* noalias %start, i64 %N) {
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
-; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, i8* [[START:%.*]], i64 [[N_VEC]]
+; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[N_VEC]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i8* [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
@@ -156,36 +155,34 @@ define void @pointer_induction(i8* noalias %start, i64 %N) {
; CHECK-NEXT: [[TMP10:%.*]] = call @llvm.experimental.stepvector.nxv2i64()
; CHECK-NEXT: [[TMP11:%.*]] = add [[DOTSPLAT]], [[TMP10]]
; CHECK-NEXT: [[VECTOR_GEP:%.*]] = mul [[TMP11]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer)
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[POINTER_PHI]], [[VECTOR_GEP]]
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement [[TMP12]], i32 0
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, i8* [[TMP13]], i32 0
-; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to *
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , * [[TMP15]], align 1
-; CHECK-NEXT: [[TMP16:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer)
-; CHECK-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP14]] to *
-; CHECK-NEXT: store [[TMP16]], * [[TMP17]], align 1
-; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX2]], [[TMP19]]
-; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, i8* [[POINTER_PHI]], i64 [[TMP8]]
-; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[VECTOR_GEP]]
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement [[TMP12]], i32 0
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP13]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP14]], align 1
+; CHECK-NEXT: [[TMP15:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer)
+; CHECK-NEXT: store [[TMP15]], ptr [[TMP14]], align 1
+; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX2]], [[TMP17]]
+; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP8]]
+; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
-; CHECK-NEXT: [[PTR_PHI:%.*]] = phi i8* [ [[PTR_PHI_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[PTR_PHI:%.*]] = phi ptr [ [[PTR_PHI_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[INDEX_NXT]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP21:%.*]] = load i8, i8* [[PTR_PHI]], align 1
-; CHECK-NEXT: [[ADD:%.*]] = add i8 [[TMP21]], 1
-; CHECK-NEXT: store i8 [[ADD]], i8* [[PTR_PHI]], align 1
-; CHECK-NEXT: [[PTR_PHI_NEXT]] = getelementptr inbounds i8, i8* [[PTR_PHI]], i64 1
-; CHECK-NEXT: [[CMP_I_NOT:%.*]] = icmp eq i8* [[PTR_PHI_NEXT]], [[START]]
+; CHECK-NEXT: [[TMP19:%.*]] = load i8, ptr [[PTR_PHI]], align 1
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[TMP19]], 1
+; CHECK-NEXT: store i8 [[ADD]], ptr [[PTR_PHI]], align 1
+; CHECK-NEXT: [[PTR_PHI_NEXT]] = getelementptr inbounds i8, ptr [[PTR_PHI]], i64 1
+; CHECK-NEXT: [[CMP_I_NOT:%.*]] = icmp eq ptr [[PTR_PHI_NEXT]], [[START]]
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[INDEX]], [[N]]
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[END]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: end:
@@ -195,14 +192,14 @@ entry:
br label %for.body
for.body:
- %ptr.phi = phi i8* [ %ptr.phi.next, %for.body ], [ %start, %entry ]
+ %ptr.phi = phi ptr [ %ptr.phi.next, %for.body ], [ %start, %entry ]
%index = phi i64 [ %index_nxt, %for.body ], [ 0, %entry ]
%index_nxt = add i64 %index, 1
- %0 = load i8, i8* %ptr.phi, align 1
+ %0 = load i8, ptr %ptr.phi, align 1
%add = add i8 %0, 1
- store i8 %add, i8* %ptr.phi
- %ptr.phi.next = getelementptr inbounds i8, i8* %ptr.phi, i64 1
- %cmp.i.not = icmp eq i8* %ptr.phi.next, %start
+ store i8 %add, ptr %ptr.phi
+ %ptr.phi.next = getelementptr inbounds i8, ptr %ptr.phi, i64 1
+ %cmp.i.not = icmp eq ptr %ptr.phi.next, %start
%cmp = icmp ult i64 %index, %N
br i1 %cmp, label %for.body, label %end, !llvm.loop !0
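Before the next file, the pointer_induction test just above reduces to roughly this C loop (a sketch inferred from the IR source; note that the latch compares the pre-increment index against N, so the body runs N+1 times, which matches the add i64 %N, 1 trip count in the checks):

    /* A single byte-stride pointer induction variable: increment every
       element in place. */
    void pointer_induction(char *start, long n) {
      for (long i = 0; i <= n; i++)
        start[i] += 1;
    }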
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
index cb96838..394d183 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -opaque-pointers=0 -mtriple aarch64-linux-gnu -mattr=+sve -passes=loop-vectorize,dce,instcombine -S \
+; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -passes=loop-vectorize,dce,instcombine -S \
; RUN: -prefer-predicate-over-epilogue=scalar-epilogue < %s | FileCheck %s
; Ensure that we can vectorize loops such as:
@@ -14,7 +14,7 @@
; that we can use gather instructions with the correct offsets, taking
; vscale into account.
-define void @widen_ptr_phi_unrolled(i32* noalias nocapture %a, i32* noalias nocapture %b, i32* nocapture readonly %c, i64 %n) #0 {
+define void @widen_ptr_phi_unrolled(ptr noalias nocapture %a, ptr noalias nocapture %b, ptr nocapture readonly %c, i64 %n) #0 {
; CHECK-LABEL: @widen_ptr_phi_unrolled(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
@@ -26,76 +26,72 @@ define void @widen_ptr_phi_unrolled(i32* noalias nocapture %a, i32* noalias noca
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 3
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[N_VEC]], 1
-; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i32, i32* [[C:%.*]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[N_VEC]], 3
+; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 [[TMP4]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i32* [ [[C]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[C]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 2
-; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP5]], 4
+; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP5]], 6
; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.experimental.stepvector.nxv4i64()
-; CHECK-NEXT: [[VECTOR_GEP:%.*]] = shl [[TMP8]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer)
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, i32* [[POINTER_PHI]], [[VECTOR_GEP]]
+; CHECK-NEXT: [[VECTOR_GEP:%.*]] = shl [[TMP8]], shufflevector ( insertelement ( poison, i64 3, i64 0), poison, zeroinitializer)
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[VECTOR_GEP]]
; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement poison, i64 [[TMP6]], i64 0
; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector [[DOTSPLATINSERT2]], poison, zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = call @llvm.experimental.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP11:%.*]] = add [[DOTSPLAT3]], [[TMP10]]
-; CHECK-NEXT: [[VECTOR_GEP4:%.*]] = shl [[TMP11]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer)
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[POINTER_PHI]], [[VECTOR_GEP4]]
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, [[TMP9]], i64 1
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, [[TMP12]], i64 1
-; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0i32( [[TMP9]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison)
-; CHECK-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0i32( [[TMP12]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison)
-; CHECK-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0i32( [[TMP13]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison)
-; CHECK-NEXT: [[WIDE_MASKED_GATHER7:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0i32( [[TMP14]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison)
+; CHECK-NEXT: [[VECTOR_GEP4:%.*]] = shl [[TMP11]], shufflevector ( insertelement ( poison, i64 3, i64 0), poison, zeroinitializer)
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[VECTOR_GEP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, [[TMP9]], i64 1
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, [[TMP12]], i64 1
+; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP9]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison)
+; CHECK-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP12]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison)
+; CHECK-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP13]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison)
+; CHECK-NEXT: [[WIDE_MASKED_GATHER7:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP14]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison)
; CHECK-NEXT: [[TMP15:%.*]] = add nsw [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer)
; CHECK-NEXT: [[TMP16:%.*]] = add nsw [[WIDE_MASKED_GATHER5]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer)
-; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP17]] to *
-; CHECK-NEXT: store [[TMP15]], * [[TMP18]], align 4
-; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP20:%.*]] = shl nuw nsw i64 [[TMP19]], 2
-; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i64 [[TMP20]]
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to *
-; CHECK-NEXT: store [[TMP16]], * [[TMP22]], align 4
-; CHECK-NEXT: [[TMP23:%.*]] = add nsw [[WIDE_MASKED_GATHER6]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer)
-; CHECK-NEXT: [[TMP24:%.*]] = add nsw [[WIDE_MASKED_GATHER7]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer)
-; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP26:%.*]] = bitcast i32* [[TMP25]] to *
-; CHECK-NEXT: store [[TMP23]], * [[TMP26]], align 4
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT: store [[TMP15]], ptr [[TMP17]], align 4
+; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP19:%.*]] = shl nuw nsw i64 [[TMP18]], 2
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 [[TMP19]]
+; CHECK-NEXT: store [[TMP16]], ptr [[TMP20]], align 4
+; CHECK-NEXT: [[TMP21:%.*]] = add nsw [[WIDE_MASKED_GATHER6]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer)
+; CHECK-NEXT: [[TMP22:%.*]] = add nsw [[WIDE_MASKED_GATHER7]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer)
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-NEXT: store [[TMP21]], ptr [[TMP23]], align 4
+; CHECK-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP25:%.*]] = shl nuw nsw i64 [[TMP24]], 2
+; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i64 [[TMP25]]
+; CHECK-NEXT: store [[TMP22]], ptr [[TMP26]], align 4
; CHECK-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP28:%.*]] = shl nuw nsw i64 [[TMP27]], 2
-; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, i32* [[TMP25]], i64 [[TMP28]]
-; CHECK-NEXT: [[TMP30:%.*]] = bitcast i32* [[TMP29]] to *
-; CHECK-NEXT: store [[TMP24]], * [[TMP30]], align 4
-; CHECK-NEXT: [[TMP31:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP32:%.*]] = shl nuw nsw i64 [[TMP31]], 3
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP32]]
-; CHECK-NEXT: [[PTR_IND]] = getelementptr i32, i32* [[POINTER_PHI]], i64 [[TMP7]]
-; CHECK-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[TMP28:%.*]] = shl nuw nsw i64 [[TMP27]], 3
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP28]]
+; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP7]]
+; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[C]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[C]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
-; CHECK-NEXT: [[PTR_014:%.*]] = phi i32* [ [[INCDEC_PTR1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[PTR_014:%.*]] = phi ptr [ [[INCDEC_PTR1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[I_013:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
-; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[PTR_014]], i64 1
-; CHECK-NEXT: [[TMP34:%.*]] = load i32, i32* [[PTR_014]], align 4
-; CHECK-NEXT: [[INCDEC_PTR1]] = getelementptr inbounds i32, i32* [[PTR_014]], i64 2
-; CHECK-NEXT: [[TMP35:%.*]] = load i32, i32* [[INCDEC_PTR]], align 4
-; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP34]], 1
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I_013]]
-; CHECK-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX]], align 4
-; CHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP35]], 1
-; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[I_013]]
-; CHECK-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX3]], align 4
+; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[PTR_014]], i64 1
+; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[PTR_014]], align 4
+; CHECK-NEXT: [[INCDEC_PTR1]] = getelementptr inbounds i32, ptr [[PTR_014]], i64 2
+; CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[INCDEC_PTR]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], 1
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_013]]
+; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP31]], 1
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I_013]]
+; CHECK-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX3]], align 4
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_013]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -106,18 +102,18 @@ entry:
br label %for.body
for.body: ; preds = %entry, %for.body
- %ptr.014 = phi i32* [ %incdec.ptr1, %for.body ], [ %c, %entry ]
+ %ptr.014 = phi ptr [ %incdec.ptr1, %for.body ], [ %c, %entry ]
%i.013 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
- %incdec.ptr = getelementptr inbounds i32, i32* %ptr.014, i64 1
- %0 = load i32, i32* %ptr.014, align 4
- %incdec.ptr1 = getelementptr inbounds i32, i32* %ptr.014, i64 2
- %1 = load i32, i32* %incdec.ptr, align 4
+ %incdec.ptr = getelementptr inbounds i32, ptr %ptr.014, i64 1
+ %0 = load i32, ptr %ptr.014, align 4
+ %incdec.ptr1 = getelementptr inbounds i32, ptr %ptr.014, i64 2
+ %1 = load i32, ptr %incdec.ptr, align 4
%add = add nsw i32 %0, 1
- %arrayidx = getelementptr inbounds i32, i32* %a, i64 %i.013
- store i32 %add, i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i32, ptr %a, i64 %i.013
+ store i32 %add, ptr %arrayidx, align 4
%add2 = add nsw i32 %1, 1
- %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 %i.013
- store i32 %add2, i32* %arrayidx3, align 4
+ %arrayidx3 = getelementptr inbounds i32, ptr %b, i64 %i.013
+ store i32 %add2, ptr %arrayidx3, align 4
%inc = add nuw nsw i64 %i.013, 1
%exitcond.not = icmp eq i64 %inc, %n
br i1 %exitcond.not, label %for.exit, label %for.body, !llvm.loop !0
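The widen_ptr_phi_unrolled test above is, sketched in C from the IR source (names illustrative), the strided-read pattern that the file header says should turn into masked gathers with vscale-aware offsets:

    /* ptr advances by two ints per iteration, so the lanes read at
       stride 2; the two results feed unit-stride stores into a and b. */
    void widen_ptr_phi_unrolled(int *a, int *b, const int *c, long n) {
      const int *ptr = c;
      for (long i = 0; i < n; i++) {
        int x1 = *ptr++;
        int x2 = *ptr++;
        a[i] = x1 + 1;
        b[i] = x2 + 1;
      }
    }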
@@ -138,7 +134,7 @@ for.exit: ; preds = %for.body
; This takes us down a different codepath to the test above, where
; here we treat the PHIs as being uniform.
-define void @widen_2ptrs_phi_unrolled(i32* noalias nocapture %dst, i32* noalias nocapture readonly %src, i64 %n) #0 {
+define void @widen_2ptrs_phi_unrolled(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %n) #0 {
; CHECK-LABEL: @widen_2ptrs_phi_unrolled(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
@@ -150,29 +146,29 @@ define void @widen_2ptrs_phi_unrolled(i32* noalias nocapture %dst, i32* noalias
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 3
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i32, i32* [[SRC:%.*]], i64 [[N_VEC]]
-; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i32, i32* [[DST:%.*]], i64 [[N_VEC]]
+; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[N_VEC]], 2
+; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[N_VEC]], 2
+; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[TMP5]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i32, i32* [[SRC]], i64 [[INDEX]]
-; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i32, i32* [[DST]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[NEXT_GEP]] to *
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , * [[TMP4]], align 4
-; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 2
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[NEXT_GEP]], i64 [[TMP6]]
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to *
-; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load , * [[TMP8]], align 4
-; CHECK-NEXT: [[TMP9:%.*]] = shl nsw [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer)
-; CHECK-NEXT: [[TMP10:%.*]] = shl nsw [[WIDE_LOAD7]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer)
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[NEXT_GEP5]] to *
-; CHECK-NEXT: store [[TMP9]], * [[TMP11]], align 4
-; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP13:%.*]] = shl nuw nsw i64 [[TMP12]], 2
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, i32* [[NEXT_GEP5]], i64 [[TMP13]]
-; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP14]] to *
-; CHECK-NEXT: store [[TMP10]], * [[TMP15]], align 4
+; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[INDEX]], 2
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP6]]
+; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[INDEX]], 2
+; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[NEXT_GEP]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP8]], 2
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i64 [[TMP9]]
+; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load , ptr [[TMP10]], align 4
+; CHECK-NEXT: [[TMP11:%.*]] = shl nsw [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer)
+; CHECK-NEXT: [[TMP12:%.*]] = shl nsw [[WIDE_LOAD7]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer)
+; CHECK-NEXT: store [[TMP11]], ptr [[NEXT_GEP5]], align 4
+; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP14:%.*]] = shl nuw nsw i64 [[TMP13]], 2
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[NEXT_GEP5]], i64 [[TMP14]]
+; CHECK-NEXT: store [[TMP12]], ptr [[TMP15]], align 4
; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i64 [[TMP16]], 3
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP17]]
@@ -183,18 +179,18 @@ define void @widen_2ptrs_phi_unrolled(i32* noalias nocapture %dst, i32* noalias
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[SRC]], [[ENTRY]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32* [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[DST]], [[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[SRC]], [[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi ptr [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[DST]], [[ENTRY]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I_011:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT: [[S_010:%.*]] = phi i32* [ [[INCDEC_PTR1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
-; CHECK-NEXT: [[D_09:%.*]] = phi i32* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
-; CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* [[S_010]], align 4
+; CHECK-NEXT: [[S_010:%.*]] = phi ptr [ [[INCDEC_PTR1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[D_09:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[S_010]], align 4
; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP19]], 1
-; CHECK-NEXT: store i32 [[MUL]], i32* [[D_09]], align 4
-; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i32, i32* [[D_09]], i64 1
-; CHECK-NEXT: [[INCDEC_PTR1]] = getelementptr inbounds i32, i32* [[S_010]], i64 1
+; CHECK-NEXT: store i32 [[MUL]], ptr [[D_09]], align 4
+; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i32, ptr [[D_09]], i64 1
+; CHECK-NEXT: [[INCDEC_PTR1]] = getelementptr inbounds i32, ptr [[S_010]], i64 1
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_011]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -206,13 +202,13 @@ entry:
for.body: ; preds = %entry, %for.body
%i.011 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
- %S.010 = phi i32* [ %incdec.ptr1, %for.body ], [ %src, %entry ]
- %D.09 = phi i32* [ %incdec.ptr, %for.body ], [ %dst, %entry ]
- %0 = load i32, i32* %S.010, align 4
+ %S.010 = phi ptr [ %incdec.ptr1, %for.body ], [ %src, %entry ]
+ %D.09 = phi ptr [ %incdec.ptr, %for.body ], [ %dst, %entry ]
+ %0 = load i32, ptr %S.010, align 4
%mul = shl nsw i32 %0, 1
- store i32 %mul, i32* %D.09, align 4
- %incdec.ptr = getelementptr inbounds i32, i32* %D.09, i64 1
- %incdec.ptr1 = getelementptr inbounds i32, i32* %S.010, i64 1
+ store i32 %mul, ptr %D.09, align 4
+ %incdec.ptr = getelementptr inbounds i32, ptr %D.09, i64 1
+ %incdec.ptr1 = getelementptr inbounds i32, ptr %S.010, i64 1
%inc = add nuw nsw i64 %i.011, 1
%exitcond.not = icmp eq i64 %inc, %n
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0
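widen_2ptrs_phi_unrolled, by contrast, is the simple two-pointer loop below (a C sketch of the IR source above). Both pointer PHIs advance in lockstep with the induction variable, which is why they can be treated as uniform:

    /* Both pointers are pure functions of i, so only their scalar values
       are needed; the body is dst[i] = src[i] << 1. */
    void widen_2ptrs_phi_unrolled(int *dst, const int *src, long n) {
      for (long i = 0; i < n; i++)
        *dst++ = *src++ << 1;
    }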
@@ -229,7 +225,7 @@ for.cond.cleanup: ; preds = %for.body
; because it is stored to memory.
;
-define i32 @pointer_iv_mixed(i32* noalias %a, i32** noalias %b, i64 %n) #0 {
+define i32 @pointer_iv_mixed(ptr noalias %a, ptr noalias %b, i64 %n) #0 {
; CHECK-LABEL: @pointer_iv_mixed(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
@@ -242,55 +238,57 @@ define i32 @pointer_iv_mixed(i32* noalias %a, i32** noalias %b, i64 %n) #0 {
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[N_MOD_VF]]
-; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[N_VEC]]
-; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i32*, i32** [[B:%.*]], i64 [[N_VEC]]
+; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[N_VEC]], 2
+; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[N_VEC]], 3
+; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[TMP5]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i32* [ [[A]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.experimental.stepvector.nxv2i64()
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[POINTER_PHI]], [[TMP6]]
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i32*, i32** [[B]], i64 [[INDEX]]
-; CHECK-NEXT: [[BC:%.*]] = bitcast [[TMP7]] to *>
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement *> [[BC]], i64 0
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , * [[TMP8]], align 8
-; CHECK-NEXT: [[TMP9]] = add [[WIDE_LOAD]], [[VEC_PHI]]
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32** [[NEXT_GEP]] to *
-; CHECK-NEXT: store [[TMP7]], * [[TMP10]], align 8
-; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i64 [[TMP11]], 1
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]]
-; CHECK-NEXT: [[PTR_IND]] = getelementptr i32, i32* [[POINTER_PHI]], i64 [[TMP5]]
-; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 3
+; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.experimental.stepvector.nxv2i64()
+; CHECK-NEXT: [[VECTOR_GEP:%.*]] = shl [[TMP8]], shufflevector ( insertelement ( poison, i64 2, i64 0), poison, zeroinitializer)
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[VECTOR_GEP]]
+; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[INDEX]], 3
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP10]]
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement [[TMP9]], i64 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP11]], align 8
+; CHECK-NEXT: [[TMP12]] = add [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT: store [[TMP9]], ptr [[NEXT_GEP]], align 8
+; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP14:%.*]] = shl nuw nsw i64 [[TMP13]], 1
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP14]]
+; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP7]]
+; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.add.nxv2i32( [[TMP9]])
+; CHECK-NEXT: [[TMP16:%.*]] = call i32 @llvm.vector.reduce.add.nxv2i32( [[TMP12]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[A]], [[ENTRY]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32** [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[A]], [[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi ptr [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP16]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT: [[P:%.*]] = phi i32* [ [[VAR3:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
-; CHECK-NEXT: [[Q:%.*]] = phi i32** [ [[VAR4:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[P:%.*]] = phi ptr [ [[VAR3:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[Q:%.*]] = phi ptr [ [[VAR4:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[VAR0:%.*]] = phi i32 [ [[VAR2:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
-; CHECK-NEXT: [[VAR1:%.*]] = load i32, i32* [[P]], align 8
+; CHECK-NEXT: [[VAR1:%.*]] = load i32, ptr [[P]], align 8
; CHECK-NEXT: [[VAR2]] = add i32 [[VAR1]], [[VAR0]]
-; CHECK-NEXT: store i32* [[P]], i32** [[Q]], align 8
-; CHECK-NEXT: [[VAR3]] = getelementptr inbounds i32, i32* [[P]], i64 1
-; CHECK-NEXT: [[VAR4]] = getelementptr inbounds i32*, i32** [[Q]], i64 1
+; CHECK-NEXT: store ptr [[P]], ptr [[Q]], align 8
+; CHECK-NEXT: [[VAR3]] = getelementptr inbounds i32, ptr [[P]], i64 1
+; CHECK-NEXT: [[VAR4]] = getelementptr inbounds ptr, ptr [[Q]], i64 1
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: for.end:
-; CHECK-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR2]], [[FOR_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR2]], [[FOR_BODY]] ], [ [[TMP16]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[VAR5]]
;
entry:
@@ -298,14 +296,14 @@ entry:
for.body:
%i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
- %p = phi i32* [ %var3, %for.body ], [ %a, %entry ]
- %q = phi i32** [ %var4, %for.body ], [ %b, %entry ]
+ %p = phi ptr [ %var3, %for.body ], [ %a, %entry ]
+ %q = phi ptr [ %var4, %for.body ], [ %b, %entry ]
%var0 = phi i32 [ %var2, %for.body ], [ 0, %entry ]
- %var1 = load i32, i32* %p, align 8
+ %var1 = load i32, ptr %p, align 8
%var2 = add i32 %var1, %var0
- store i32* %p, i32** %q, align 8
- %var3 = getelementptr inbounds i32, i32* %p, i32 1
- %var4 = getelementptr inbounds i32*, i32** %q, i32 1
+ store ptr %p, ptr %q, align 8
+ %var3 = getelementptr inbounds i32, ptr %p, i32 1
+ %var4 = getelementptr inbounds ptr, ptr %q, i32 1
%i.next = add nuw nsw i64 %i, 1
%cond = icmp slt i64 %i.next, %n
br i1 %cond, label %for.body, label %for.end, !llvm.loop !6
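A rough C rendering of pointer_iv_mixed, sketched from the IR source above (names illustrative, and the exact trip-count semantics of the do-while-style latch are glossed over): the loop mixes a scalar reduction with a pointer IV whose vector value escapes through a store.

    /* sum is an integer reduction fed by the lane-0 load through p, but
       the full vector of p values must still be materialized because the
       pointer itself is stored to *q on every iteration. */
    int pointer_iv_mixed(int *a, int **b, long n) {
      int *p = a, **q = b, sum = 0;
      for (long i = 0; i < n; i++) {
        sum += *p;
        *q++ = p++;
      }
      return sum;
    }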
@@ -315,7 +313,7 @@ for.end:
ret i32 %var5
}
-define void @phi_used_in_vector_compare_and_scalar_indvar_update_and_store(i16* %ptr) #0 {
+define void @phi_used_in_vector_compare_and_scalar_indvar_update_and_store(ptr %ptr) #0 {
; CHECK-LABEL: @phi_used_in_vector_compare_and_scalar_indvar_update_and_store(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -324,42 +322,43 @@ define void @phi_used_in_vector_compare_and_scalar_indvar_update_and_store(i16*
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub nuw nsw i64 1024, [[N_MOD_VF]]
-; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i16, i16* [[PTR:%.*]], i64 [[N_VEC]]
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[N_VEC]], 1
+; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[TMP2]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i16* [ [[PTR]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[PTR]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 1
-; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv2i64()
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i16, i16* [[POINTER_PHI]], [[TMP4]]
-; CHECK-NEXT: [[TMP6:%.*]] = icmp ne [[TMP5]], zeroinitializer
-; CHECK-NEXT: [[BC:%.*]] = bitcast [[TMP5]] to *>
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement *> [[BC]], i64 0
-; CHECK-NEXT: call void @llvm.masked.store.nxv2i16.p0nxv2i16( zeroinitializer, * [[TMP7]], i32 2, [[TMP6]])
-; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP8]], 1
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
-; CHECK-NEXT: [[PTR_IND]] = getelementptr i16, i16* [[POINTER_PHI]], i64 [[TMP3]]
-; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2
+; CHECK-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv2i64()
+; CHECK-NEXT: [[VECTOR_GEP:%.*]] = shl [[TMP5]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer)
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[VECTOR_GEP]]
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ne [[TMP6]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement [[TMP6]], i64 0
+; CHECK-NEXT: call void @llvm.masked.store.nxv2i16.p0( zeroinitializer, ptr [[TMP8]], i32 2, [[TMP7]])
+; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw i64 [[TMP9]], 1
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
+; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i16* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[PTR]], [[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[PTR]], [[ENTRY]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], [[IF_END:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT: [[IV_PTR:%.*]] = phi i16* [ [[INCDEC_IV_PTR:%.*]], [[IF_END]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
-; CHECK-NEXT: [[CMP_I_NOT:%.*]] = icmp eq i16* [[IV_PTR]], null
+; CHECK-NEXT: [[IV_PTR:%.*]] = phi ptr [ [[INCDEC_IV_PTR:%.*]], [[IF_END]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[CMP_I_NOT:%.*]] = icmp eq ptr [[IV_PTR]], null
; CHECK-NEXT: br i1 [[CMP_I_NOT]], label [[IF_END]], label [[IF_END_SINK_SPLIT:%.*]]
; CHECK: if.end.sink.split:
-; CHECK-NEXT: store i16 0, i16* [[IV_PTR]], align 2
+; CHECK-NEXT: store i16 0, ptr [[IV_PTR]], align 2
; CHECK-NEXT: br label [[IF_END]]
; CHECK: if.end:
-; CHECK-NEXT: [[INCDEC_IV_PTR]] = getelementptr inbounds i16, i16* [[IV_PTR]], i64 1
+; CHECK-NEXT: [[INCDEC_IV_PTR]] = getelementptr inbounds i16, ptr [[IV_PTR]], i64 1
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp ult i64 [[IV]], 1023
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP10:![0-9]+]]
@@ -371,22 +370,22 @@ entry:
for.body: ; preds = %if.end, %entry
%iv = phi i64 [ %inc, %if.end ], [ 0, %entry ]
- %iv.ptr = phi i16* [ %incdec.iv.ptr, %if.end ], [ %ptr, %entry ]
- %cmp.i = icmp ne i16* %iv.ptr, null
+ %iv.ptr = phi ptr [ %incdec.iv.ptr, %if.end ], [ %ptr, %entry ]
+ %cmp.i = icmp ne ptr %iv.ptr, null
br i1 %cmp.i, label %if.end.sink.split, label %if.end
if.end.sink.split: ; preds = %for.body
- store i16 0, i16* %iv.ptr, align 2
+ store i16 0, ptr %iv.ptr, align 2
br label %if.end
if.end: ; preds = %if.end.sink.split, %for.body
- %incdec.iv.ptr = getelementptr inbounds i16, i16* %iv.ptr, i64 1
+ %incdec.iv.ptr = getelementptr inbounds i16, ptr %iv.ptr, i64 1
%inc = add nuw nsw i64 %iv, 1
%exitcond.not = icmp ult i64 %inc, 1024
br i1 %exitcond.not, label %for.body, label %for.end, !llvm.loop !6
for.end: ; preds = %if.end, %for.end
- %iv.ptr.1.lcssa = phi i16* [ %incdec.iv.ptr, %if.end ]
+ %iv.ptr.1.lcssa = phi ptr [ %incdec.iv.ptr, %if.end ]
ret void
}
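The last test in this file, phi_used_in_vector_compare_and_scalar_indvar_update_and_store, walks a pointer over 1024 i16 elements and is roughly the C below (a sketch from the IR source; the shortened function name is illustrative). The null compare on the pointer IV is what forces a vector compare while the index update stays scalar:

    /* The guard compares the pointer itself against null, so the vector
       of pointer-IV values feeds the compare, and each lane's store of 0
       is predicated on the resulting mask. */
    void phi_used_in_vector_compare(short *ptr) {
      short *p = ptr;
      for (long i = 0; i < 1024; i++, p++)
        if (p != 0)
          *p = 0;
    }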
diff --git a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
index f51d474..fd99e3f 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -opaque-pointers=0 < %s -passes=loop-vectorize,dce,instcombine -force-vector-interleave=1 -force-vector-width=4 -loop-vectorize-with-block-frequency -S | FileCheck %s
+; RUN: opt < %s -passes=loop-vectorize,dce,instcombine -force-vector-interleave=1 -force-vector-width=4 -loop-vectorize-with-block-frequency -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
@@ -28,39 +28,36 @@ define void @example1() optsize {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[INDEX]]
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP1]] to <4 x i32>*
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 16
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[INDEX]]
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>*
-; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 16
-; CHECK-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[INDEX]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>*
-; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 16
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2048 x i32], ptr @b, i64 0, i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 16
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2048 x i32], ptr @c, i64 0, i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP2]], align 16
+; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 [[INDEX]]
+; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[TMP4]], align 16
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
-; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
+; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: br i1 true, label [[TMP10:%.*]], label [[SCALAR_PH]]
+; CHECK-NEXT: br i1 true, label [[TMP7:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: br label [[TMP9:%.*]]
-; CHECK: 9:
-; CHECK-NEXT: br i1 poison, label [[TMP10]], label [[TMP9]], !llvm.loop [[LOOP3:![0-9]+]]
-; CHECK: 10:
+; CHECK-NEXT: br label [[TMP6:%.*]]
+; CHECK: 6:
+; CHECK-NEXT: br i1 poison, label [[TMP7]], label [[TMP6]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: 7:
; CHECK-NEXT: ret void
;
br label %1
;