From: Nikita Popov Date: Wed, 4 Jan 2023 16:18:17 +0000 (+0100) Subject: [LoopVectorize] Convert some tests to opaque pointers (NFC) X-Git-Tag: upstream/17.0.6~22156 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=2fab927546b34f5af7770541a9bbb974d9818c5c;p=platform%2Fupstream%2Fllvm.git [LoopVectorize] Convert some tests to opaque pointers (NFC) Check lines for some of these tests were regenerated. The difference is that with opaque pointers SCEVExpander always emits i8 GEPs, making the address calculation explicit. This is a known problem that will be solved long term by making all address calculations explicit. --- diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/arbitrary-induction-step.ll b/llvm/test/Transforms/LoopVectorize/AArch64/arbitrary-induction-step.ll index 7b50148..cb9ba1b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/arbitrary-induction-step.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/arbitrary-induction-step.ll @@ -11,8 +11,8 @@ target triple = "aarch64--linux-gnueabi" ; } ; CHECK-LABEL: @ind_plus2( -; CHECK: load <4 x i32>, <4 x i32>* -; CHECK: load <4 x i32>, <4 x i32>* +; CHECK: load <4 x i32>, ptr +; CHECK: load <4 x i32>, ptr ; CHECK: mul nsw <4 x i32> ; CHECK: mul nsw <4 x i32> ; CHECK: add <4 x i32> @@ -21,21 +21,21 @@ target triple = "aarch64--linux-gnueabi" ; CHECK: icmp eq i64 %index.next, 512 ; FORCE-VEC-LABEL: @ind_plus2( -; FORCE-VEC: %wide.load = load <2 x i32>, <2 x i32>* +; FORCE-VEC: %wide.load = load <2 x i32>, ptr ; FORCE-VEC: mul nsw <2 x i32> ; FORCE-VEC: add <2 x i32> ; FORCE-VEC: %index.next = add nuw i64 %index, 2 ; FORCE-VEC: icmp eq i64 %index.next, 512 -define i32 @ind_plus2(i32* %A) { +define i32 @ind_plus2(ptr %A) { entry: br label %for.body for.body: ; preds = %entry, %for.body - %A.addr = phi i32* [ %A, %entry ], [ %inc.ptr, %for.body ] + %A.addr = phi ptr [ %A, %entry ], [ %inc.ptr, %for.body ] %i = phi i32 [ 0, %entry ], [ %add1, %for.body ] %sum = phi i32 [ 0, %entry ], [ %add, 
%for.body ] - %inc.ptr = getelementptr inbounds i32, i32* %A.addr, i64 1 - %0 = load i32, i32* %A.addr, align 4 + %inc.ptr = getelementptr inbounds i32, ptr %A.addr, i64 1 + %0 = load i32, ptr %A.addr, align 4 %mul = mul nsw i32 %0, %i %add = add nsw i32 %mul, %sum %add1 = add nsw i32 %i, 2 @@ -55,8 +55,8 @@ for.end: ; preds = %for.body ; } ; CHECK-LABEL: @ind_minus2( -; CHECK: load <4 x i32>, <4 x i32>* -; CHECK: load <4 x i32>, <4 x i32>* +; CHECK: load <4 x i32>, ptr +; CHECK: load <4 x i32>, ptr ; CHECK: mul nsw <4 x i32> ; CHECK: mul nsw <4 x i32> ; CHECK: add <4 x i32> @@ -65,21 +65,21 @@ for.end: ; preds = %for.body ; CHECK: icmp eq i64 %index.next, 512 ; FORCE-VEC-LABEL: @ind_minus2( -; FORCE-VEC: %wide.load = load <2 x i32>, <2 x i32>* +; FORCE-VEC: %wide.load = load <2 x i32>, ptr ; FORCE-VEC: mul nsw <2 x i32> ; FORCE-VEC: add <2 x i32> ; FORCE-VEC: %index.next = add nuw i64 %index, 2 ; FORCE-VEC: icmp eq i64 %index.next, 512 -define i32 @ind_minus2(i32* %A) { +define i32 @ind_minus2(ptr %A) { entry: br label %for.body for.body: ; preds = %entry, %for.body - %A.addr = phi i32* [ %A, %entry ], [ %inc.ptr, %for.body ] + %A.addr = phi ptr [ %A, %entry ], [ %inc.ptr, %for.body ] %i = phi i32 [ 1024, %entry ], [ %sub, %for.body ] %sum = phi i32 [ 0, %entry ], [ %add, %for.body ] - %inc.ptr = getelementptr inbounds i32, i32* %A.addr, i64 1 - %0 = load i32, i32* %A.addr, align 4 + %inc.ptr = getelementptr inbounds i32, ptr %A.addr, i64 1 + %0 = load i32, ptr %A.addr, align 4 %mul = mul nsw i32 %0, %i %add = add nsw i32 %mul, %sum %sub = add nsw i32 %i, -2 @@ -123,18 +123,18 @@ for.end: ; preds = %for.body ; FORCE-VEC: add <2 x i32> ; FORCE-VEC: %index.next = add nuw i64 %index, 2 ; FORCE-VEC: icmp eq i64 %index.next, 1024 -define i32 @ptr_ind_plus2(i32* %A) { +define i32 @ptr_ind_plus2(ptr %A) { entry: br label %for.body for.body: ; preds = %for.body, %entry - %A.addr = phi i32* [ %A, %entry ], [ %inc.ptr1, %for.body ] + %A.addr = phi ptr [ %A, %entry ], [ 
%inc.ptr1, %for.body ] %sum = phi i32 [ 0, %entry ], [ %add, %for.body ] %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] - %inc.ptr = getelementptr inbounds i32, i32* %A.addr, i64 1 - %0 = load i32, i32* %A.addr, align 4 - %inc.ptr1 = getelementptr inbounds i32, i32* %A.addr, i64 2 - %1 = load i32, i32* %inc.ptr, align 4 + %inc.ptr = getelementptr inbounds i32, ptr %A.addr, i64 1 + %0 = load i32, ptr %A.addr, align 4 + %inc.ptr1 = getelementptr inbounds i32, ptr %A.addr, i64 2 + %1 = load i32, ptr %inc.ptr, align 4 %mul = mul nsw i32 %1, %0 %add = add nsw i32 %mul, %sum %inc = add nsw i32 %i, 1 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll index 99486a1..38cab8a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll @@ -18,7 +18,7 @@ ; return a; ; } ; -define i32 @PR33613(double* %b, double %j, i32 %d) #0 { +define i32 @PR33613(ptr %b, double %j, i32 %d) #0 { ; CHECK-VF4UF2-LABEL: @PR33613 ; CHECK-VF4UF2: vector.body ; CHECK-VF4UF2: %[[VEC_RECUR:.*]] = phi <vscale x 4 x double> [ {{.*}}, %vector.ph ], [ {{.*}}, %vector.body ] @@ -35,18 +35,18 @@ for.cond.cleanup: ret i32 %a.1.lcssa for.body: - %b.addr.012 = phi double* [ %b, %entry ], [ %add.ptr, %for.body ] + %b.addr.012 = phi ptr [ %b, %entry ], [ %add.ptr, %for.body ] %i.011 = phi i32 [ 0, %entry ], [ %inc1, %for.body ] %a.010 = phi i32 [ 0, %entry ], [ %a.1, %for.body ] %j.addr.09 = phi double [ %j, %entry ], [ %0, %for.body ] - %arrayidx = getelementptr inbounds double, double* %b.addr.012, i64 %idxprom - %0 = load double, double* %arrayidx, align 8 + %arrayidx = getelementptr inbounds double, ptr %b.addr.012, i64 %idxprom + %0 = load double, ptr %arrayidx, align 8 %mul = fmul double %j.addr.09, %0 %tobool = fcmp une double %mul, 0.000000e+00 %inc = zext i1 %tobool to i32 %a.1 = add nsw i32 %a.010, %inc %inc1 = add nuw nsw i32
%i.011, 1 - %add.ptr = getelementptr inbounds double, double* %b.addr.012, i64 25 + %add.ptr = getelementptr inbounds double, ptr %b.addr.012, i64 25 %exitcond = icmp eq i32 %inc1, 10240 br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !0 } @@ -66,32 +66,31 @@ for.body: ; } ; ; Check that the sext sank after the load in the vector loop. -define void @PR34711([2 x i16]* %a, i32* %b, i32* %c, i64 %n) #0 { +define void @PR34711(ptr %a, ptr %b, ptr %c, i64 %n) #0 { ; CHECK-VF4UF1-LABEL: @PR34711 ; CHECK-VF4UF1: vector.body ; CHECK-VF4UF1: %[[VEC_RECUR:.*]] = phi <vscale x 4 x i16> [ %vector.recur.init, %vector.ph ], [ %[[MGATHER:.*]], %vector.body ] -; CHECK-VF4UF1: %[[MGATHER]] = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0i16(<vscale x 4 x i16*> {{.*}}, i32 2, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i16> poison) +; CHECK-VF4UF1: %[[MGATHER]] = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> {{.*}}, i32 2, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i16> poison) ; CHECK-VF4UF1-NEXT: %[[SPLICE:.*]] = call <vscale x 4 x i16> @llvm.experimental.vector.splice.nxv4i16(<vscale x 4 x i16> %[[VEC_RECUR]], <vscale x 4 x i16> %[[MGATHER]], i32 -1) ; CHECK-VF4UF1-NEXT: %[[SXT1:.*]] = sext <vscale x 4 x i16> %[[SPLICE]] to <vscale x 4 x i32> ; CHECK-VF4UF1-NEXT: %[[SXT2:.*]] = sext <vscale x 4 x i16> %[[MGATHER]] to <vscale x 4 x i32> ; CHECK-VF4UF1-NEXT: mul nsw <vscale x 4 x i32> %[[SXT2]], %[[SXT1]] entry: - %pre.index = getelementptr inbounds [2 x i16], [2 x i16]* %a, i64 0, i64 0 - %.pre = load i16, i16* %pre.index + %.pre = load i16, ptr %a br label %for.body for.body: %0 = phi i16 [ %.pre, %entry ], [ %1, %for.body ] %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arraycidx = getelementptr inbounds i32, i32* %c, i64 %indvars.iv - %cur.index = getelementptr inbounds [2 x i16], [2 x i16]* %a, i64 %indvars.iv, i64 1 - store i32 7, i32* %arraycidx ; 1st instruction, to be widened.
+ %arraycidx = getelementptr inbounds i32, ptr %c, i64 %indvars.iv + %cur.index = getelementptr inbounds [2 x i16], ptr %a, i64 %indvars.iv, i64 1 + store i32 7, ptr %arraycidx ; 1st instruction, to be widened. %conv = sext i16 %0 to i32 ; 2nd, cast to sink after third. - %1 = load i16, i16* %cur.index ; 3rd, first-order-recurring load not widened. + %1 = load i16, ptr %cur.index ; 3rd, first-order-recurring load not widened. %conv3 = sext i16 %1 to i32 %mul = mul nsw i32 %conv3, %conv - %arrayidx5 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv - store i32 %mul, i32* %arrayidx5 + %arrayidx5 = getelementptr inbounds i32, ptr %b, i64 %indvars.iv + store i32 %mul, ptr %arrayidx5 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll index e2c696f..1c58fb4 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll @@ -12,7 +12,7 @@ target triple = "aarch64--linux-gnu" ; CHECK-COST: Found an estimated cost of 1 for VF 4 For instruction: %1 = tail call i16 @llvm.sadd.sat.i16(i16 %0, i16 %offset) ; CHECK-COST: Found an estimated cost of 1 for VF 8 For instruction: %1 = tail call i16 @llvm.sadd.sat.i16(i16 %0, i16 %offset) -define void @saddsat(i16* nocapture readonly %pSrc, i16 signext %offset, i16* nocapture noalias %pDst, i32 %blockSize) #0 { +define void @saddsat(ptr nocapture readonly %pSrc, i16 signext %offset, ptr nocapture noalias %pDst, i32 %blockSize) #0 { ; CHECK-LABEL: @saddsat( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP_NOT6:%.*]] = icmp eq i32 [[BLOCKSIZE:%.*]], 0 @@ -25,10 +25,12 @@ define void @saddsat(i16* nocapture readonly %pSrc, i16 signext %offset, i16* no ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label 
[[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], -16 -; CHECK-NEXT: [[CAST_VTC:%.*]] = trunc i64 [[N_VEC]] to i32 -; CHECK-NEXT: [[IND_END:%.*]] = sub i32 [[BLOCKSIZE]], [[CAST_VTC]] -; CHECK-NEXT: [[IND_END1:%.*]] = getelementptr i16, i16* [[PSRC:%.*]], i64 [[N_VEC]] -; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr i16, i16* [[PDST:%.*]], i64 [[N_VEC]] +; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32 +; CHECK-NEXT: [[IND_END:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST]] +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[N_VEC]], 1 +; CHECK-NEXT: [[IND_END1:%.*]] = getelementptr i8, ptr [[PSRC:%.*]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[N_VEC]], 1 +; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr i8, ptr [[PDST:%.*]], i64 [[TMP4]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[OFFSET:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <8 x i16> poison, i16 [[OFFSET]], i64 0 @@ -36,20 +38,18 @@ define void @saddsat(i16* nocapture readonly %pSrc, i16 signext %offset, i16* no ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i16, i16* [[PSRC]], i64 [[INDEX]] -; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i16, i16* [[PDST]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[NEXT_GEP]] to <8 x i16>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 2 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i16, i16* [[NEXT_GEP]], i64 8 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <8 x i16>* -; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <8 x i16>, <8 x i16>* [[TMP5]], align 2 -; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i16> 
@llvm.sadd.sat.v8i16(<8 x i16> [[WIDE_LOAD]], <8 x i16> [[BROADCAST_SPLAT]]) -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[WIDE_LOAD8]], <8 x i16> [[BROADCAST_SPLAT10]]) -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[NEXT_GEP6]] to <8 x i16>* -; CHECK-NEXT: store <8 x i16> [[TMP6]], <8 x i16>* [[TMP8]], align 2 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i16, i16* [[NEXT_GEP6]], i64 8 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <8 x i16>* -; CHECK-NEXT: store <8 x i16> [[TMP7]], <8 x i16>* [[TMP10]], align 2 +; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[INDEX]], 1 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[INDEX]], 1 +; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[TMP6]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[NEXT_GEP]], align 2 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i64 8 +; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <8 x i16>, ptr [[TMP7]], align 2 +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[WIDE_LOAD]], <8 x i16> [[BROADCAST_SPLAT]]) +; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[WIDE_LOAD8]], <8 x i16> [[BROADCAST_SPLAT10]]) +; CHECK-NEXT: store <8 x i16> [[TMP8]], ptr [[NEXT_GEP6]], align 2 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[NEXT_GEP6]], i64 8 +; CHECK-NEXT: store <8 x i16> [[TMP9]], ptr [[TMP10]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -58,18 +58,18 @@ define void @saddsat(i16* nocapture readonly %pSrc, i16 signext %offset, i16* no ; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], 
[[MIDDLE_BLOCK]] ], [ [[BLOCKSIZE]], [[WHILE_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i16* [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[PSRC]], [[WHILE_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i16* [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[PDST]], [[WHILE_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[PSRC]], [[WHILE_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi ptr [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[PDST]], [[WHILE_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK: while.body: ; CHECK-NEXT: [[BLKCNT_09:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[PSRC_ADDR_08:%.*]] = phi i16* [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[PDST_ADDR_07:%.*]] = phi i16* [ [[INCDEC_PTR3:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i16, i16* [[PSRC_ADDR_08]], i64 1 -; CHECK-NEXT: [[TMP12:%.*]] = load i16, i16* [[PSRC_ADDR_08]], align 2 +; CHECK-NEXT: [[PSRC_ADDR_08:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[PDST_ADDR_07:%.*]] = phi ptr [ [[INCDEC_PTR3:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i16, ptr [[PSRC_ADDR_08]], i64 1 +; CHECK-NEXT: [[TMP12:%.*]] = load i16, ptr [[PSRC_ADDR_08]], align 2 ; CHECK-NEXT: [[TMP13:%.*]] = tail call i16 @llvm.sadd.sat.i16(i16 [[TMP12]], i16 [[OFFSET]]) -; CHECK-NEXT: [[INCDEC_PTR3]] = getelementptr inbounds i16, i16* [[PDST_ADDR_07]], i64 1 -; CHECK-NEXT: store i16 [[TMP13]], i16* [[PDST_ADDR_07]], align 2 +; CHECK-NEXT: [[INCDEC_PTR3]] = getelementptr inbounds i16, ptr [[PDST_ADDR_07]], i64 1 +; CHECK-NEXT: store i16 [[TMP13]], ptr [[PDST_ADDR_07]], align 2 ; CHECK-NEXT: [[DEC]] = add i32 
[[BLKCNT_09]], -1 ; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[DEC]], 0 ; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END]], label [[WHILE_BODY]], !llvm.loop [[LOOP2:![0-9]+]] @@ -82,13 +82,13 @@ entry: while.body: ; preds = %entry, %while.body %blkCnt.09 = phi i32 [ %dec, %while.body ], [ %blockSize, %entry ] - %pSrc.addr.08 = phi i16* [ %incdec.ptr, %while.body ], [ %pSrc, %entry ] - %pDst.addr.07 = phi i16* [ %incdec.ptr3, %while.body ], [ %pDst, %entry ] - %incdec.ptr = getelementptr inbounds i16, i16* %pSrc.addr.08, i32 1 - %0 = load i16, i16* %pSrc.addr.08, align 2 + %pSrc.addr.08 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrc, %entry ] + %pDst.addr.07 = phi ptr [ %incdec.ptr3, %while.body ], [ %pDst, %entry ] + %incdec.ptr = getelementptr inbounds i16, ptr %pSrc.addr.08, i32 1 + %0 = load i16, ptr %pSrc.addr.08, align 2 %1 = tail call i16 @llvm.sadd.sat.i16(i16 %0, i16 %offset) - %incdec.ptr3 = getelementptr inbounds i16, i16* %pDst.addr.07, i32 1 - store i16 %1, i16* %pDst.addr.07, align 2 + %incdec.ptr3 = getelementptr inbounds i16, ptr %pDst.addr.07, i32 1 + store i16 %1, ptr %pDst.addr.07, align 2 %dec = add i32 %blkCnt.09, -1 %cmp.not = icmp eq i32 %dec, 0 br i1 %cmp.not, label %while.end, label %while.body @@ -104,7 +104,7 @@ while.end: ; preds = %while.body, %entry ; CHECK-COST: Found an estimated cost of 1 for VF 8 For instruction: %1 = tail call i8 @llvm.umin.i8(i8 %0, i8 %offset) ; CHECK-COST: Found an estimated cost of 1 for VF 16 For instruction: %1 = tail call i8 @llvm.umin.i8(i8 %0, i8 %offset) -define void @umin(i8* nocapture readonly %pSrc, i8 signext %offset, i8* nocapture noalias %pDst, i32 %blockSize) #0 { +define void @umin(ptr nocapture readonly %pSrc, i8 signext %offset, ptr nocapture noalias %pDst, i32 %blockSize) #0 { ; CHECK-LABEL: @umin( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP_NOT6:%.*]] = icmp eq i32 [[BLOCKSIZE:%.*]], 0 @@ -127,73 +127,67 @@ define void @umin(i8* nocapture readonly %pSrc, i8 signext %offset, i8* nocaptur ; 
CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[PSRC:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, i8* [[PDST:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[NEXT_GEP]] to <16 x i8>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, <16 x i8>* [[TMP3]], align 2 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[NEXT_GEP]], i64 16 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <16 x i8>* -; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i8>, <16 x i8>* [[TMP5]], align 2 -; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i8> @llvm.umin.v16i8(<16 x i8> [[WIDE_LOAD]], <16 x i8> [[BROADCAST_SPLAT]]) -; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.umin.v16i8(<16 x i8> [[WIDE_LOAD5]], <16 x i8> [[BROADCAST_SPLAT7]]) -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[NEXT_GEP3]] to <16 x i8>* -; CHECK-NEXT: store <16 x i8> [[TMP6]], <16 x i8>* [[TMP8]], align 2 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, i8* [[NEXT_GEP3]], i64 16 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to <16 x i8>* -; CHECK-NEXT: store <16 x i8> [[TMP7]], <16 x i8>* [[TMP10]], align 2 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PDST:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 2 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 16 +; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP3]], align 2 +; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i8> @llvm.umin.v16i8(<16 x i8> [[WIDE_LOAD]], <16 x i8> [[BROADCAST_SPLAT]]) +; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i8> @llvm.umin.v16i8(<16 x i8> [[WIDE_LOAD5]], <16 x i8> [[BROADCAST_SPLAT7]]) +; CHECK-NEXT: store <16 x i8> [[TMP4]], ptr [[NEXT_GEP3]], align 2 +; 
CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP3]], i64 16 +; CHECK-NEXT: store <16 x i8> [[TMP5]], ptr [[TMP6]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: [[IND_END16:%.*]] = getelementptr i8, i8* [[PDST]], i64 [[N_VEC]] -; CHECK-NEXT: [[IND_END13:%.*]] = getelementptr i8, i8* [[PSRC]], i64 [[N_VEC]] -; CHECK-NEXT: [[CAST_VTC10:%.*]] = trunc i64 [[N_VEC]] to i32 -; CHECK-NEXT: [[IND_END11:%.*]] = sub i32 [[BLOCKSIZE]], [[CAST_VTC10]] +; CHECK-NEXT: [[IND_END20:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[N_VEC]] +; CHECK-NEXT: [[IND_END17:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[N_VEC]] +; CHECK-NEXT: [[DOTCAST13:%.*]] = trunc i64 [[N_VEC]] to i32 +; CHECK-NEXT: [[IND_END14:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST13]] ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[TMP2]], 24 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[N_VEC9:%.*]] = and i64 [[TMP2]], -8 -; CHECK-NEXT: [[CAST_VTC:%.*]] = trunc i64 [[N_VEC9]] to i32 -; CHECK-NEXT: [[IND_END:%.*]] = sub i32 [[BLOCKSIZE]], [[CAST_VTC]] -; CHECK-NEXT: [[IND_END12:%.*]] = getelementptr i8, i8* 
[[PSRC]], i64 [[N_VEC9]] -; CHECK-NEXT: [[IND_END15:%.*]] = getelementptr i8, i8* [[PDST]], i64 [[N_VEC9]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT23:%.*]] = insertelement <8 x i8> poison, i8 [[OFFSET]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT24:%.*]] = shufflevector <8 x i8> [[BROADCAST_SPLATINSERT23]], <8 x i8> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[N_VEC11:%.*]] = and i64 [[TMP2]], -8 +; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC11]] to i32 +; CHECK-NEXT: [[IND_END12:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST]] +; CHECK-NEXT: [[IND_END16:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[N_VEC11]] +; CHECK-NEXT: [[IND_END19:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[N_VEC11]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT27:%.*]] = insertelement <8 x i8> poison, i8 [[OFFSET]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT28:%.*]] = shufflevector <8 x i8> [[BROADCAST_SPLATINSERT27]], <8 x i8> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX19:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT25:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[NEXT_GEP20:%.*]] = getelementptr i8, i8* [[PSRC]], i64 [[INDEX19]] -; CHECK-NEXT: [[NEXT_GEP21:%.*]] = getelementptr i8, i8* [[PDST]], i64 [[INDEX19]] -; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8* [[NEXT_GEP20]] to <8 x i8>* -; CHECK-NEXT: [[WIDE_LOAD22:%.*]] = load <8 x i8>, <8 x i8>* [[TMP12]], align 2 -; CHECK-NEXT: [[TMP13:%.*]] = call <8 x i8> @llvm.umin.v8i8(<8 x i8> [[WIDE_LOAD22]], <8 x i8> [[BROADCAST_SPLAT24]]) -; CHECK-NEXT: [[TMP14:%.*]] = bitcast i8* [[NEXT_GEP21]] to <8 x i8>* -; CHECK-NEXT: store <8 x i8> [[TMP13]], <8 x i8>* [[TMP14]], align 2 -; CHECK-NEXT: [[INDEX_NEXT25]] = add nuw i64 [[INDEX19]], 8 -; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT25]], [[N_VEC9]] -; CHECK-NEXT: br i1 [[TMP15]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop 
[[LOOP5:![0-9]+]] +; CHECK-NEXT: [[INDEX23:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT29:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[NEXT_GEP24:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[INDEX23]] +; CHECK-NEXT: [[NEXT_GEP25:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[INDEX23]] +; CHECK-NEXT: [[WIDE_LOAD26:%.*]] = load <8 x i8>, ptr [[NEXT_GEP24]], align 2 +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i8> @llvm.umin.v8i8(<8 x i8> [[WIDE_LOAD26]], <8 x i8> [[BROADCAST_SPLAT28]]) +; CHECK-NEXT: store <8 x i8> [[TMP8]], ptr [[NEXT_GEP25]], align 2 +; CHECK-NEXT: [[INDEX_NEXT29]] = add nuw i64 [[INDEX23]], 8 +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT29]], [[N_VEC11]] +; CHECK-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N18:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC9]] -; CHECK-NEXT: br i1 [[CMP_N18]], label [[WHILE_END]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[CMP_N22:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC11]] +; CHECK-NEXT: br i1 [[CMP_N22]], label [[WHILE_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END11]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[BLOCKSIZE]], [[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL14:%.*]] = phi i8* [ [[IND_END12]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END13]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PSRC]], [[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL17:%.*]] = phi i8* [ [[IND_END15]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END16]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PDST]], [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL15:%.*]] = phi i32 [ [[IND_END12]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END14]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[BLOCKSIZE]], [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL18:%.*]] = phi ptr [ [[IND_END16]], 
[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END17]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PSRC]], [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL21:%.*]] = phi ptr [ [[IND_END19]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END20]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PDST]], [[ITER_CHECK]] ] ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK: while.body: -; CHECK-NEXT: [[BLKCNT_09:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ] -; CHECK-NEXT: [[PSRC_ADDR_08:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL14]], [[VEC_EPILOG_SCALAR_PH]] ] -; CHECK-NEXT: [[PDST_ADDR_07:%.*]] = phi i8* [ [[INCDEC_PTR3:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL17]], [[VEC_EPILOG_SCALAR_PH]] ] -; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[PSRC_ADDR_08]], i64 1 -; CHECK-NEXT: [[TMP16:%.*]] = load i8, i8* [[PSRC_ADDR_08]], align 2 -; CHECK-NEXT: [[TMP17:%.*]] = tail call i8 @llvm.umin.i8(i8 [[TMP16]], i8 [[OFFSET]]) -; CHECK-NEXT: [[INCDEC_PTR3]] = getelementptr inbounds i8, i8* [[PDST_ADDR_07]], i64 1 -; CHECK-NEXT: store i8 [[TMP17]], i8* [[PDST_ADDR_07]], align 2 +; CHECK-NEXT: [[BLKCNT_09:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL15]], [[VEC_EPILOG_SCALAR_PH]] ] +; CHECK-NEXT: [[PSRC_ADDR_08:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL18]], [[VEC_EPILOG_SCALAR_PH]] ] +; CHECK-NEXT: [[PDST_ADDR_07:%.*]] = phi ptr [ [[INCDEC_PTR3:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL21]], [[VEC_EPILOG_SCALAR_PH]] ] +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[PSRC_ADDR_08]], i64 1 +; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[PSRC_ADDR_08]], align 2 +; CHECK-NEXT: [[TMP11:%.*]] = tail call i8 @llvm.umin.i8(i8 [[TMP10]], i8 [[OFFSET]]) +; CHECK-NEXT: [[INCDEC_PTR3]] = getelementptr inbounds i8, ptr [[PDST_ADDR_07]], i64 1 +; CHECK-NEXT: store i8 [[TMP11]], ptr [[PDST_ADDR_07]], align 2 ; CHECK-NEXT: [[DEC]] = add i32 [[BLKCNT_09]], -1 ; CHECK-NEXT: 
[[CMP_NOT:%.*]] = icmp eq i32 [[DEC]], 0 ; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END]], label [[WHILE_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -206,13 +200,13 @@ entry: while.body: ; preds = %entry, %while.body %blkCnt.09 = phi i32 [ %dec, %while.body ], [ %blockSize, %entry ] - %pSrc.addr.08 = phi i8* [ %incdec.ptr, %while.body ], [ %pSrc, %entry ] - %pDst.addr.07 = phi i8* [ %incdec.ptr3, %while.body ], [ %pDst, %entry ] - %incdec.ptr = getelementptr inbounds i8, i8* %pSrc.addr.08, i32 1 - %0 = load i8, i8* %pSrc.addr.08, align 2 + %pSrc.addr.08 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrc, %entry ] + %pDst.addr.07 = phi ptr [ %incdec.ptr3, %while.body ], [ %pDst, %entry ] + %incdec.ptr = getelementptr inbounds i8, ptr %pSrc.addr.08, i32 1 + %0 = load i8, ptr %pSrc.addr.08, align 2 %1 = tail call i8 @llvm.umin.i8(i8 %0, i8 %offset) - %incdec.ptr3 = getelementptr inbounds i8, i8* %pDst.addr.07, i32 1 - store i8 %1, i8* %pDst.addr.07, align 2 + %incdec.ptr3 = getelementptr inbounds i8, ptr %pDst.addr.07, i32 1 + store i8 %1, ptr %pDst.addr.07, align 2 %dec = add i32 %blkCnt.09, -1 %cmp.not = icmp eq i32 %dec, 0 br i1 %cmp.not, label %while.end, label %while.body @@ -224,3 +218,5 @@ while.end: ; preds = %while.body, %entry declare i16 @llvm.sadd.sat.i16(i16, i16) declare i8 @llvm.umin.i8(i8, i8) +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-COST: {{.*}} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/maximize-bandwidth-invalidate.ll b/llvm/test/Transforms/LoopVectorize/AArch64/maximize-bandwidth-invalidate.ll index a3d0530..d40115b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/maximize-bandwidth-invalidate.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/maximize-bandwidth-invalidate.ll @@ -16,7 +16,7 @@ target triple = "aarch64-none-unknown-eabi" ; COST: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %0 = load ; COST: LV: Selecting VF: 1. 
-define i32 @test(i8* nocapture noundef readonly %pInVec, i8* nocapture noundef readonly %pInA1, i8* nocapture noundef readonly %pInA2, i8* nocapture noundef readonly %pInA3, i8* nocapture noundef readonly %pInA4, i32 noundef %numCols) { +define i32 @test(ptr nocapture noundef readonly %pInVec, ptr nocapture noundef readonly %pInA1, ptr nocapture noundef readonly %pInA2, ptr nocapture noundef readonly %pInA3, ptr nocapture noundef readonly %pInA4, i32 noundef %numCols) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[AND:%.*]] = and i32 [[NUMCOLS:%.*]], 3 @@ -25,36 +25,36 @@ define i32 @test(i8* nocapture noundef readonly %pInVec, i8* nocapture noundef r ; CHECK: while.body.preheader: ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK: while.body: -; CHECK-NEXT: [[PINVEC_ADDR_042:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[PINVEC:%.*]], [[WHILE_BODY_PREHEADER]] ] +; CHECK-NEXT: [[PINVEC_ADDR_042:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[PINVEC:%.*]], [[WHILE_BODY_PREHEADER]] ] ; CHECK-NEXT: [[SUM4_041:%.*]] = phi i32 [ [[ADD14:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ] ; CHECK-NEXT: [[SUM3_040:%.*]] = phi i32 [ [[ADD10:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ] ; CHECK-NEXT: [[SUM2_039:%.*]] = phi i32 [ [[ADD6:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ] ; CHECK-NEXT: [[SUM1_038:%.*]] = phi i32 [ [[ADD:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ] ; CHECK-NEXT: [[COLCNT_037:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[AND]], [[WHILE_BODY_PREHEADER]] ] -; CHECK-NEXT: [[PINA1_ADDR_036:%.*]] = phi i8* [ [[INCDEC_PTR1:%.*]], [[WHILE_BODY]] ], [ [[PINA1:%.*]], [[WHILE_BODY_PREHEADER]] ] -; CHECK-NEXT: [[PINA4_ADDR_035:%.*]] = phi i8* [ [[INCDEC_PTR11:%.*]], [[WHILE_BODY]] ], [ [[PINA4:%.*]], [[WHILE_BODY_PREHEADER]] ] -; CHECK-NEXT: [[PINA3_ADDR_034:%.*]] = phi i8* [ [[INCDEC_PTR7:%.*]], [[WHILE_BODY]] ], [ [[PINA3:%.*]], [[WHILE_BODY_PREHEADER]] ] -; CHECK-NEXT: 
[[PINA2_ADDR_033:%.*]] = phi i8* [ [[INCDEC_PTR3:%.*]], [[WHILE_BODY]] ], [ [[PINA2:%.*]], [[WHILE_BODY_PREHEADER]] ] -; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[PINVEC_ADDR_042]], i64 1 -; CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[PINVEC_ADDR_042]], align 1 +; CHECK-NEXT: [[PINA1_ADDR_036:%.*]] = phi ptr [ [[INCDEC_PTR1:%.*]], [[WHILE_BODY]] ], [ [[PINA1:%.*]], [[WHILE_BODY_PREHEADER]] ] +; CHECK-NEXT: [[PINA4_ADDR_035:%.*]] = phi ptr [ [[INCDEC_PTR11:%.*]], [[WHILE_BODY]] ], [ [[PINA4:%.*]], [[WHILE_BODY_PREHEADER]] ] +; CHECK-NEXT: [[PINA3_ADDR_034:%.*]] = phi ptr [ [[INCDEC_PTR7:%.*]], [[WHILE_BODY]] ], [ [[PINA3:%.*]], [[WHILE_BODY_PREHEADER]] ] +; CHECK-NEXT: [[PINA2_ADDR_033:%.*]] = phi ptr [ [[INCDEC_PTR3:%.*]], [[WHILE_BODY]] ], [ [[PINA2:%.*]], [[WHILE_BODY_PREHEADER]] ] +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[PINVEC_ADDR_042]], i64 1 +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[PINVEC_ADDR_042]], align 1 ; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32 -; CHECK-NEXT: [[INCDEC_PTR1]] = getelementptr inbounds i8, i8* [[PINA1_ADDR_036]], i64 1 -; CHECK-NEXT: [[TMP1:%.*]] = load i8, i8* [[PINA1_ADDR_036]], align 1 +; CHECK-NEXT: [[INCDEC_PTR1]] = getelementptr inbounds i8, ptr [[PINA1_ADDR_036]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[PINA1_ADDR_036]], align 1 ; CHECK-NEXT: [[CONV2:%.*]] = sext i8 [[TMP1]] to i32 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV2]], [[CONV]] ; CHECK-NEXT: [[ADD]] = add nsw i32 [[MUL]], [[SUM1_038]] -; CHECK-NEXT: [[INCDEC_PTR3]] = getelementptr inbounds i8, i8* [[PINA2_ADDR_033]], i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = load i8, i8* [[PINA2_ADDR_033]], align 1 +; CHECK-NEXT: [[INCDEC_PTR3]] = getelementptr inbounds i8, ptr [[PINA2_ADDR_033]], i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[PINA2_ADDR_033]], align 1 ; CHECK-NEXT: [[CONV4:%.*]] = sext i8 [[TMP2]] to i32 ; CHECK-NEXT: [[MUL5:%.*]] = mul nsw i32 [[CONV4]], [[CONV]] ; CHECK-NEXT: [[ADD6]] = add nsw i32 
[[MUL5]], [[SUM2_039]] -; CHECK-NEXT: [[INCDEC_PTR7]] = getelementptr inbounds i8, i8* [[PINA3_ADDR_034]], i64 1 -; CHECK-NEXT: [[TMP3:%.*]] = load i8, i8* [[PINA3_ADDR_034]], align 1 +; CHECK-NEXT: [[INCDEC_PTR7]] = getelementptr inbounds i8, ptr [[PINA3_ADDR_034]], i64 1 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[PINA3_ADDR_034]], align 1 ; CHECK-NEXT: [[CONV8:%.*]] = sext i8 [[TMP3]] to i32 ; CHECK-NEXT: [[MUL9:%.*]] = mul nsw i32 [[CONV8]], [[CONV]] ; CHECK-NEXT: [[ADD10]] = add nsw i32 [[MUL9]], [[SUM3_040]] -; CHECK-NEXT: [[INCDEC_PTR11]] = getelementptr inbounds i8, i8* [[PINA4_ADDR_035]], i64 1 -; CHECK-NEXT: [[TMP4:%.*]] = load i8, i8* [[PINA4_ADDR_035]], align 1 +; CHECK-NEXT: [[INCDEC_PTR11]] = getelementptr inbounds i8, ptr [[PINA4_ADDR_035]], i64 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[PINA4_ADDR_035]], align 1 ; CHECK-NEXT: [[CONV12:%.*]] = sext i8 [[TMP4]] to i32 ; CHECK-NEXT: [[MUL13:%.*]] = mul nsw i32 [[CONV12]], [[CONV]] ; CHECK-NEXT: [[ADD14]] = add nsw i32 [[MUL13]], [[SUM4_041]] @@ -80,36 +80,36 @@ entry: br i1 %cmp.not32, label %while.end, label %while.body while.body: ; preds = %entry, %while.body - %pInVec.addr.042 = phi i8* [ %incdec.ptr, %while.body ], [ %pInVec, %entry ] + %pInVec.addr.042 = phi ptr [ %incdec.ptr, %while.body ], [ %pInVec, %entry ] %sum4.041 = phi i32 [ %add14, %while.body ], [ 0, %entry ] %sum3.040 = phi i32 [ %add10, %while.body ], [ 0, %entry ] %sum2.039 = phi i32 [ %add6, %while.body ], [ 0, %entry ] %sum1.038 = phi i32 [ %add, %while.body ], [ 0, %entry ] %colCnt.037 = phi i32 [ %dec, %while.body ], [ %and, %entry ] - %pInA1.addr.036 = phi i8* [ %incdec.ptr1, %while.body ], [ %pInA1, %entry ] - %pInA4.addr.035 = phi i8* [ %incdec.ptr11, %while.body ], [ %pInA4, %entry ] - %pInA3.addr.034 = phi i8* [ %incdec.ptr7, %while.body ], [ %pInA3, %entry ] - %pInA2.addr.033 = phi i8* [ %incdec.ptr3, %while.body ], [ %pInA2, %entry ] - %incdec.ptr = getelementptr inbounds i8, i8* %pInVec.addr.042, i64 1 - %0 = load 
i8, i8* %pInVec.addr.042, align 1 + %pInA1.addr.036 = phi ptr [ %incdec.ptr1, %while.body ], [ %pInA1, %entry ] + %pInA4.addr.035 = phi ptr [ %incdec.ptr11, %while.body ], [ %pInA4, %entry ] + %pInA3.addr.034 = phi ptr [ %incdec.ptr7, %while.body ], [ %pInA3, %entry ] + %pInA2.addr.033 = phi ptr [ %incdec.ptr3, %while.body ], [ %pInA2, %entry ] + %incdec.ptr = getelementptr inbounds i8, ptr %pInVec.addr.042, i64 1 + %0 = load i8, ptr %pInVec.addr.042, align 1 %conv = sext i8 %0 to i32 - %incdec.ptr1 = getelementptr inbounds i8, i8* %pInA1.addr.036, i64 1 - %1 = load i8, i8* %pInA1.addr.036, align 1 + %incdec.ptr1 = getelementptr inbounds i8, ptr %pInA1.addr.036, i64 1 + %1 = load i8, ptr %pInA1.addr.036, align 1 %conv2 = sext i8 %1 to i32 %mul = mul nsw i32 %conv2, %conv %add = add nsw i32 %mul, %sum1.038 - %incdec.ptr3 = getelementptr inbounds i8, i8* %pInA2.addr.033, i64 1 - %2 = load i8, i8* %pInA2.addr.033, align 1 + %incdec.ptr3 = getelementptr inbounds i8, ptr %pInA2.addr.033, i64 1 + %2 = load i8, ptr %pInA2.addr.033, align 1 %conv4 = sext i8 %2 to i32 %mul5 = mul nsw i32 %conv4, %conv %add6 = add nsw i32 %mul5, %sum2.039 - %incdec.ptr7 = getelementptr inbounds i8, i8* %pInA3.addr.034, i64 1 - %3 = load i8, i8* %pInA3.addr.034, align 1 + %incdec.ptr7 = getelementptr inbounds i8, ptr %pInA3.addr.034, i64 1 + %3 = load i8, ptr %pInA3.addr.034, align 1 %conv8 = sext i8 %3 to i32 %mul9 = mul nsw i32 %conv8, %conv %add10 = add nsw i32 %mul9, %sum3.040 - %incdec.ptr11 = getelementptr inbounds i8, i8* %pInA4.addr.035, i64 1 - %4 = load i8, i8* %pInA4.addr.035, align 1 + %incdec.ptr11 = getelementptr inbounds i8, ptr %pInA4.addr.035, i64 1 + %4 = load i8, ptr %pInA4.addr.035, align 1 %conv12 = sext i8 %4 to i32 %mul13 = mul nsw i32 %conv12, %conv %add14 = add nsw i32 %mul13, %sum4.041 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/nontemporal-load-store.ll b/llvm/test/Transforms/LoopVectorize/AArch64/nontemporal-load-store.ll index 8f5d796..75f03c7 100644 
--- a/llvm/test/Transforms/LoopVectorize/AArch64/nontemporal-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/nontemporal-load-store.ll @@ -1,7 +1,7 @@ ; RUN: opt -passes=loop-vectorize -mtriple=arm64-apple-iphones -force-vector-width=4 -force-vector-interleave=1 %s -S | FileCheck %s ; Vectors with i4 elements may not legal with nontemporal stores. -define void @test_i4_store(i4* %ddst) { +define void @test_i4_store(ptr %ddst) { ; CHECK-LABEL: define void @test_i4_store( ; CHECK-NOT: vector.body: ; CHECK: ret void @@ -11,9 +11,9 @@ entry: for.body: ; preds = %entry, %for.body %i = phi i32 [ 0, %entry ], [ %add, %for.body ] - %ddst.addr = phi i4* [ %ddst, %entry ], [ %incdec.ptr, %for.body ] - %incdec.ptr = getelementptr inbounds i4, i4* %ddst.addr, i64 1 - store i4 10, i4* %ddst.addr, align 4, !nontemporal !8 + %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr, %for.body ] + %incdec.ptr = getelementptr inbounds i4, ptr %ddst.addr, i64 1 + store i4 10, ptr %ddst.addr, align 4, !nontemporal !8 %add = add nuw nsw i32 %i, 4 %cmp = icmp ult i32 %i, 4092 br i1 %cmp, label %for.body, label %for.cond.cleanup @@ -22,7 +22,7 @@ for.cond.cleanup: ; preds = %for.body ret void } -define void @test_i8_store(i8* %ddst) { +define void @test_i8_store(ptr %ddst) { ; CHECK-LABEL: define void @test_i8_store( ; CHECK-LABEL: vector.body: ; CHECK: store <4 x i8> {{.*}} !nontemporal !0 @@ -33,9 +33,9 @@ entry: for.body: ; preds = %entry, %for.body %i = phi i32 [ 0, %entry ], [ %add, %for.body ] - %ddst.addr = phi i8* [ %ddst, %entry ], [ %incdec.ptr, %for.body ] - %incdec.ptr = getelementptr inbounds i8, i8* %ddst.addr, i64 1 - store i8 10, i8* %ddst.addr, align 4, !nontemporal !8 + %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr, %for.body ] + %incdec.ptr = getelementptr inbounds i8, ptr %ddst.addr, i64 1 + store i8 10, ptr %ddst.addr, align 4, !nontemporal !8 %add = add nuw nsw i32 %i, 4 %cmp = icmp ult i32 %i, 4092 br i1 %cmp, label %for.body, label 
%for.cond.cleanup @@ -44,7 +44,7 @@ for.cond.cleanup: ; preds = %for.body ret void } -define void @test_half_store(half* %ddst) { +define void @test_half_store(ptr %ddst) { ; CHECK-LABEL: define void @test_half_store( ; CHECK-LABEL: vector.body: ; CHECK: store <4 x half> {{.*}} !nontemporal !0 @@ -55,9 +55,9 @@ entry: for.body: ; preds = %entry, %for.body %i = phi i32 [ 0, %entry ], [ %add, %for.body ] - %ddst.addr = phi half* [ %ddst, %entry ], [ %incdec.ptr, %for.body ] - %incdec.ptr = getelementptr inbounds half, half* %ddst.addr, i64 1 - store half 10.0, half* %ddst.addr, align 4, !nontemporal !8 + %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr, %for.body ] + %incdec.ptr = getelementptr inbounds half, ptr %ddst.addr, i64 1 + store half 10.0, ptr %ddst.addr, align 4, !nontemporal !8 %add = add nuw nsw i32 %i, 4 %cmp = icmp ult i32 %i, 4092 br i1 %cmp, label %for.body, label %for.cond.cleanup @@ -66,7 +66,7 @@ for.cond.cleanup: ; preds = %for.body ret void } -define void @test_i16_store(i16* %ddst) { +define void @test_i16_store(ptr %ddst) { ; CHECK-LABEL: define void @test_i16_store( ; CHECK-LABEL: vector.body: ; CHECK: store <4 x i16> {{.*}} !nontemporal !0 @@ -77,9 +77,9 @@ entry: for.body: ; preds = %entry, %for.body %i = phi i32 [ 0, %entry ], [ %add, %for.body ] - %ddst.addr = phi i16* [ %ddst, %entry ], [ %incdec.ptr, %for.body ] - %incdec.ptr = getelementptr inbounds i16, i16* %ddst.addr, i64 1 - store i16 10, i16* %ddst.addr, align 4, !nontemporal !8 + %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr, %for.body ] + %incdec.ptr = getelementptr inbounds i16, ptr %ddst.addr, i64 1 + store i16 10, ptr %ddst.addr, align 4, !nontemporal !8 %add = add nuw nsw i32 %i, 4 %cmp = icmp ult i32 %i, 4092 br i1 %cmp, label %for.body, label %for.cond.cleanup @@ -88,7 +88,7 @@ for.cond.cleanup: ; preds = %for.body ret void } -define void @test_i32_store(i32* nocapture %ddst) { +define void @test_i32_store(ptr nocapture %ddst) { ; CHECK-LABEL: define void 
@test_i32_store( ; CHECK-LABEL: vector.body: ; CHECK: store <16 x i32> {{.*}} !nontemporal !0 @@ -99,15 +99,15 @@ entry: for.body: ; preds = %entry, %for.body %i = phi i32 [ 0, %entry ], [ %add, %for.body ] - %ddst.addr = phi i32* [ %ddst, %entry ], [ %incdec.ptr3, %for.body ] - %incdec.ptr = getelementptr inbounds i32, i32* %ddst.addr, i64 1 - store i32 10, i32* %ddst.addr, align 4, !nontemporal !8 - %incdec.ptr1 = getelementptr inbounds i32, i32* %ddst.addr, i64 2 - store i32 20, i32* %incdec.ptr, align 4, !nontemporal !8 - %incdec.ptr2 = getelementptr inbounds i32, i32* %ddst.addr, i64 3 - store i32 30, i32* %incdec.ptr1, align 4, !nontemporal !8 - %incdec.ptr3 = getelementptr inbounds i32, i32* %ddst.addr, i64 4 - store i32 40, i32* %incdec.ptr2, align 4, !nontemporal !8 + %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr3, %for.body ] + %incdec.ptr = getelementptr inbounds i32, ptr %ddst.addr, i64 1 + store i32 10, ptr %ddst.addr, align 4, !nontemporal !8 + %incdec.ptr1 = getelementptr inbounds i32, ptr %ddst.addr, i64 2 + store i32 20, ptr %incdec.ptr, align 4, !nontemporal !8 + %incdec.ptr2 = getelementptr inbounds i32, ptr %ddst.addr, i64 3 + store i32 30, ptr %incdec.ptr1, align 4, !nontemporal !8 + %incdec.ptr3 = getelementptr inbounds i32, ptr %ddst.addr, i64 4 + store i32 40, ptr %incdec.ptr2, align 4, !nontemporal !8 %add = add nuw nsw i32 %i, 4 %cmp = icmp ult i32 %i, 4092 br i1 %cmp, label %for.body, label %for.cond.cleanup @@ -116,7 +116,7 @@ for.cond.cleanup: ; preds = %for.body ret void } -define void @test_i33_store(i33* nocapture %ddst) { +define void @test_i33_store(ptr nocapture %ddst) { ; CHECK-LABEL: define void @test_i33_store( ; CHECK-NOT: vector.body: ; CHECK: ret @@ -126,15 +126,15 @@ entry: for.body: ; preds = %entry, %for.body %i = phi i32 [ 0, %entry ], [ %add, %for.body ] - %ddst.addr = phi i33* [ %ddst, %entry ], [ %incdec.ptr3, %for.body ] - %incdec.ptr = getelementptr inbounds i33, i33* %ddst.addr, i64 1 - store i33 10, i33* 
%ddst.addr, align 4, !nontemporal !8 - %incdec.ptr1 = getelementptr inbounds i33, i33* %ddst.addr, i64 2 - store i33 20, i33* %incdec.ptr, align 4, !nontemporal !8 - %incdec.ptr2 = getelementptr inbounds i33, i33* %ddst.addr, i64 3 - store i33 30, i33* %incdec.ptr1, align 4, !nontemporal !8 - %incdec.ptr3 = getelementptr inbounds i33, i33* %ddst.addr, i64 4 - store i33 40, i33* %incdec.ptr2, align 4, !nontemporal !8 + %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr3, %for.body ] + %incdec.ptr = getelementptr inbounds i33, ptr %ddst.addr, i64 1 + store i33 10, ptr %ddst.addr, align 4, !nontemporal !8 + %incdec.ptr1 = getelementptr inbounds i33, ptr %ddst.addr, i64 2 + store i33 20, ptr %incdec.ptr, align 4, !nontemporal !8 + %incdec.ptr2 = getelementptr inbounds i33, ptr %ddst.addr, i64 3 + store i33 30, ptr %incdec.ptr1, align 4, !nontemporal !8 + %incdec.ptr3 = getelementptr inbounds i33, ptr %ddst.addr, i64 4 + store i33 40, ptr %incdec.ptr2, align 4, !nontemporal !8 %add = add nuw nsw i32 %i, 3 %cmp = icmp ult i32 %i, 4092 br i1 %cmp, label %for.body, label %for.cond.cleanup @@ -143,7 +143,7 @@ for.cond.cleanup: ; preds = %for.body ret void } -define void @test_i40_store(i40* nocapture %ddst) { +define void @test_i40_store(ptr nocapture %ddst) { ; CHECK-LABEL: define void @test_i40_store( ; CHECK-NOT: vector.body: ; CHECK: ret @@ -153,15 +153,15 @@ entry: for.body: ; preds = %entry, %for.body %i = phi i32 [ 0, %entry ], [ %add, %for.body ] - %ddst.addr = phi i40* [ %ddst, %entry ], [ %incdec.ptr3, %for.body ] - %incdec.ptr = getelementptr inbounds i40, i40* %ddst.addr, i64 1 - store i40 10, i40* %ddst.addr, align 4, !nontemporal !8 - %incdec.ptr1 = getelementptr inbounds i40, i40* %ddst.addr, i64 2 - store i40 20, i40* %incdec.ptr, align 4, !nontemporal !8 - %incdec.ptr2 = getelementptr inbounds i40, i40* %ddst.addr, i64 3 - store i40 30, i40* %incdec.ptr1, align 4, !nontemporal !8 - %incdec.ptr3 = getelementptr inbounds i40, i40* %ddst.addr, i64 4 - store 
i40 40, i40* %incdec.ptr2, align 4, !nontemporal !8 + %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr3, %for.body ] + %incdec.ptr = getelementptr inbounds i40, ptr %ddst.addr, i64 1 + store i40 10, ptr %ddst.addr, align 4, !nontemporal !8 + %incdec.ptr1 = getelementptr inbounds i40, ptr %ddst.addr, i64 2 + store i40 20, ptr %incdec.ptr, align 4, !nontemporal !8 + %incdec.ptr2 = getelementptr inbounds i40, ptr %ddst.addr, i64 3 + store i40 30, ptr %incdec.ptr1, align 4, !nontemporal !8 + %incdec.ptr3 = getelementptr inbounds i40, ptr %ddst.addr, i64 4 + store i40 40, ptr %incdec.ptr2, align 4, !nontemporal !8 %add = add nuw nsw i32 %i, 3 %cmp = icmp ult i32 %i, 4092 br i1 %cmp, label %for.body, label %for.cond.cleanup @@ -169,7 +169,7 @@ for.body: ; preds = %entry, %for.body for.cond.cleanup: ; preds = %for.body ret void } -define void @test_i64_store(i64* nocapture %ddst) local_unnamed_addr #0 { +define void @test_i64_store(ptr nocapture %ddst) local_unnamed_addr #0 { ; CHECK-LABEL: define void @test_i64_store( ; CHECK-LABEL: vector.body: ; CHECK: store <4 x i64> {{.*}} !nontemporal !0 @@ -180,9 +180,9 @@ entry: for.body: ; preds = %entry, %for.body %i = phi i32 [ 0, %entry ], [ %add, %for.body ] - %ddst.addr = phi i64* [ %ddst, %entry ], [ %incdec.ptr, %for.body ] - %incdec.ptr = getelementptr inbounds i64, i64* %ddst.addr, i64 1 - store i64 10, i64* %ddst.addr, align 4, !nontemporal !8 + %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr, %for.body ] + %incdec.ptr = getelementptr inbounds i64, ptr %ddst.addr, i64 1 + store i64 10, ptr %ddst.addr, align 4, !nontemporal !8 %add = add nuw nsw i32 %i, 4 %cmp = icmp ult i32 %i, 4092 br i1 %cmp, label %for.body, label %for.cond.cleanup @@ -191,7 +191,7 @@ for.cond.cleanup: ; preds = %for.body ret void } -define void @test_double_store(double* %ddst) { +define void @test_double_store(ptr %ddst) { ; CHECK-LABEL: define void @test_double_store( ; CHECK-LABEL: vector.body: ; CHECK: store <4 x double> {{.*}} 
!nontemporal !0 @@ -202,9 +202,9 @@ entry: for.body: ; preds = %entry, %for.body %i = phi i32 [ 0, %entry ], [ %add, %for.body ] - %ddst.addr = phi double* [ %ddst, %entry ], [ %incdec.ptr, %for.body ] - %incdec.ptr = getelementptr inbounds double, double* %ddst.addr, i64 1 - store double 10.0, double* %ddst.addr, align 4, !nontemporal !8 + %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr, %for.body ] + %incdec.ptr = getelementptr inbounds double, ptr %ddst.addr, i64 1 + store double 10.0, ptr %ddst.addr, align 4, !nontemporal !8 %add = add nuw nsw i32 %i, 4 %cmp = icmp ult i32 %i, 4092 br i1 %cmp, label %for.body, label %for.cond.cleanup @@ -213,7 +213,7 @@ for.cond.cleanup: ; preds = %for.body ret void } -define void @test_i128_store(i128* %ddst) { +define void @test_i128_store(ptr %ddst) { ; CHECK-LABEL: define void @test_i128_store( ; CHECK-LABEL: vector.body: ; CHECK: store <4 x i128> {{.*}} !nontemporal !0 @@ -224,9 +224,9 @@ entry: for.body: ; preds = %entry, %for.body %i = phi i32 [ 0, %entry ], [ %add, %for.body ] - %ddst.addr = phi i128* [ %ddst, %entry ], [ %incdec.ptr, %for.body ] - %incdec.ptr = getelementptr inbounds i128, i128* %ddst.addr, i64 1 - store i128 10, i128* %ddst.addr, align 4, !nontemporal !8 + %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr, %for.body ] + %incdec.ptr = getelementptr inbounds i128, ptr %ddst.addr, i64 1 + store i128 10, ptr %ddst.addr, align 4, !nontemporal !8 %add = add nuw nsw i32 %i, 4 %cmp = icmp ult i32 %i, 4092 br i1 %cmp, label %for.body, label %for.cond.cleanup @@ -235,7 +235,7 @@ for.cond.cleanup: ; preds = %for.body ret void } -define void @test_i256_store(i256* %ddst) { +define void @test_i256_store(ptr %ddst) { ; CHECK-LABEL: define void @test_i256_store( ; CHECK-NOT: vector.body: ; CHECK: ret void @@ -245,9 +245,9 @@ entry: for.body: ; preds = %entry, %for.body %i = phi i32 [ 0, %entry ], [ %add, %for.body ] - %ddst.addr = phi i256* [ %ddst, %entry ], [ %incdec.ptr, %for.body ] - %incdec.ptr = 
getelementptr inbounds i256, i256* %ddst.addr, i64 1 - store i256 10, i256* %ddst.addr, align 4, !nontemporal !8 + %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr, %for.body ] + %incdec.ptr = getelementptr inbounds i256, ptr %ddst.addr, i64 1 + store i256 10, ptr %ddst.addr, align 4, !nontemporal !8 %add = add nuw nsw i32 %i, 4 %cmp = icmp ult i32 %i, 4092 br i1 %cmp, label %for.body, label %for.cond.cleanup @@ -256,7 +256,7 @@ for.cond.cleanup: ; preds = %for.body ret void } -define i4 @test_i4_load(i4* %ddst) { +define i4 @test_i4_load(ptr %ddst) { ; CHECK-LABEL: define i4 @test_i4_load ; CHECK-NOT: vector.body: ; CHECk: ret i4 %{{.*}} @@ -267,8 +267,8 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %acc.08 = phi i4 [ 0, %entry ], [ %add, %for.body ] - %arrayidx = getelementptr inbounds i4, i4* %ddst, i64 %indvars.iv - %l = load i4, i4* %arrayidx, align 1, !nontemporal !8 + %arrayidx = getelementptr inbounds i4, ptr %ddst, i64 %indvars.iv + %l = load i4, ptr %arrayidx, align 1, !nontemporal !8 %add = add i4 %l, %acc.08 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, 4092 @@ -278,10 +278,10 @@ for.cond.cleanup: ; preds = %for.body ret i4 %add } -define i8 @test_load_i8(i8* %ddst) { +define i8 @test_load_i8(ptr %ddst) { ; CHECK-LABEL: @test_load_i8( ; CHECK: vector.body: -; CHECK: load <4 x i8>, <4 x i8>* {{.*}}, align 1, !nontemporal !0 +; CHECK: load <4 x i8>, ptr {{.*}}, align 1, !nontemporal !0 ; CHECk: ret i8 %{{.*}} ; entry: @@ -290,8 +290,8 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %acc.08 = phi i8 [ 0, %entry ], [ %add, %for.body ] - %arrayidx = getelementptr inbounds i8, i8* %ddst, i64 %indvars.iv - %l = load i8, i8* %arrayidx, align 1, !nontemporal !8 + %arrayidx = getelementptr inbounds i8, ptr %ddst, i64 %indvars.iv + %l = load i8, ptr %arrayidx, align 1, 
!nontemporal !8 %add = add i8 %l, %acc.08 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, 4092 @@ -301,10 +301,10 @@ for.cond.cleanup: ; preds = %for.body ret i8 %add } -define half @test_half_load(half* %ddst) { +define half @test_half_load(ptr %ddst) { ; CHECK-LABEL: @test_half_load ; CHECK-LABEL: vector.body: -; CHECK: load <4 x half>, <4 x half>* {{.*}}, align 2, !nontemporal !0 +; CHECK: load <4 x half>, ptr {{.*}}, align 2, !nontemporal !0 ; CHECk: ret half %{{.*}} ; entry: @@ -313,8 +313,8 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %acc.08 = phi half [ 0.0, %entry ], [ %add, %for.body ] - %arrayidx = getelementptr inbounds half, half* %ddst, i64 %indvars.iv - %l = load half, half* %arrayidx, align 2, !nontemporal !8 + %arrayidx = getelementptr inbounds half, ptr %ddst, i64 %indvars.iv + %l = load half, ptr %arrayidx, align 2, !nontemporal !8 %add = fadd half %l, %acc.08 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, 4092 @@ -324,10 +324,10 @@ for.cond.cleanup: ; preds = %for.body ret half %add } -define i16 @test_i16_load(i16* %ddst) { +define i16 @test_i16_load(ptr %ddst) { ; CHECK-LABEL: @test_i16_load ; CHECK-LABEL: vector.body: -; CHECK: load <4 x i16>, <4 x i16>* {{.*}}, align 2, !nontemporal !0 +; CHECK: load <4 x i16>, ptr {{.*}}, align 2, !nontemporal !0 ; CHECk: ret i16 %{{.*}} ; entry: @@ -336,8 +336,8 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %acc.08 = phi i16 [ 0, %entry ], [ %add, %for.body ] - %arrayidx = getelementptr inbounds i16, i16* %ddst, i64 %indvars.iv - %l = load i16, i16* %arrayidx, align 2, !nontemporal !8 + %arrayidx = getelementptr inbounds i16, ptr %ddst, i64 %indvars.iv + %l = load i16, ptr %arrayidx, align 2, !nontemporal !8 %add = add i16 %l, %acc.08 %indvars.iv.next = add nuw nsw i64 
%indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, 4092 @@ -347,10 +347,10 @@ for.cond.cleanup: ; preds = %for.body ret i16 %add } -define i32 @test_i32_load(i32* %ddst) { +define i32 @test_i32_load(ptr %ddst) { ; CHECK-LABEL: @test_i32_load ; CHECK-LABEL: vector.body: -; CHECK: load <4 x i32>, <4 x i32>* {{.*}}, align 4, !nontemporal !0 +; CHECK: load <4 x i32>, ptr {{.*}}, align 4, !nontemporal !0 ; CHECk: ret i32 %{{.*}} ; entry: @@ -359,8 +359,8 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %acc.08 = phi i32 [ 0, %entry ], [ %add, %for.body ] - %arrayidx = getelementptr inbounds i32, i32* %ddst, i64 %indvars.iv - %l = load i32, i32* %arrayidx, align 4, !nontemporal !8 + %arrayidx = getelementptr inbounds i32, ptr %ddst, i64 %indvars.iv + %l = load i32, ptr %arrayidx, align 4, !nontemporal !8 %add = add i32 %l, %acc.08 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, 4092 @@ -370,7 +370,7 @@ for.cond.cleanup: ; preds = %for.body ret i32 %add } -define i33 @test_i33_load(i33* %ddst) { +define i33 @test_i33_load(ptr %ddst) { ; CHECK-LABEL: @test_i33_load ; CHECK-NOT: vector.body: ; CHECk: ret i33 %{{.*}} @@ -381,8 +381,8 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %acc.08 = phi i33 [ 0, %entry ], [ %add, %for.body ] - %arrayidx = getelementptr inbounds i33, i33* %ddst, i64 %indvars.iv - %l = load i33, i33* %arrayidx, align 4, !nontemporal !8 + %arrayidx = getelementptr inbounds i33, ptr %ddst, i64 %indvars.iv + %l = load i33, ptr %arrayidx, align 4, !nontemporal !8 %add = add i33 %l, %acc.08 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, 4092 @@ -392,7 +392,7 @@ for.cond.cleanup: ; preds = %for.body ret i33 %add } -define i40 @test_i40_load(i40* %ddst) { +define i40 @test_i40_load(ptr %ddst) { ; CHECK-LABEL: 
@test_i40_load ; CHECK-NOT: vector.body: ; CHECk: ret i40 %{{.*}} @@ -403,8 +403,8 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %acc.08 = phi i40 [ 0, %entry ], [ %add, %for.body ] - %arrayidx = getelementptr inbounds i40, i40* %ddst, i64 %indvars.iv - %l = load i40, i40* %arrayidx, align 4, !nontemporal !8 + %arrayidx = getelementptr inbounds i40, ptr %ddst, i64 %indvars.iv + %l = load i40, ptr %arrayidx, align 4, !nontemporal !8 %add = add i40 %l, %acc.08 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, 4092 @@ -414,10 +414,10 @@ for.cond.cleanup: ; preds = %for.body ret i40 %add } -define i64 @test_i64_load(i64* %ddst) { +define i64 @test_i64_load(ptr %ddst) { ; CHECK-LABEL: @test_i64_load ; CHECK-LABEL: vector.body: -; CHECK: load <4 x i64>, <4 x i64>* {{.*}}, align 4, !nontemporal !0 +; CHECK: load <4 x i64>, ptr {{.*}}, align 4, !nontemporal !0 ; CHECk: ret i64 %{{.*}} ; entry: @@ -426,8 +426,8 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %acc.08 = phi i64 [ 0, %entry ], [ %add, %for.body ] - %arrayidx = getelementptr inbounds i64, i64* %ddst, i64 %indvars.iv - %l = load i64, i64* %arrayidx, align 4, !nontemporal !8 + %arrayidx = getelementptr inbounds i64, ptr %ddst, i64 %indvars.iv + %l = load i64, ptr %arrayidx, align 4, !nontemporal !8 %add = add i64 %l, %acc.08 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, 4092 @@ -437,10 +437,10 @@ for.cond.cleanup: ; preds = %for.body ret i64 %add } -define double @test_double_load(double* %ddst) { +define double @test_double_load(ptr %ddst) { ; CHECK-LABEL: @test_double_load ; CHECK-LABEL: vector.body: -; CHECK: load <4 x double>, <4 x double>* {{.*}}, align 4, !nontemporal !0 +; CHECK: load <4 x double>, ptr {{.*}}, align 4, !nontemporal !0 ; CHECk: ret double %{{.*}} ; entry: @@ 
-449,8 +449,8 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %acc.08 = phi double [ 0.0, %entry ], [ %add, %for.body ] - %arrayidx = getelementptr inbounds double, double* %ddst, i64 %indvars.iv - %l = load double, double* %arrayidx, align 4, !nontemporal !8 + %arrayidx = getelementptr inbounds double, ptr %ddst, i64 %indvars.iv + %l = load double, ptr %arrayidx, align 4, !nontemporal !8 %add = fadd double %l, %acc.08 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, 4092 @@ -460,10 +460,10 @@ for.cond.cleanup: ; preds = %for.body ret double %add } -define i128 @test_i128_load(i128* %ddst) { +define i128 @test_i128_load(ptr %ddst) { ; CHECK-LABEL: @test_i128_load ; CHECK-LABEL: vector.body: -; CHECK: load <4 x i128>, <4 x i128>* {{.*}}, align 4, !nontemporal !0 +; CHECK: load <4 x i128>, ptr {{.*}}, align 4, !nontemporal !0 ; CHECk: ret i128 %{{.*}} ; entry: @@ -472,8 +472,8 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %acc.08 = phi i128 [ 0, %entry ], [ %add, %for.body ] - %arrayidx = getelementptr inbounds i128, i128* %ddst, i64 %indvars.iv - %l = load i128, i128* %arrayidx, align 4, !nontemporal !8 + %arrayidx = getelementptr inbounds i128, ptr %ddst, i64 %indvars.iv + %l = load i128, ptr %arrayidx, align 4, !nontemporal !8 %add = add i128 %l, %acc.08 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, 4092 @@ -483,7 +483,7 @@ for.cond.cleanup: ; preds = %for.body ret i128 %add } -define i256 @test_256_load(i256* %ddst) { +define i256 @test_256_load(ptr %ddst) { ; CHECK-LABEL: @test_256_load ; CHECK-NOT: vector.body: ; CHECk: ret i256 %{{.*}} @@ -494,8 +494,8 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %acc.08 = phi i256 [ 0, %entry ], [ %add, %for.body ] - 
%arrayidx = getelementptr inbounds i256, i256* %ddst, i64 %indvars.iv - %l = load i256, i256* %arrayidx, align 4, !nontemporal !8 + %arrayidx = getelementptr inbounds i256, ptr %ddst, i64 %indvars.iv + %l = load i256, ptr %arrayidx, align 4, !nontemporal !8 %add = add i256 %l, %acc.08 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, 4092 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalar_interleave.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalar_interleave.ll index 6379c4a..2849c83 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalar_interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalar_interleave.ll @@ -9,12 +9,12 @@ target triple = "aarch64-arm-none-eabi" ; It should also not be interleaved as the predicated interleaving will just ; create less efficient code. -define void @arm_correlate_f16(half* nocapture noundef readonly %pSrcA, i32 noundef %srcALen, half* nocapture noundef readonly %pSrcB, i32 noundef %srcBLen, half* nocapture noundef writeonly %pDst) { +define void @arm_correlate_f16(ptr nocapture noundef readonly %pSrcA, i32 noundef %srcALen, ptr nocapture noundef readonly %pSrcB, i32 noundef %srcBLen, ptr nocapture noundef writeonly %pDst) { ; CHECK-LABEL: @arm_correlate_f16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SUB:%.*]] = add i32 [[SRCBLEN:%.*]], -1 ; CHECK-NEXT: [[IDX_EXT:%.*]] = zext i32 [[SUB]] to i64 -; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds half, half* [[PSRCB:%.*]], i64 [[IDX_EXT]] +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds half, ptr [[PSRCB:%.*]], i64 [[IDX_EXT]] ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SRCALEN:%.*]], -2 ; CHECK-NEXT: [[SUB1:%.*]] = add i32 [[ADD]], [[SRCBLEN]] ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[SRCALEN]], [[SRCBLEN]] @@ -22,7 +22,7 @@ define void @arm_correlate_f16(half* nocapture noundef readonly %pSrcA, i32 noun ; CHECK: if.then: ; CHECK-NEXT: [[SUB2:%.*]] = sub i32 [[SRCALEN]], [[SRCBLEN]] ; CHECK-NEXT: 
[[IDX_EXT3:%.*]] = zext i32 [[SUB2]] to i64 -; CHECK-NEXT: [[ADD_PTR4:%.*]] = getelementptr inbounds half, half* [[PDST:%.*]], i64 [[IDX_EXT3]] +; CHECK-NEXT: [[ADD_PTR4:%.*]] = getelementptr inbounds half, ptr [[PDST:%.*]], i64 [[IDX_EXT3]] ; CHECK-NEXT: br label [[IF_END12:%.*]] ; CHECK: if.else: ; CHECK-NEXT: [[CMP5:%.*]] = icmp ult i32 [[SRCALEN]], [[SRCBLEN]] @@ -30,16 +30,16 @@ define void @arm_correlate_f16(half* nocapture noundef readonly %pSrcA, i32 noun ; CHECK: if.then6: ; CHECK-NEXT: [[SUB7:%.*]] = add i32 [[SRCALEN]], -1 ; CHECK-NEXT: [[IDX_EXT8:%.*]] = zext i32 [[SUB7]] to i64 -; CHECK-NEXT: [[ADD_PTR9:%.*]] = getelementptr inbounds half, half* [[PSRCA:%.*]], i64 [[IDX_EXT8]] +; CHECK-NEXT: [[ADD_PTR9:%.*]] = getelementptr inbounds half, ptr [[PSRCA:%.*]], i64 [[IDX_EXT8]] ; CHECK-NEXT: [[IDX_EXT10:%.*]] = zext i32 [[SUB1]] to i64 -; CHECK-NEXT: [[ADD_PTR11:%.*]] = getelementptr inbounds half, half* [[PDST]], i64 [[IDX_EXT10]] +; CHECK-NEXT: [[ADD_PTR11:%.*]] = getelementptr inbounds half, ptr [[PDST]], i64 [[IDX_EXT10]] ; CHECK-NEXT: br label [[IF_END12]] ; CHECK: if.end12: ; CHECK-NEXT: [[SRCALEN_ADDR_0:%.*]] = phi i32 [ [[SRCALEN]], [[IF_THEN]] ], [ [[SRCBLEN]], [[IF_THEN6]] ], [ [[SRCALEN]], [[IF_ELSE]] ] ; CHECK-NEXT: [[SRCBLEN_ADDR_0:%.*]] = phi i32 [ [[SRCBLEN]], [[IF_THEN]] ], [ [[SRCALEN]], [[IF_THEN6]] ], [ [[SRCBLEN]], [[IF_ELSE]] ] -; CHECK-NEXT: [[PDST_ADDR_0:%.*]] = phi half* [ [[ADD_PTR4]], [[IF_THEN]] ], [ [[ADD_PTR11]], [[IF_THEN6]] ], [ [[PDST]], [[IF_ELSE]] ] -; CHECK-NEXT: [[PIN1_0:%.*]] = phi half* [ [[PSRCA]], [[IF_THEN]] ], [ [[PSRCB]], [[IF_THEN6]] ], [ [[PSRCA]], [[IF_ELSE]] ] -; CHECK-NEXT: [[PIN2_0:%.*]] = phi half* [ [[ADD_PTR]], [[IF_THEN]] ], [ [[ADD_PTR9]], [[IF_THEN6]] ], [ [[ADD_PTR]], [[IF_ELSE]] ] +; CHECK-NEXT: [[PDST_ADDR_0:%.*]] = phi ptr [ [[ADD_PTR4]], [[IF_THEN]] ], [ [[ADD_PTR11]], [[IF_THEN6]] ], [ [[PDST]], [[IF_ELSE]] ] +; CHECK-NEXT: [[PIN1_0:%.*]] = phi ptr [ [[PSRCA]], [[IF_THEN]] ], [ [[PSRCB]], 
[[IF_THEN6]] ], [ [[PSRCA]], [[IF_ELSE]] ] +; CHECK-NEXT: [[PIN2_0:%.*]] = phi ptr [ [[ADD_PTR]], [[IF_THEN]] ], [ [[ADD_PTR9]], [[IF_THEN6]] ], [ [[ADD_PTR]], [[IF_ELSE]] ] ; CHECK-NEXT: [[CMP27:%.*]] = phi i64 [ 1, [[IF_THEN]] ], [ -1, [[IF_THEN6]] ], [ 1, [[IF_ELSE]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SRCBLEN]], [[SRCALEN]] ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], -1 @@ -47,7 +47,7 @@ define void @arm_correlate_f16(half* nocapture noundef readonly %pSrcA, i32 noun ; CHECK: for.cond14.preheader: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ 1, [[IF_END12]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_END:%.*]] ] ; CHECK-NEXT: [[I_077:%.*]] = phi i32 [ 0, [[IF_END12]] ], [ [[INC33:%.*]], [[FOR_END]] ] -; CHECK-NEXT: [[PDST_ADDR_176:%.*]] = phi half* [ [[PDST_ADDR_0]], [[IF_END12]] ], [ [[PDST_ADDR_2:%.*]], [[FOR_END]] ] +; CHECK-NEXT: [[PDST_ADDR_176:%.*]] = phi ptr [ [[PDST_ADDR_0]], [[IF_END12]] ], [ [[PDST_ADDR_2:%.*]], [[FOR_END]] ] ; CHECK-NEXT: br label [[FOR_BODY16:%.*]] ; CHECK: for.body16: ; CHECK-NEXT: [[J_074:%.*]] = phi i32 [ 0, [[FOR_COND14_PREHEADER]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] @@ -59,12 +59,12 @@ define void @arm_correlate_f16(half* nocapture noundef readonly %pSrcA, i32 noun ; CHECK-NEXT: br i1 [[OR_COND]], label [[IF_THEN20:%.*]], label [[FOR_INC]] ; CHECK: if.then20: ; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[J_074]] to i64 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds half, half* [[PIN1_0]], i64 [[IDXPROM]] -; CHECK-NEXT: [[TMP2:%.*]] = load half, half* [[ARRAYIDX]], align 2 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds half, ptr [[PIN1_0]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP2:%.*]] = load half, ptr [[ARRAYIDX]], align 2 ; CHECK-NEXT: [[SUB22:%.*]] = sub nsw i32 0, [[SUB17]] ; CHECK-NEXT: [[IDXPROM23:%.*]] = sext i32 [[SUB22]] to i64 -; CHECK-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds half, half* [[PIN2_0]], i64 [[IDXPROM23]] -; CHECK-NEXT: [[TMP3:%.*]] = load half, half* [[ARRAYIDX24]], align 2 +; 
CHECK-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds half, ptr [[PIN2_0]], i64 [[IDXPROM23]] +; CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[ARRAYIDX24]], align 2 ; CHECK-NEXT: [[MUL:%.*]] = fmul fast half [[TMP3]], [[TMP2]] ; CHECK-NEXT: [[ADD25:%.*]] = fadd fast half [[MUL]], [[SUM_073]] ; CHECK-NEXT: br label [[FOR_INC]] @@ -75,8 +75,8 @@ define void @arm_correlate_f16(half* nocapture noundef readonly %pSrcA, i32 noun ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY16]] ; CHECK: for.end: ; CHECK-NEXT: [[SUM_1_LCSSA:%.*]] = phi half [ [[SUM_1]], [[FOR_INC]] ] -; CHECK-NEXT: [[PDST_ADDR_2]] = getelementptr inbounds half, half* [[PDST_ADDR_176]], i64 [[CMP27]] -; CHECK-NEXT: store half [[SUM_1_LCSSA]], half* [[PDST_ADDR_176]], align 2 +; CHECK-NEXT: [[PDST_ADDR_2]] = getelementptr inbounds half, ptr [[PDST_ADDR_176]], i64 [[CMP27]] +; CHECK-NEXT: store half [[SUM_1_LCSSA]], ptr [[PDST_ADDR_176]], align 2 ; CHECK-NEXT: [[INC33]] = add nuw i32 [[I_077]], 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND78_NOT:%.*]] = icmp eq i32 [[INC33]], [[TMP1]] @@ -87,7 +87,7 @@ define void @arm_correlate_f16(half* nocapture noundef readonly %pSrcA, i32 noun entry: %sub = add i32 %srcBLen, -1 %idx.ext = zext i32 %sub to i64 - %add.ptr = getelementptr inbounds half, half* %pSrcB, i64 %idx.ext + %add.ptr = getelementptr inbounds half, ptr %pSrcB, i64 %idx.ext %add = add i32 %srcALen, -2 %sub1 = add i32 %add, %srcBLen %cmp = icmp ugt i32 %srcALen, %srcBLen @@ -96,7 +96,7 @@ entry: if.then: ; preds = %entry %sub2 = sub i32 %srcALen, %srcBLen %idx.ext3 = zext i32 %sub2 to i64 - %add.ptr4 = getelementptr inbounds half, half* %pDst, i64 %idx.ext3 + %add.ptr4 = getelementptr inbounds half, ptr %pDst, i64 %idx.ext3 br label %if.end12 if.else: ; preds = %entry @@ -106,17 +106,17 @@ if.else: ; preds = %entry if.then6: ; preds = %if.else %sub7 = add i32 %srcALen, -1 %idx.ext8 = zext i32 %sub7 to i64 - %add.ptr9 = getelementptr 
inbounds half, half* %pSrcA, i64 %idx.ext8 + %add.ptr9 = getelementptr inbounds half, ptr %pSrcA, i64 %idx.ext8 %idx.ext10 = zext i32 %sub1 to i64 - %add.ptr11 = getelementptr inbounds half, half* %pDst, i64 %idx.ext10 + %add.ptr11 = getelementptr inbounds half, ptr %pDst, i64 %idx.ext10 br label %if.end12 if.end12: ; preds = %if.else, %if.then6, %if.then %srcALen.addr.0 = phi i32 [ %srcALen, %if.then ], [ %srcBLen, %if.then6 ], [ %srcALen, %if.else ] %srcBLen.addr.0 = phi i32 [ %srcBLen, %if.then ], [ %srcALen, %if.then6 ], [ %srcBLen, %if.else ] - %pDst.addr.0 = phi half* [ %add.ptr4, %if.then ], [ %add.ptr11, %if.then6 ], [ %pDst, %if.else ] - %pIn1.0 = phi half* [ %pSrcA, %if.then ], [ %pSrcB, %if.then6 ], [ %pSrcA, %if.else ] - %pIn2.0 = phi half* [ %add.ptr, %if.then ], [ %add.ptr9, %if.then6 ], [ %add.ptr, %if.else ] + %pDst.addr.0 = phi ptr [ %add.ptr4, %if.then ], [ %add.ptr11, %if.then6 ], [ %pDst, %if.else ] + %pIn1.0 = phi ptr [ %pSrcA, %if.then ], [ %pSrcB, %if.then6 ], [ %pSrcA, %if.else ] + %pIn2.0 = phi ptr [ %add.ptr, %if.then ], [ %add.ptr9, %if.then6 ], [ %add.ptr, %if.else ] %cmp27 = phi i64 [ 1, %if.then ], [ -1, %if.then6 ], [ 1, %if.else ] %0 = add i32 %srcBLen, %srcALen %1 = add i32 %0, -1 @@ -125,7 +125,7 @@ if.end12: ; preds = %if.else, %if.then6, for.cond14.preheader: ; preds = %if.end12, %for.end %indvars.iv = phi i32 [ 1, %if.end12 ], [ %indvars.iv.next, %for.end ] %i.077 = phi i32 [ 0, %if.end12 ], [ %inc33, %for.end ] - %pDst.addr.176 = phi half* [ %pDst.addr.0, %if.end12 ], [ %pDst.addr.2, %for.end ] + %pDst.addr.176 = phi ptr [ %pDst.addr.0, %if.end12 ], [ %pDst.addr.2, %for.end ] br label %for.body16 for.body16: ; preds = %for.cond14.preheader, %for.inc @@ -139,12 +139,12 @@ for.body16: ; preds = %for.cond14.preheade if.then20: ; preds = %for.body16 %idxprom = zext i32 %j.074 to i64 - %arrayidx = getelementptr inbounds half, half* %pIn1.0, i64 %idxprom - %2 = load half, half* %arrayidx, align 2 + %arrayidx = getelementptr inbounds 
half, ptr %pIn1.0, i64 %idxprom + %2 = load half, ptr %arrayidx, align 2 %sub22 = sub nsw i32 0, %sub17 %idxprom23 = sext i32 %sub22 to i64 - %arrayidx24 = getelementptr inbounds half, half* %pIn2.0, i64 %idxprom23 - %3 = load half, half* %arrayidx24, align 2 + %arrayidx24 = getelementptr inbounds half, ptr %pIn2.0, i64 %idxprom23 + %3 = load half, ptr %arrayidx24, align 2 %mul = fmul fast half %3, %2 %add25 = fadd fast half %mul, %sum.073 br label %for.inc @@ -157,8 +157,8 @@ for.inc: ; preds = %for.body16, %if.the for.end: ; preds = %for.inc %sum.1.lcssa = phi half [ %sum.1, %for.inc ] - %pDst.addr.2 = getelementptr inbounds half, half* %pDst.addr.176, i64 %cmp27 - store half %sum.1.lcssa, half* %pDst.addr.176, align 2 + %pDst.addr.2 = getelementptr inbounds half, ptr %pDst.addr.176, i64 %cmp27 + store half %sum.1.lcssa, ptr %pDst.addr.176, align 2 %inc33 = add nuw i32 %i.077, 1 %indvars.iv.next = add i32 %indvars.iv, 1 %exitcond78.not = icmp eq i32 %inc33, %1 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll index dcd3d96..342b377 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll @@ -4,24 +4,24 @@ target triple = "aarch64-linux-gnu" ; CHECK-REMARKS: Scalable vectorization is not supported for all element types found in this loop -define dso_local void @loop_sve_i128(i128* nocapture %ptr, i64 %N) { +define dso_local void @loop_sve_i128(ptr nocapture %ptr, i64 %N) { ; CHECK-LABEL: @loop_sve_i128 ; CHECK: vector.body -; CHECK: %[[LOAD1:.*]] = load i128, i128* {{.*}} -; CHECK-NEXT: %[[LOAD2:.*]] = load i128, i128* {{.*}} +; CHECK: %[[LOAD1:.*]] = load i128, ptr {{.*}} +; CHECK-NEXT: %[[LOAD2:.*]] = load i128, ptr {{.*}} ; CHECK-NEXT: %[[ADD1:.*]] = add nsw i128 %[[LOAD1]], 42 ; CHECK-NEXT: %[[ADD2:.*]] = add nsw i128 %[[LOAD2]], 42 -; CHECK-NEXT: store i128 %[[ADD1]], i128* {{.*}} -; 
CHECK-NEXT: store i128 %[[ADD2]], i128* {{.*}} +; CHECK-NEXT: store i128 %[[ADD1]], ptr {{.*}} +; CHECK-NEXT: store i128 %[[ADD2]], ptr {{.*}} entry: br label %for.body for.body: %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %arrayidx = getelementptr inbounds i128, i128* %ptr, i64 %iv - %0 = load i128, i128* %arrayidx, align 16 + %arrayidx = getelementptr inbounds i128, ptr %ptr, i64 %iv + %0 = load i128, ptr %arrayidx, align 16 %add = add nsw i128 %0, 42 - store i128 %add, i128* %arrayidx, align 16 + store i128 %add, ptr %arrayidx, align 16 %iv.next = add i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %N br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 @@ -31,24 +31,24 @@ for.end: } ; CHECK-REMARKS: Scalable vectorization is not supported for all element types found in this loop -define dso_local void @loop_sve_f128(fp128* nocapture %ptr, i64 %N) { +define dso_local void @loop_sve_f128(ptr nocapture %ptr, i64 %N) { ; CHECK-LABEL: @loop_sve_f128 ; CHECK: vector.body -; CHECK: %[[LOAD1:.*]] = load fp128, fp128* -; CHECK-NEXT: %[[LOAD2:.*]] = load fp128, fp128* +; CHECK: %[[LOAD1:.*]] = load fp128, ptr +; CHECK-NEXT: %[[LOAD2:.*]] = load fp128, ptr ; CHECK-NEXT: %[[FSUB1:.*]] = fsub fp128 %[[LOAD1]], 0xL00000000000000008000000000000000 ; CHECK-NEXT: %[[FSUB2:.*]] = fsub fp128 %[[LOAD2]], 0xL00000000000000008000000000000000 -; CHECK-NEXT: store fp128 %[[FSUB1]], fp128* {{.*}} -; CHECK-NEXT: store fp128 %[[FSUB2]], fp128* {{.*}} +; CHECK-NEXT: store fp128 %[[FSUB1]], ptr {{.*}} +; CHECK-NEXT: store fp128 %[[FSUB2]], ptr {{.*}} entry: br label %for.body for.body: %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %arrayidx = getelementptr inbounds fp128, fp128* %ptr, i64 %iv - %0 = load fp128, fp128* %arrayidx, align 16 + %arrayidx = getelementptr inbounds fp128, ptr %ptr, i64 %iv + %0 = load fp128, ptr %arrayidx, align 16 %add = fsub fp128 %0, 0xL00000000000000008000000000000000 - store fp128 %add, fp128* %arrayidx, align 16 + store 
fp128 %add, ptr %arrayidx, align 16 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %N br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 @@ -58,20 +58,20 @@ for.end: } ; CHECK-REMARKS: Scalable vectorization is not supported for all element types found in this loop -define dso_local void @loop_invariant_sve_i128(i128* nocapture %ptr, i128 %val, i64 %N) { +define dso_local void @loop_invariant_sve_i128(ptr nocapture %ptr, i128 %val, i64 %N) { ; CHECK-LABEL: @loop_invariant_sve_i128 ; CHECK: vector.body -; CHECK: %[[GEP1:.*]] = getelementptr inbounds i128, i128* %ptr -; CHECK-NEXT: %[[GEP2:.*]] = getelementptr inbounds i128, i128* %ptr -; CHECK-NEXT: store i128 %val, i128* %[[GEP1]] -; CHECK-NEXT: store i128 %val, i128* %[[GEP2]] +; CHECK: %[[GEP1:.*]] = getelementptr inbounds i128, ptr %ptr +; CHECK-NEXT: %[[GEP2:.*]] = getelementptr inbounds i128, ptr %ptr +; CHECK-NEXT: store i128 %val, ptr %[[GEP1]] +; CHECK-NEXT: store i128 %val, ptr %[[GEP2]] entry: br label %for.body for.body: %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %arrayidx = getelementptr inbounds i128, i128* %ptr, i64 %iv - store i128 %val, i128* %arrayidx, align 16 + %arrayidx = getelementptr inbounds i128, ptr %ptr, i64 %iv + store i128 %val, ptr %arrayidx, align 16 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %N br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 @@ -81,25 +81,25 @@ for.end: } ; CHECK-REMARKS: Scalable vectorization is not supported for all element types found in this loop -define void @uniform_store_i1(i1* noalias %dst, i64* noalias %start, i64 %N) { +define void @uniform_store_i1(ptr noalias %dst, ptr noalias %start, i64 %N) { ; CHECK-LABEL: @uniform_store_i1 ; CHECK: vector.body -; CHECK: %[[GEP:.*]] = getelementptr inbounds i64, <64 x i64*> {{.*}}, i64 1 -; CHECK: %[[ICMP:.*]] = icmp eq <64 x i64*> %[[GEP]], %[[SPLAT:.*]] +; CHECK: %[[GEP:.*]] = getelementptr inbounds i64, <64 x ptr> 
{{.*}}, i64 1 +; CHECK: %[[ICMP:.*]] = icmp eq <64 x ptr> %[[GEP]], %[[SPLAT:.*]] ; CHECK: %[[EXTRACT1:.*]] = extractelement <64 x i1> %[[ICMP]], i32 63 -; CHECK: store i1 %[[EXTRACT1]], i1* %dst +; CHECK: store i1 %[[EXTRACT1]], ptr %dst ; CHECK-NOT: vscale entry: br label %for.body for.body: - %first.sroa = phi i64* [ %incdec.ptr, %for.body ], [ %start, %entry ] + %first.sroa = phi ptr [ %incdec.ptr, %for.body ], [ %start, %entry ] %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] %iv.next = add i64 %iv, 1 - %0 = load i64, i64* %first.sroa - %incdec.ptr = getelementptr inbounds i64, i64* %first.sroa, i64 1 - %cmp.not = icmp eq i64* %incdec.ptr, %start - store i1 %cmp.not, i1* %dst + %0 = load i64, ptr %first.sroa + %incdec.ptr = getelementptr inbounds i64, ptr %first.sroa, i64 1 + %cmp.not = icmp eq ptr %incdec.ptr, %start + store i1 %cmp.not, ptr %dst %cmp = icmp ult i64 %iv, %N br i1 %cmp, label %for.body, label %end, !llvm.loop !0 @@ -107,21 +107,21 @@ end: ret void } -define dso_local void @loop_fixed_width_i128(i128* nocapture %ptr, i64 %N) { +define dso_local void @loop_fixed_width_i128(ptr nocapture %ptr, i64 %N) { ; CHECK-LABEL: @loop_fixed_width_i128 -; CHECK: load <4 x i128>, <4 x i128>* +; CHECK: load <4 x i128>, ptr ; CHECK: add nsw <4 x i128> {{.*}}, -; CHECK: store <4 x i128> {{.*}} <4 x i128>* +; CHECK: store <4 x i128> {{.*}} ptr ; CHECK-NOT: vscale entry: br label %for.body for.body: %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %arrayidx = getelementptr inbounds i128, i128* %ptr, i64 %iv - %0 = load i128, i128* %arrayidx, align 16 + %arrayidx = getelementptr inbounds i128, ptr %ptr, i64 %iv + %0 = load i128, ptr %arrayidx, align 16 %add = add nsw i128 %0, 42 - store i128 %add, i128* %arrayidx, align 16 + store i128 %add, ptr %arrayidx, align 16 %iv.next = add i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %N br i1 %exitcond.not, label %for.end, label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-icmpcost.ll 
b/llvm/test/Transforms/LoopVectorize/ARM/mve-icmpcost.ll index 90aadec..e7a361e 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-icmpcost.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-icmpcost.ll @@ -5,63 +5,63 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv8.1m.main-arm-none-eabi" ; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %i.016 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ] -; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %arrayidx = getelementptr inbounds i16, i16* %s, i32 %i.016 -; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %1 = load i16, i16* %arrayidx, align 2 +; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %arrayidx = getelementptr inbounds i16, ptr %s, i32 %i.016 +; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %1 = load i16, ptr %arrayidx, align 2 ; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv = sext i16 %1 to i32 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %cmp2 = icmp sgt i32 %conv, %conv1 ; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: br i1 %cmp2, label %if.then, label %for.inc ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %conv6 = add i16 %1, %0 -; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %arrayidx7 = getelementptr inbounds i16, i16* %d, i32 %i.016 -; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %conv6, i16* %arrayidx7, align 2 +; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %arrayidx7 = getelementptr inbounds i16, ptr %d, i32 %i.016 +; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %conv6, ptr %arrayidx7, align 2 ; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: br label %for.inc ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %inc = add 
nuw nsw i32 %i.016, 1 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %exitcond.not = icmp eq i32 %inc, %n ; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body ; CHECK: LV: Scalar loop costs: 5. ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %i.016 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ] -; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %arrayidx = getelementptr inbounds i16, i16* %s, i32 %i.016 -; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction: %1 = load i16, i16* %arrayidx, align 2 +; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %arrayidx = getelementptr inbounds i16, ptr %s, i32 %i.016 +; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction: %1 = load i16, ptr %arrayidx, align 2 ; CHECK: LV: Found an estimated cost of 4 for VF 2 For instruction: %conv = sext i16 %1 to i32 ; CHECK: LV: Found an estimated cost of 20 for VF 2 For instruction: %cmp2 = icmp sgt i32 %conv, %conv1 ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: br i1 %cmp2, label %if.then, label %for.inc ; CHECK: LV: Found an estimated cost of 26 for VF 2 For instruction: %conv6 = add i16 %1, %0 -; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %arrayidx7 = getelementptr inbounds i16, i16* %d, i32 %i.016 -; CHECK: LV: Found an estimated cost of 16 for VF 2 For instruction: store i16 %conv6, i16* %arrayidx7, align 2 +; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %arrayidx7 = getelementptr inbounds i16, ptr %d, i32 %i.016 +; CHECK: LV: Found an estimated cost of 16 for VF 2 For instruction: store i16 %conv6, ptr %arrayidx7, align 2 ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: br label %for.inc ; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %inc = add nuw nsw i32 %i.016, 1 ; CHECK: LV: Found an 
estimated cost of 1 for VF 2 For instruction: %exitcond.not = icmp eq i32 %inc, %n ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body ; CHECK: LV: Vector loop of width 2 costs: 43. ; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %i.016 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ] -; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %arrayidx = getelementptr inbounds i16, i16* %s, i32 %i.016 -; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %1 = load i16, i16* %arrayidx, align 2 +; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %arrayidx = getelementptr inbounds i16, ptr %s, i32 %i.016 +; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %1 = load i16, ptr %arrayidx, align 2 ; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %conv = sext i16 %1 to i32 ; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %cmp2 = icmp sgt i32 %conv, %conv1 ; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: br i1 %cmp2, label %if.then, label %for.inc ; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %conv6 = add i16 %1, %0 -; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %arrayidx7 = getelementptr inbounds i16, i16* %d, i32 %i.016 -; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: store i16 %conv6, i16* %arrayidx7, align 2 +; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %arrayidx7 = getelementptr inbounds i16, ptr %d, i32 %i.016 +; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: store i16 %conv6, ptr %arrayidx7, align 2 ; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: br label %for.inc ; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction: %inc = add nuw nsw i32 %i.016, 1 ; CHECK: LV: Found an estimated cost of 1 for VF 4 For 
instruction: %exitcond.not = icmp eq i32 %inc, %n ; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body ; CHECK: LV: Vector loop of width 4 costs: 2. ; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %i.016 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ] -; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %arrayidx = getelementptr inbounds i16, i16* %s, i32 %i.016 -; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: %1 = load i16, i16* %arrayidx, align 2 +; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %arrayidx = getelementptr inbounds i16, ptr %s, i32 %i.016 +; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: %1 = load i16, ptr %arrayidx, align 2 ; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: %conv = sext i16 %1 to i32 ; CHECK: LV: Found an estimated cost of 36 for VF 8 For instruction: %cmp2 = icmp sgt i32 %conv, %conv1 ; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: br i1 %cmp2, label %if.then, label %for.inc ; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: %conv6 = add i16 %1, %0 -; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %arrayidx7 = getelementptr inbounds i16, i16* %d, i32 %i.016 -; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: store i16 %conv6, i16* %arrayidx7, align 2 +; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %arrayidx7 = getelementptr inbounds i16, ptr %d, i32 %i.016 +; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: store i16 %conv6, ptr %arrayidx7, align 2 ; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: br label %for.inc ; CHECK: LV: Found an estimated cost of 1 for VF 8 For instruction: %inc = add nuw nsw i32 %i.016, 1 ; CHECK: LV: Found an estimated cost of 1 for VF 8 For instruction: %exitcond.not = icmp eq 
i32 %inc, %n ; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body ; CHECK: LV: Vector loop of width 8 costs: 5. ; CHECK: LV: Selecting VF: 4. -define void @expensive_icmp(i16* noalias nocapture %d, i16* nocapture readonly %s, i32 %n, i16 zeroext %m) #0 { +define void @expensive_icmp(ptr noalias nocapture %d, ptr nocapture readonly %s, i32 %n, i16 zeroext %m) #0 { entry: %cmp15 = icmp sgt i32 %n, 0 br i1 %cmp15, label %for.body.lr.ph, label %for.cond.cleanup @@ -76,16 +76,16 @@ for.cond.cleanup: ; preds = %for.inc, %entry for.body: ; preds = %for.body.lr.ph, %for.inc %i.016 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ] - %arrayidx = getelementptr inbounds i16, i16* %s, i32 %i.016 - %1 = load i16, i16* %arrayidx, align 2 + %arrayidx = getelementptr inbounds i16, ptr %s, i32 %i.016 + %1 = load i16, ptr %arrayidx, align 2 %conv = sext i16 %1 to i32 %cmp2 = icmp sgt i32 %conv, %conv1 br i1 %cmp2, label %if.then, label %for.inc if.then: ; preds = %for.body %conv6 = add i16 %1, %0 - %arrayidx7 = getelementptr inbounds i16, i16* %d, i32 %i.016 - store i16 %conv6, i16* %arrayidx7, align 2 + %arrayidx7 = getelementptr inbounds i16, ptr %d, i32 %i.016 + store i16 %conv6, ptr %arrayidx7, align 2 br label %for.inc for.inc: ; preds = %for.body, %if.then @@ -95,112 +95,112 @@ for.inc: ; preds = %for.body, %if.then } ; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %blkCnt.012 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ] -; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %pSrcA.addr.011 = phi i8* [ %incdec.ptr, %while.body ], [ %pSrcA, %while.body.preheader ] -; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %pDst.addr.010 = phi i8* [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ] -; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %pSrcB.addr.09 = phi i8* [ %incdec.ptr2, 
%while.body ], [ %pSrcB, %while.body.preheader ] -; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %incdec.ptr = getelementptr inbounds i8, i8* %pSrcA.addr.011, i32 1 -; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %0 = load i8, i8* %pSrcA.addr.011, align 1 +; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %pSrcA.addr.011 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrcA, %while.body.preheader ] +; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %pDst.addr.010 = phi ptr [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ] +; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ] +; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %incdec.ptr = getelementptr inbounds i8, ptr %pSrcA.addr.011, i32 1 +; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %0 = load i8, ptr %pSrcA.addr.011, align 1 ; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv1 = sext i8 %0 to i32 -; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %incdec.ptr2 = getelementptr inbounds i8, i8* %pSrcB.addr.09, i32 1 -; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %1 = load i8, i8* %pSrcB.addr.09, align 1 +; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %incdec.ptr2 = getelementptr inbounds i8, ptr %pSrcB.addr.09, i32 1 +; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %1 = load i8, ptr %pSrcB.addr.09, align 1 ; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv3 = sext i8 %1 to i32 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %mul = mul nsw i32 %conv3, %conv1 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %shr = ashr i32 %mul, 7 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %2 = icmp slt i32 %shr, 127 
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %spec.select.i = select i1 %2, i32 %shr, i32 127 ; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv4 = trunc i32 %spec.select.i to i8 -; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %incdec.ptr5 = getelementptr inbounds i8, i8* %pDst.addr.010, i32 1 -; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %conv4, i8* %pDst.addr.010, align 1 +; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %incdec.ptr5 = getelementptr inbounds i8, ptr %pDst.addr.010, i32 1 +; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %conv4, ptr %pDst.addr.010, align 1 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %dec = add i32 %blkCnt.012, -1 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %cmp.not = icmp eq i32 %dec, 0 ; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: br i1 %cmp.not, label %while.end.loopexit, label %while.body ; CHECK: LV: Scalar loop costs: 9. 
; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %blkCnt.012 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ] -; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %pSrcA.addr.011 = phi i8* [ %incdec.ptr, %while.body ], [ %pSrcA, %while.body.preheader ] -; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %pDst.addr.010 = phi i8* [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ] -; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %pSrcB.addr.09 = phi i8* [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ] -; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %incdec.ptr = getelementptr inbounds i8, i8* %pSrcA.addr.011, i32 1 -; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction: %0 = load i8, i8* %pSrcA.addr.011, align 1 +; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %pSrcA.addr.011 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrcA, %while.body.preheader ] +; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %pDst.addr.010 = phi ptr [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ] +; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ] +; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %incdec.ptr = getelementptr inbounds i8, ptr %pSrcA.addr.011, i32 1 +; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction: %0 = load i8, ptr %pSrcA.addr.011, align 1 ; CHECK: LV: Found an estimated cost of 4 for VF 2 For instruction: %conv1 = sext i8 %0 to i32 -; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %incdec.ptr2 = getelementptr inbounds i8, i8* %pSrcB.addr.09, i32 1 -; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction: %1 = load i8, i8* %pSrcB.addr.09, align 1 +; CHECK: LV: Found an estimated cost of 0 for VF 2 
For instruction: %incdec.ptr2 = getelementptr inbounds i8, ptr %pSrcB.addr.09, i32 1 +; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction: %1 = load i8, ptr %pSrcB.addr.09, align 1 ; CHECK: LV: Found an estimated cost of 4 for VF 2 For instruction: %conv3 = sext i8 %1 to i32 ; CHECK: LV: Found an estimated cost of 26 for VF 2 For instruction: %mul = mul nsw i32 %conv3, %conv1 ; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction: %shr = ashr i32 %mul, 7 ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %2 = icmp slt i32 %shr, 127 ; CHECK: LV: Found an estimated cost of 22 for VF 2 For instruction: %spec.select.i = select i1 %2, i32 %shr, i32 127 ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %conv4 = trunc i32 %spec.select.i to i8 -; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %incdec.ptr5 = getelementptr inbounds i8, i8* %pDst.addr.010, i32 1 -; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction: store i8 %conv4, i8* %pDst.addr.010, align 1 +; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %incdec.ptr5 = getelementptr inbounds i8, ptr %pDst.addr.010, i32 1 +; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction: store i8 %conv4, ptr %pDst.addr.010, align 1 ; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %dec = add i32 %blkCnt.012, -1 ; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %cmp.not = icmp eq i32 %dec, 0 ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: br i1 %cmp.not, label %while.end.loopexit, label %while.body ; CHECK: LV: Vector loop of width 2 costs: 65. 
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %blkCnt.012 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ] -; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %pSrcA.addr.011 = phi i8* [ %incdec.ptr, %while.body ], [ %pSrcA, %while.body.preheader ] -; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %pDst.addr.010 = phi i8* [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ] -; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %pSrcB.addr.09 = phi i8* [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ] -; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %incdec.ptr = getelementptr inbounds i8, i8* %pSrcA.addr.011, i32 1 -; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %0 = load i8, i8* %pSrcA.addr.011, align 1 +; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %pSrcA.addr.011 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrcA, %while.body.preheader ] +; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %pDst.addr.010 = phi ptr [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ] +; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ] +; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %incdec.ptr = getelementptr inbounds i8, ptr %pSrcA.addr.011, i32 1 +; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %0 = load i8, ptr %pSrcA.addr.011, align 1 ; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %conv1 = sext i8 %0 to i32 -; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %incdec.ptr2 = getelementptr inbounds i8, i8* %pSrcB.addr.09, i32 1 -; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %1 = load i8, i8* %pSrcB.addr.09, align 1 +; CHECK: LV: Found an estimated cost of 0 for VF 4 For 
instruction: %incdec.ptr2 = getelementptr inbounds i8, ptr %pSrcB.addr.09, i32 1 +; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %1 = load i8, ptr %pSrcB.addr.09, align 1 ; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %conv3 = sext i8 %1 to i32 ; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %mul = mul nsw i32 %conv3, %conv1 ; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %shr = ashr i32 %mul, 7 ; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %2 = icmp slt i32 %shr, 127 ; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %spec.select.i = select i1 %2, i32 %shr, i32 127 ; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %conv4 = trunc i32 %spec.select.i to i8 -; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %incdec.ptr5 = getelementptr inbounds i8, i8* %pDst.addr.010, i32 1 -; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: store i8 %conv4, i8* %pDst.addr.010, align 1 +; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %incdec.ptr5 = getelementptr inbounds i8, ptr %pDst.addr.010, i32 1 +; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: store i8 %conv4, ptr %pDst.addr.010, align 1 ; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction: %dec = add i32 %blkCnt.012, -1 ; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction: %cmp.not = icmp eq i32 %dec, 0 ; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: br i1 %cmp.not, label %while.end.loopexit, label %while.body ; CHECK: LV: Vector loop of width 4 costs: 3. 
; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %blkCnt.012 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ] -; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %pSrcA.addr.011 = phi i8* [ %incdec.ptr, %while.body ], [ %pSrcA, %while.body.preheader ] -; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %pDst.addr.010 = phi i8* [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ] -; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %pSrcB.addr.09 = phi i8* [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ] -; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %incdec.ptr = getelementptr inbounds i8, i8* %pSrcA.addr.011, i32 1 -; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: %0 = load i8, i8* %pSrcA.addr.011, align 1 +; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %pSrcA.addr.011 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrcA, %while.body.preheader ] +; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %pDst.addr.010 = phi ptr [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ] +; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ] +; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %incdec.ptr = getelementptr inbounds i8, ptr %pSrcA.addr.011, i32 1 +; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: %0 = load i8, ptr %pSrcA.addr.011, align 1 ; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: %conv1 = sext i8 %0 to i32 -; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %incdec.ptr2 = getelementptr inbounds i8, i8* %pSrcB.addr.09, i32 1 -; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: %1 = load i8, i8* %pSrcB.addr.09, align 1 +; CHECK: LV: Found an estimated cost of 0 for VF 8 For 
instruction: %incdec.ptr2 = getelementptr inbounds i8, ptr %pSrcB.addr.09, i32 1 +; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: %1 = load i8, ptr %pSrcB.addr.09, align 1 ; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: %conv3 = sext i8 %1 to i32 ; CHECK: LV: Found an estimated cost of 4 for VF 8 For instruction: %mul = mul nsw i32 %conv3, %conv1 ; CHECK: LV: Found an estimated cost of 4 for VF 8 For instruction: %shr = ashr i32 %mul, 7 ; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %2 = icmp slt i32 %shr, 127 ; CHECK: LV: Found an estimated cost of 4 for VF 8 For instruction: %spec.select.i = select i1 %2, i32 %shr, i32 127 ; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: %conv4 = trunc i32 %spec.select.i to i8 -; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %incdec.ptr5 = getelementptr inbounds i8, i8* %pDst.addr.010, i32 1 -; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: store i8 %conv4, i8* %pDst.addr.010, align 1 +; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %incdec.ptr5 = getelementptr inbounds i8, ptr %pDst.addr.010, i32 1 +; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: store i8 %conv4, ptr %pDst.addr.010, align 1 ; CHECK: LV: Found an estimated cost of 1 for VF 8 For instruction: %dec = add i32 %blkCnt.012, -1 ; CHECK: LV: Found an estimated cost of 1 for VF 8 For instruction: %cmp.not = icmp eq i32 %dec, 0 ; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: br i1 %cmp.not, label %while.end.loopexit, label %while.body ; CHECK: LV: Vector loop of width 8 costs: 3. 
; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction: %blkCnt.012 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ] -; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction: %pSrcA.addr.011 = phi i8* [ %incdec.ptr, %while.body ], [ %pSrcA, %while.body.preheader ] -; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction: %pDst.addr.010 = phi i8* [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ] -; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction: %pSrcB.addr.09 = phi i8* [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ] -; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction: %incdec.ptr = getelementptr inbounds i8, i8* %pSrcA.addr.011, i32 1 -; CHECK: LV: Found an estimated cost of 2 for VF 16 For instruction: %0 = load i8, i8* %pSrcA.addr.011, align 1 +; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction: %pSrcA.addr.011 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrcA, %while.body.preheader ] +; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction: %pDst.addr.010 = phi ptr [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ] +; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction: %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ] +; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction: %incdec.ptr = getelementptr inbounds i8, ptr %pSrcA.addr.011, i32 1 +; CHECK: LV: Found an estimated cost of 2 for VF 16 For instruction: %0 = load i8, ptr %pSrcA.addr.011, align 1 ; CHECK: LV: Found an estimated cost of 6 for VF 16 For instruction: %conv1 = sext i8 %0 to i32 -; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction: %incdec.ptr2 = getelementptr inbounds i8, i8* %pSrcB.addr.09, i32 1 -; CHECK: LV: Found an estimated cost of 2 for VF 16 For instruction: %1 = load i8, i8* %pSrcB.addr.09, align 1 +; CHECK: LV: Found an estimated cost of 
0 for VF 16 For instruction: %incdec.ptr2 = getelementptr inbounds i8, ptr %pSrcB.addr.09, i32 1 +; CHECK: LV: Found an estimated cost of 2 for VF 16 For instruction: %1 = load i8, ptr %pSrcB.addr.09, align 1 ; CHECK: LV: Found an estimated cost of 6 for VF 16 For instruction: %conv3 = sext i8 %1 to i32 ; CHECK: LV: Found an estimated cost of 8 for VF 16 For instruction: %mul = mul nsw i32 %conv3, %conv1 ; CHECK: LV: Found an estimated cost of 8 for VF 16 For instruction: %shr = ashr i32 %mul, 7 ; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction: %2 = icmp slt i32 %shr, 127 ; CHECK: LV: Found an estimated cost of 8 for VF 16 For instruction: %spec.select.i = select i1 %2, i32 %shr, i32 127 ; CHECK: LV: Found an estimated cost of 6 for VF 16 For instruction: %conv4 = trunc i32 %spec.select.i to i8 -; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction: %incdec.ptr5 = getelementptr inbounds i8, i8* %pDst.addr.010, i32 1 -; CHECK: LV: Found an estimated cost of 2 for VF 16 For instruction: store i8 %conv4, i8* %pDst.addr.010, align 1 +; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction: %incdec.ptr5 = getelementptr inbounds i8, ptr %pDst.addr.010, i32 1 +; CHECK: LV: Found an estimated cost of 2 for VF 16 For instruction: store i8 %conv4, ptr %pDst.addr.010, align 1 ; CHECK: LV: Found an estimated cost of 1 for VF 16 For instruction: %dec = add i32 %blkCnt.012, -1 ; CHECK: LV: Found an estimated cost of 1 for VF 16 For instruction: %cmp.not = icmp eq i32 %dec, 0 ; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction: br i1 %cmp.not, label %while.end.loopexit, label %while.body ; CHECK: LV: Vector loop of width 16 costs: 3. ; CHECK: LV: Selecting VF: 16. 
-define void @cheap_icmp(i8* nocapture readonly %pSrcA, i8* nocapture readonly %pSrcB, i8* nocapture %pDst, i32 %blockSize) #0 { +define void @cheap_icmp(ptr nocapture readonly %pSrcA, ptr nocapture readonly %pSrcB, ptr nocapture %pDst, i32 %blockSize) #0 { entry: %cmp.not8 = icmp eq i32 %blockSize, 0 br i1 %cmp.not8, label %while.end, label %while.body.preheader @@ -210,22 +210,22 @@ while.body.preheader: ; preds = %entry while.body: ; preds = %while.body.preheader, %while.body %blkCnt.012 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ] - %pSrcA.addr.011 = phi i8* [ %incdec.ptr, %while.body ], [ %pSrcA, %while.body.preheader ] - %pDst.addr.010 = phi i8* [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ] - %pSrcB.addr.09 = phi i8* [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ] - %incdec.ptr = getelementptr inbounds i8, i8* %pSrcA.addr.011, i32 1 - %0 = load i8, i8* %pSrcA.addr.011, align 1 + %pSrcA.addr.011 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrcA, %while.body.preheader ] + %pDst.addr.010 = phi ptr [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ] + %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ] + %incdec.ptr = getelementptr inbounds i8, ptr %pSrcA.addr.011, i32 1 + %0 = load i8, ptr %pSrcA.addr.011, align 1 %conv1 = sext i8 %0 to i32 - %incdec.ptr2 = getelementptr inbounds i8, i8* %pSrcB.addr.09, i32 1 - %1 = load i8, i8* %pSrcB.addr.09, align 1 + %incdec.ptr2 = getelementptr inbounds i8, ptr %pSrcB.addr.09, i32 1 + %1 = load i8, ptr %pSrcB.addr.09, align 1 %conv3 = sext i8 %1 to i32 %mul = mul nsw i32 %conv3, %conv1 %shr = ashr i32 %mul, 7 %2 = icmp slt i32 %shr, 127 %spec.select.i = select i1 %2, i32 %shr, i32 127 %conv4 = trunc i32 %spec.select.i to i8 - %incdec.ptr5 = getelementptr inbounds i8, i8* %pDst.addr.010, i32 1 - store i8 %conv4, i8* %pDst.addr.010, align 1 + %incdec.ptr5 = getelementptr inbounds i8, ptr %pDst.addr.010, i32 1 + store 
i8 %conv4, ptr %pDst.addr.010, align 1 %dec = add i32 %blkCnt.012, -1 %cmp.not = icmp eq i32 %dec, 0 br i1 %cmp.not, label %while.end.loopexit, label %while.body @@ -240,22 +240,22 @@ while.end: ; preds = %while.end.loopexit, ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %cmp1 = fcmp ; CHECK: LV: Found an estimated cost of 12 for VF 2 For instruction: %cmp1 = fcmp ; CHECK: LV: Found an estimated cost of 24 for VF 4 For instruction: %cmp1 = fcmp -define void @floatcmp(float* nocapture readonly %pSrc, i32* nocapture %pDst, i32 %blockSize) #0 { +define void @floatcmp(ptr nocapture readonly %pSrc, ptr nocapture %pDst, i32 %blockSize) #0 { entry: %cmp.not7 = icmp eq i32 %blockSize, 0 br i1 %cmp.not7, label %while.end, label %while.body while.body: ; preds = %entry, %while.body - %pSrc.addr.010 = phi float* [ %incdec.ptr2, %while.body ], [ %pSrc, %entry ] + %pSrc.addr.010 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrc, %entry ] %blockSize.addr.09 = phi i32 [ %dec, %while.body ], [ %blockSize, %entry ] - %pDst.addr.08 = phi i32* [ %incdec.ptr, %while.body ], [ %pDst, %entry ] - %0 = load float, float* %pSrc.addr.010, align 4 + %pDst.addr.08 = phi ptr [ %incdec.ptr, %while.body ], [ %pDst, %entry ] + %0 = load float, ptr %pSrc.addr.010, align 4 %cmp1 = fcmp nnan ninf nsz olt float %0, 0.000000e+00 %cond = select nnan ninf nsz i1 %cmp1, float 1.000000e+01, float %0 %conv = fptosi float %cond to i32 - %incdec.ptr = getelementptr inbounds i32, i32* %pDst.addr.08, i32 1 - store i32 %conv, i32* %pDst.addr.08, align 4 - %incdec.ptr2 = getelementptr inbounds float, float* %pSrc.addr.010, i32 1 + %incdec.ptr = getelementptr inbounds i32, ptr %pDst.addr.08, i32 1 + store i32 %conv, ptr %pDst.addr.08, align 4 + %incdec.ptr2 = getelementptr inbounds float, ptr %pSrc.addr.010, i32 1 %dec = add i32 %blockSize.addr.09, -1 %cmp.not = icmp eq i32 %dec, 0 br i1 %cmp.not, label %while.end, label %while.body diff --git 
a/llvm/test/Transforms/LoopVectorize/ARM/mve-qabs.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-qabs.ll index 5fa71e4..c1492ec 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-qabs.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-qabs.ll @@ -4,11 +4,11 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv8.1m.main-arm-none-eabi" -define void @arm_abs_q7(i8* nocapture readonly %pSrc, i8* nocapture %pDst, i32 %blockSize) #0 { +define void @arm_abs_q7(ptr nocapture readonly %pSrc, ptr nocapture %pDst, i32 %blockSize) #0 { ; CHECK-LABEL: @arm_abs_q7( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[PSRC2:%.*]] = ptrtoint i8* [[PSRC:%.*]] to i32 -; CHECK-NEXT: [[PDST1:%.*]] = ptrtoint i8* [[PDST:%.*]] to i32 +; CHECK-NEXT: [[PSRC2:%.*]] = ptrtoint ptr [[PSRC:%.*]] to i32 +; CHECK-NEXT: [[PDST1:%.*]] = ptrtoint ptr [[PDST:%.*]] to i32 ; CHECK-NEXT: [[CMP_NOT19:%.*]] = icmp eq i32 [[BLOCKSIZE:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP_NOT19]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]] ; CHECK: while.body.preheader: @@ -19,47 +19,45 @@ define void @arm_abs_q7(i8* nocapture readonly %pSrc, i8* nocapture %pDst, i32 % ; CHECK-NEXT: br i1 [[OR_COND]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[BLOCKSIZE]], -16 -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, i8* [[PSRC]], i32 [[N_VEC]] +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[PSRC]], i32 [[N_VEC]] ; CHECK-NEXT: [[IND_END3:%.*]] = and i32 [[BLOCKSIZE]], 15 -; CHECK-NEXT: [[IND_END5:%.*]] = getelementptr i8, i8* [[PDST]], i32 [[N_VEC]] +; CHECK-NEXT: [[IND_END5:%.*]] = getelementptr i8, ptr [[PDST]], i32 [[N_VEC]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[PSRC]], i32 [[INDEX]] -; CHECK-NEXT: [[NEXT_GEP7:%.*]] = 
getelementptr i8, i8* [[PDST]], i32 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[NEXT_GEP]] to <16 x i8>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <16 x i8> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128> -; CHECK-NEXT: [[TMP4:%.*]] = sub <16 x i8> zeroinitializer, [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP5:%.*]] = select <16 x i1> [[TMP3]], <16 x i8> <i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127>, <16 x i8> [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[WIDE_LOAD]], <16 x i8> [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8* [[NEXT_GEP7]] to <16 x i8>* -; CHECK-NEXT: store <16 x i8> [[TMP6]], <16 x i8>* [[TMP7]], align 1 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC]], i32 [[INDEX]] +; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[PDST]], i32 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <16 x i8> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128> +; CHECK-NEXT: [[TMP3:%.*]] = sub <16 x i8> zeroinitializer, [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP4:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> <i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127>, <16 x i8> [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[WIDE_LOAD]], <16 x i8> [[TMP4]] +; CHECK-NEXT: store <16 x i8> [[TMP5]], ptr [[NEXT_GEP7]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[BLOCKSIZE]] ; CHECK-NEXT: br i1 [[CMP_N]], label
[[WHILE_END]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[PSRC]], [[WHILE_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[PSRC]], [[WHILE_BODY_PREHEADER]] ] ; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[BLOCKSIZE]], [[WHILE_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i8* [ [[IND_END5]], [[MIDDLE_BLOCK]] ], [ [[PDST]], [[WHILE_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[IND_END5]], [[MIDDLE_BLOCK]] ], [ [[PDST]], [[WHILE_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK: while.body: -; CHECK-NEXT: [[PSRC_ADDR_022:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[PSRC_ADDR_022:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[BLKCNT_021:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[PDST_ADDR_020:%.*]] = phi i8* [ [[INCDEC_PTR13:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL6]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[PSRC_ADDR_022]], i32 1 -; CHECK-NEXT: [[TMP9:%.*]] = load i8, i8* [[PSRC_ADDR_022]], align 1 -; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i8 [[TMP9]], 0 -; CHECK-NEXT: [[CMP5:%.*]] = icmp eq i8 [[TMP9]], -128 -; CHECK-NEXT: [[SUB:%.*]] = sub i8 0, [[TMP9]] +; CHECK-NEXT: [[PDST_ADDR_020:%.*]] = phi ptr [ [[INCDEC_PTR13:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL6]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[PSRC_ADDR_022]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[PSRC_ADDR_022]], align 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i8 [[TMP7]], 0 +; CHECK-NEXT: [[CMP5:%.*]] = icmp eq i8 [[TMP7]], -128 +; CHECK-NEXT: [[SUB:%.*]] = sub i8 0, [[TMP7]] ; CHECK-NEXT: 
[[COND:%.*]] = select i1 [[CMP5]], i8 127, i8 [[SUB]] -; CHECK-NEXT: [[COND11:%.*]] = select i1 [[CMP1]], i8 [[TMP9]], i8 [[COND]] -; CHECK-NEXT: [[INCDEC_PTR13]] = getelementptr inbounds i8, i8* [[PDST_ADDR_020]], i32 1 -; CHECK-NEXT: store i8 [[COND11]], i8* [[PDST_ADDR_020]], align 1 +; CHECK-NEXT: [[COND11:%.*]] = select i1 [[CMP1]], i8 [[TMP7]], i8 [[COND]] +; CHECK-NEXT: [[INCDEC_PTR13]] = getelementptr inbounds i8, ptr [[PDST_ADDR_020]], i32 1 +; CHECK-NEXT: store i8 [[COND11]], ptr [[PDST_ADDR_020]], align 1 ; CHECK-NEXT: [[DEC]] = add i32 [[BLKCNT_021]], -1 ; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[DEC]], 0 ; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END]], label [[WHILE_BODY]], !llvm.loop [[LOOP2:![0-9]+]] @@ -74,18 +72,18 @@ while.body.preheader: ; preds = %entry br label %while.body while.body: ; preds = %while.body.preheader, %while.body - %pSrc.addr.022 = phi i8* [ %incdec.ptr, %while.body ], [ %pSrc, %while.body.preheader ] + %pSrc.addr.022 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrc, %while.body.preheader ] %blkCnt.021 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ] - %pDst.addr.020 = phi i8* [ %incdec.ptr13, %while.body ], [ %pDst, %while.body.preheader ] - %incdec.ptr = getelementptr inbounds i8, i8* %pSrc.addr.022, i32 1 - %0 = load i8, i8* %pSrc.addr.022, align 1 + %pDst.addr.020 = phi ptr [ %incdec.ptr13, %while.body ], [ %pDst, %while.body.preheader ] + %incdec.ptr = getelementptr inbounds i8, ptr %pSrc.addr.022, i32 1 + %0 = load i8, ptr %pSrc.addr.022, align 1 %cmp1 = icmp sgt i8 %0, 0 %cmp5 = icmp eq i8 %0, -128 %sub = sub i8 0, %0 %cond = select i1 %cmp5, i8 127, i8 %sub %cond11 = select i1 %cmp1, i8 %0, i8 %cond - %incdec.ptr13 = getelementptr inbounds i8, i8* %pDst.addr.020, i32 1 - store i8 %cond11, i8* %pDst.addr.020, align 1 + %incdec.ptr13 = getelementptr inbounds i8, ptr %pDst.addr.020, i32 1 + store i8 %cond11, ptr %pDst.addr.020, align 1 %dec = add i32 %blkCnt.021, -1 %cmp.not = icmp eq i32 %dec, 
0 br i1 %cmp.not, label %while.end.loopexit, label %while.body @@ -97,11 +95,11 @@ while.end: ; preds = %while.end.loopexit, ret void } -define void @arm_abs_q15(i16* nocapture readonly %pSrc, i16* nocapture %pDst, i32 %blockSize) #0 { +define void @arm_abs_q15(ptr nocapture readonly %pSrc, ptr nocapture %pDst, i32 %blockSize) #0 { ; CHECK-LABEL: @arm_abs_q15( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[PSRC2:%.*]] = ptrtoint i16* [[PSRC:%.*]] to i32 -; CHECK-NEXT: [[PDST1:%.*]] = ptrtoint i16* [[PDST:%.*]] to i32 +; CHECK-NEXT: [[PSRC2:%.*]] = ptrtoint ptr [[PSRC:%.*]] to i32 +; CHECK-NEXT: [[PDST1:%.*]] = ptrtoint ptr [[PDST:%.*]] to i32 ; CHECK-NEXT: [[CMP_NOT20:%.*]] = icmp eq i32 [[BLOCKSIZE:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP_NOT20]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]] ; CHECK: while.body.preheader: @@ -112,47 +110,49 @@ define void @arm_abs_q15(i16* nocapture readonly %pSrc, i16* nocapture %pDst, i3 ; CHECK-NEXT: br i1 [[OR_COND]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[BLOCKSIZE]], -8 -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i16, i16* [[PSRC]], i32 [[N_VEC]] +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[N_VEC]], 1 +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[PSRC]], i32 [[TMP1]] ; CHECK-NEXT: [[IND_END3:%.*]] = and i32 [[BLOCKSIZE]], 7 -; CHECK-NEXT: [[IND_END5:%.*]] = getelementptr i16, i16* [[PDST]], i32 [[N_VEC]] +; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[N_VEC]], 1 +; CHECK-NEXT: [[IND_END5:%.*]] = getelementptr i8, ptr [[PDST]], i32 [[TMP2]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i16, i16* [[PSRC]], i32 [[INDEX]] -; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i16, i16* [[PDST]], i32 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[NEXT_GEP]] to <8 x i16>* -; CHECK-NEXT: 
[[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i16> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <8 x i16> [[WIDE_LOAD]], <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768> -; CHECK-NEXT: [[TMP4:%.*]] = sub <8 x i16> zeroinitializer, [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP5:%.*]] = select <8 x i1> [[TMP3]], <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>, <8 x i16> [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[WIDE_LOAD]], <8 x i16> [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[NEXT_GEP7]] to <8 x i16>* -; CHECK-NEXT: store <8 x i16> [[TMP6]], <8 x i16>* [[TMP7]], align 2 +; CHECK-NEXT: [[TMP3:%.*]] = shl i32 [[INDEX]], 1 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC]], i32 [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = shl i32 [[INDEX]], 1 +; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[PDST]], i32 [[TMP4]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[NEXT_GEP]], align 2 +; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <8 x i16> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <8 x i16> [[WIDE_LOAD]], <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768> +; CHECK-NEXT: [[TMP7:%.*]] = sub <8 x i16> zeroinitializer, [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP6]], <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>, <8 x i16> [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[TMP5]], <8 x i16> [[WIDE_LOAD]], <8 x i16> [[TMP8]] +; CHECK-NEXT: store <8 x i16> [[TMP9]], ptr [[NEXT_GEP7]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[BLOCKSIZE]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END]], label [[SCALAR_PH]]
; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[PSRC]], [[WHILE_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[PSRC]], [[WHILE_BODY_PREHEADER]] ] ; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[BLOCKSIZE]], [[WHILE_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i16* [ [[IND_END5]], [[MIDDLE_BLOCK]] ], [ [[PDST]], [[WHILE_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[IND_END5]], [[MIDDLE_BLOCK]] ], [ [[PDST]], [[WHILE_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK: while.body: -; CHECK-NEXT: [[PSRC_ADDR_023:%.*]] = phi i16* [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[PSRC_ADDR_023:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[BLKCNT_022:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[PDST_ADDR_021:%.*]] = phi i16* [ [[INCDEC_PTR13:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL6]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i16, i16* [[PSRC_ADDR_023]], i32 1 -; CHECK-NEXT: [[TMP9:%.*]] = load i16, i16* [[PSRC_ADDR_023]], align 2 -; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i16 [[TMP9]], 0 -; CHECK-NEXT: [[CMP5:%.*]] = icmp eq i16 [[TMP9]], -32768 -; CHECK-NEXT: [[SUB:%.*]] = sub i16 0, [[TMP9]] +; CHECK-NEXT: [[PDST_ADDR_021:%.*]] = phi ptr [ [[INCDEC_PTR13:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL6]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i16, ptr [[PSRC_ADDR_023]], i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i16, ptr [[PSRC_ADDR_023]], align 2 +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i16 [[TMP11]], 0 +; CHECK-NEXT: [[CMP5:%.*]] = icmp eq i16 [[TMP11]], -32768 +; CHECK-NEXT: [[SUB:%.*]] = sub i16 0, [[TMP11]] ; CHECK-NEXT: [[COND:%.*]] = 
select i1 [[CMP5]], i16 32767, i16 [[SUB]] -; CHECK-NEXT: [[COND11:%.*]] = select i1 [[CMP1]], i16 [[TMP9]], i16 [[COND]] -; CHECK-NEXT: [[INCDEC_PTR13]] = getelementptr inbounds i16, i16* [[PDST_ADDR_021]], i32 1 -; CHECK-NEXT: store i16 [[COND11]], i16* [[PDST_ADDR_021]], align 2 +; CHECK-NEXT: [[COND11:%.*]] = select i1 [[CMP1]], i16 [[TMP11]], i16 [[COND]] +; CHECK-NEXT: [[INCDEC_PTR13]] = getelementptr inbounds i16, ptr [[PDST_ADDR_021]], i32 1 +; CHECK-NEXT: store i16 [[COND11]], ptr [[PDST_ADDR_021]], align 2 ; CHECK-NEXT: [[DEC]] = add i32 [[BLKCNT_022]], -1 ; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[DEC]], 0 ; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END]], label [[WHILE_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -167,18 +167,18 @@ while.body.preheader: ; preds = %entry br label %while.body while.body: ; preds = %while.body.preheader, %while.body - %pSrc.addr.023 = phi i16* [ %incdec.ptr, %while.body ], [ %pSrc, %while.body.preheader ] + %pSrc.addr.023 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrc, %while.body.preheader ] %blkCnt.022 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ] - %pDst.addr.021 = phi i16* [ %incdec.ptr13, %while.body ], [ %pDst, %while.body.preheader ] - %incdec.ptr = getelementptr inbounds i16, i16* %pSrc.addr.023, i32 1 - %0 = load i16, i16* %pSrc.addr.023, align 2 + %pDst.addr.021 = phi ptr [ %incdec.ptr13, %while.body ], [ %pDst, %while.body.preheader ] + %incdec.ptr = getelementptr inbounds i16, ptr %pSrc.addr.023, i32 1 + %0 = load i16, ptr %pSrc.addr.023, align 2 %cmp1 = icmp sgt i16 %0, 0 %cmp5 = icmp eq i16 %0, -32768 %sub = sub i16 0, %0 %cond = select i1 %cmp5, i16 32767, i16 %sub %cond11 = select i1 %cmp1, i16 %0, i16 %cond - %incdec.ptr13 = getelementptr inbounds i16, i16* %pDst.addr.021, i32 1 - store i16 %cond11, i16* %pDst.addr.021, align 2 + %incdec.ptr13 = getelementptr inbounds i16, ptr %pDst.addr.021, i32 1 + store i16 %cond11, ptr %pDst.addr.021, align 2 %dec = add i32 %blkCnt.022, -1 
%cmp.not = icmp eq i32 %dec, 0 br i1 %cmp.not, label %while.end.loopexit, label %while.body @@ -190,11 +190,11 @@ while.end: ; preds = %while.end.loopexit, ret void } -define void @arm_abs_q31(i32* nocapture readonly %pSrc, i32* nocapture %pDst, i32 %blockSize) #0 { +define void @arm_abs_q31(ptr nocapture readonly %pSrc, ptr nocapture %pDst, i32 %blockSize) #0 { ; CHECK-LABEL: @arm_abs_q31( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[PSRC2:%.*]] = ptrtoint i32* [[PSRC:%.*]] to i32 -; CHECK-NEXT: [[PDST1:%.*]] = ptrtoint i32* [[PDST:%.*]] to i32 +; CHECK-NEXT: [[PSRC2:%.*]] = ptrtoint ptr [[PSRC:%.*]] to i32 +; CHECK-NEXT: [[PDST1:%.*]] = ptrtoint ptr [[PDST:%.*]] to i32 ; CHECK-NEXT: [[CMP_NOT14:%.*]] = icmp eq i32 [[BLOCKSIZE:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP_NOT14]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]] ; CHECK: while.body.preheader: @@ -205,47 +205,49 @@ define void @arm_abs_q31(i32* nocapture readonly %pSrc, i32* nocapture %pDst, i3 ; CHECK-NEXT: br i1 [[OR_COND]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[BLOCKSIZE]], -4 -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i32, i32* [[PSRC]], i32 [[N_VEC]] +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[N_VEC]], 2 +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[PSRC]], i32 [[TMP1]] ; CHECK-NEXT: [[IND_END3:%.*]] = and i32 [[BLOCKSIZE]], 3 -; CHECK-NEXT: [[IND_END5:%.*]] = getelementptr i32, i32* [[PDST]], i32 [[N_VEC]] +; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[N_VEC]], 2 +; CHECK-NEXT: [[IND_END5:%.*]] = getelementptr i8, ptr [[PDST]], i32 [[TMP2]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i32, i32* [[PSRC]], i32 [[INDEX]] -; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i32, i32* [[PDST]], i32 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[NEXT_GEP]] to <4 
x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648> -; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <4 x i32> zeroinitializer, [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>, <4 x i32> [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[WIDE_LOAD]], <4 x i32> [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[NEXT_GEP7]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = shl i32 [[INDEX]], 2 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC]], i32 [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = shl i32 [[INDEX]], 2 +; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[PDST]], i32 [[TMP4]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[NEXT_GEP]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648> +; CHECK-NEXT: [[TMP7:%.*]] = sub nsw <4 x i32> zeroinitializer, [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>, <4 x i32> [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[WIDE_LOAD]], <4 x i32> [[TMP8]] +; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr [[NEXT_GEP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[BLOCKSIZE]] ; CHECK-NEXT: br i1 [[CMP_N]], label
[[WHILE_END]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[PSRC]], [[WHILE_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[PSRC]], [[WHILE_BODY_PREHEADER]] ] ; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[BLOCKSIZE]], [[WHILE_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i32* [ [[IND_END5]], [[MIDDLE_BLOCK]] ], [ [[PDST]], [[WHILE_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[IND_END5]], [[MIDDLE_BLOCK]] ], [ [[PDST]], [[WHILE_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK: while.body: -; CHECK-NEXT: [[PSRC_ADDR_017:%.*]] = phi i32* [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[PSRC_ADDR_017:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[BLKCNT_016:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[PDST_ADDR_015:%.*]] = phi i32* [ [[INCDEC_PTR7:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL6]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i32, i32* [[PSRC_ADDR_017]], i32 1 -; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[PSRC_ADDR_017]], align 4 -; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP9]], 0 -; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[TMP9]], -2147483648 -; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[TMP9]] +; CHECK-NEXT: [[PDST_ADDR_015:%.*]] = phi ptr [ [[INCDEC_PTR7:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL6]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i32, ptr [[PSRC_ADDR_017]], i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[PSRC_ADDR_017]], align 4 +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP11]], 0 +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[TMP11]], -2147483648 +; CHECK-NEXT: [[SUB:%.*]] = sub 
nsw i32 0, [[TMP11]] ; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP2]], i32 2147483647, i32 [[SUB]] -; CHECK-NEXT: [[COND6:%.*]] = select i1 [[CMP1]], i32 [[TMP9]], i32 [[COND]] -; CHECK-NEXT: [[INCDEC_PTR7]] = getelementptr inbounds i32, i32* [[PDST_ADDR_015]], i32 1 -; CHECK-NEXT: store i32 [[COND6]], i32* [[PDST_ADDR_015]], align 4 +; CHECK-NEXT: [[COND6:%.*]] = select i1 [[CMP1]], i32 [[TMP11]], i32 [[COND]] +; CHECK-NEXT: [[INCDEC_PTR7]] = getelementptr inbounds i32, ptr [[PDST_ADDR_015]], i32 1 +; CHECK-NEXT: store i32 [[COND6]], ptr [[PDST_ADDR_015]], align 4 ; CHECK-NEXT: [[DEC]] = add i32 [[BLKCNT_016]], -1 ; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[DEC]], 0 ; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END]], label [[WHILE_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -260,18 +262,18 @@ while.body.preheader: ; preds = %entry br label %while.body while.body: ; preds = %while.body.preheader, %while.body - %pSrc.addr.017 = phi i32* [ %incdec.ptr, %while.body ], [ %pSrc, %while.body.preheader ] + %pSrc.addr.017 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrc, %while.body.preheader ] %blkCnt.016 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ] - %pDst.addr.015 = phi i32* [ %incdec.ptr7, %while.body ], [ %pDst, %while.body.preheader ] - %incdec.ptr = getelementptr inbounds i32, i32* %pSrc.addr.017, i32 1 - %0 = load i32, i32* %pSrc.addr.017, align 4 + %pDst.addr.015 = phi ptr [ %incdec.ptr7, %while.body ], [ %pDst, %while.body.preheader ] + %incdec.ptr = getelementptr inbounds i32, ptr %pSrc.addr.017, i32 1 + %0 = load i32, ptr %pSrc.addr.017, align 4 %cmp1 = icmp sgt i32 %0, 0 %cmp2 = icmp eq i32 %0, -2147483648 %sub = sub nsw i32 0, %0 %cond = select i1 %cmp2, i32 2147483647, i32 %sub %cond6 = select i1 %cmp1, i32 %0, i32 %cond - %incdec.ptr7 = getelementptr inbounds i32, i32* %pDst.addr.015, i32 1 - store i32 %cond6, i32* %pDst.addr.015, align 4 + %incdec.ptr7 = getelementptr inbounds i32, ptr %pDst.addr.015, i32 1 + store i32 %cond6, ptr 
%pDst.addr.015, align 4 %dec = add i32 %blkCnt.016, -1 %cmp.not = icmp eq i32 %dec, 0 br i1 %cmp.not, label %while.end.loopexit, label %while.body diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-saddsatcost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-saddsatcost.ll index a180bf4..0515cce 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-saddsatcost.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-saddsatcost.ll @@ -12,7 +12,7 @@ target triple = "thumbv8.1m.main-arm-none-eabi" ; CHECK-COST: Found an estimated cost of 8 for VF 4 For instruction: %1 = tail call i16 @llvm.sadd.sat.i16(i16 %0, i16 %offset) ; CHECK-COST: Found an estimated cost of 2 for VF 8 For instruction: %1 = tail call i16 @llvm.sadd.sat.i16(i16 %0, i16 %offset) -define void @arm_offset_q15(i16* nocapture readonly %pSrc, i16 signext %offset, i16* nocapture noalias %pDst, i32 %blockSize) #0 { +define void @arm_offset_q15(ptr nocapture readonly %pSrc, i16 signext %offset, ptr nocapture noalias %pDst, i32 %blockSize) #0 { ; CHECK-LABEL: @arm_offset_q15( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP_NOT6:%.*]] = icmp eq i32 [[BLOCKSIZE:%.*]], 0 @@ -25,14 +25,14 @@ define void @arm_offset_q15(i16* nocapture readonly %pSrc, i16 signext %offset, ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i16, i16* [[PSRC:%.*]], i32 [[INDEX]] -; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i16, i16* [[PDST:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP0:%.*]] = shl i32 [[INDEX]], 1 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[INDEX]], 1 +; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[PDST:%.*]], i32 [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 [[INDEX]], i32 [[BLOCKSIZE]]) -; CHECK-NEXT: [[TMP0:%.*]] 
= bitcast i16* [[NEXT_GEP]] to <8 x i16>* -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* [[TMP0]], i32 2, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> poison) -; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[WIDE_MASKED_LOAD]], <8 x i16> [[BROADCAST_SPLAT7]]) -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[NEXT_GEP5]] to <8 x i16>* -; CHECK-NEXT: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> [[TMP1]], <8 x i16>* [[TMP2]], i32 2, <8 x i1> [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[NEXT_GEP]], i32 2, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> poison) +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[WIDE_MASKED_LOAD]], <8 x i16> [[BROADCAST_SPLAT7]]) +; CHECK-NEXT: call void @llvm.masked.store.v8i16.p0(<8 x i16> [[TMP2]], ptr [[NEXT_GEP5]], i32 2, <8 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP3]], label [[WHILE_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -45,13 +45,13 @@ entry: while.body: ; preds = %entry, %while.body %blkCnt.09 = phi i32 [ %dec, %while.body ], [ %blockSize, %entry ] - %pSrc.addr.08 = phi i16* [ %incdec.ptr, %while.body ], [ %pSrc, %entry ] - %pDst.addr.07 = phi i16* [ %incdec.ptr3, %while.body ], [ %pDst, %entry ] - %incdec.ptr = getelementptr inbounds i16, i16* %pSrc.addr.08, i32 1 - %0 = load i16, i16* %pSrc.addr.08, align 2 + %pSrc.addr.08 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrc, %entry ] + %pDst.addr.07 = phi ptr [ %incdec.ptr3, %while.body ], [ %pDst, %entry ] + %incdec.ptr = getelementptr inbounds i16, ptr %pSrc.addr.08, i32 1 + %0 = load i16, ptr %pSrc.addr.08, align 2 %1 = tail call i16 @llvm.sadd.sat.i16(i16 %0, i16 %offset) - %incdec.ptr3 = getelementptr inbounds i16, i16* %pDst.addr.07, i32 1 - store i16 %1, 
i16* %pDst.addr.07, align 2 + %incdec.ptr3 = getelementptr inbounds i16, ptr %pDst.addr.07, i32 1 + store i16 %1, ptr %pDst.addr.07, align 2 %dec = add i32 %blkCnt.09, -1 %cmp.not = icmp eq i32 %dec, 0 br i1 %cmp.not, label %while.end, label %while.body @@ -63,3 +63,5 @@ while.end: ; preds = %while.body, %entry declare i16 @llvm.sadd.sat.i16(i16, i16) attributes #0 = { "target-features"="+mve" } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-COST: {{.*}} diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll index decffcb..5d85a4c 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll @@ -11,7 +11,7 @@ target triple = "thumbv8.1m.main-arm-none-eabi" ; CHECK-COST: LV: Found an estimated cost of 26 for VF 2 For instruction: %or.cond = select i1 %cmp2, i1 true, i1 %cmp3 ; CHECK-COST: LV: Found an estimated cost of 2 for VF 4 For instruction: %or.cond = select i1 %cmp2, i1 true, i1 %cmp3 -define float @test(float* nocapture readonly %pA, float* nocapture readonly %pB, i32 %blockSize) #0 { +define float @test(ptr nocapture readonly %pA, ptr nocapture readonly %pB, i32 %blockSize) #0 { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP_NOT16:%.*]] = icmp eq i32 [[BLOCKSIZE:%.*]], 0 @@ -21,63 +21,65 @@ define float @test(float* nocapture readonly %pA, float* nocapture readonly %pB, ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[BLOCKSIZE]], -4 -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr float, float* [[PA:%.*]], i32 [[N_VEC]] -; CHECK-NEXT: [[IND_END1:%.*]] = getelementptr float, float* [[PB:%.*]], i32 [[N_VEC]] +; CHECK-NEXT: [[TMP0:%.*]] = shl i32 [[N_VEC]], 2 +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr 
[[PA:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[N_VEC]], 2 +; CHECK-NEXT: [[IND_END1:%.*]] = getelementptr i8, ptr [[PB:%.*]], i32 [[TMP1]] ; CHECK-NEXT: [[IND_END3:%.*]] = and i32 [[BLOCKSIZE]], 3 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr float, float* [[PA]], i32 [[INDEX]] -; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr float, float* [[PB]], i32 [[INDEX]] -; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[NEXT_GEP]] to <4 x float>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[NEXT_GEP5]] to <4 x float>* -; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD6]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[WIDE_LOAD]]) -; CHECK-NEXT: [[TMP6:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[WIDE_LOAD6]]) -; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[TMP6]], [[TMP5]] -; CHECK-NEXT: [[TMP8:%.*]] = fsub fast <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD6]] -; CHECK-NEXT: [[TMP9:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP8]]) -; CHECK-NEXT: [[TMP10:%.*]] = fdiv fast <4 x float> [[TMP9]], [[TMP7]] -; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP4]], <4 x float> , <4 x float> [[TMP10]] -; CHECK-NEXT: [[PREDPHI]] = fadd fast <4 x float> [[VEC_PHI]], [[TMP11]] +; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[INDEX]], 2 +; 
CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PA]], i32 [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = shl i32 [[INDEX]], 2 +; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[PB]], i32 [[TMP3]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[NEXT_GEP]], align 4 +; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[NEXT_GEP5]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD6]], zeroinitializer +; CHECK-NEXT: [[DOTNOT8:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP5]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP7:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[WIDE_LOAD6]]) +; CHECK-NEXT: [[TMP8:%.*]] = fadd fast <4 x float> [[TMP7]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = fsub fast <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD6]] +; CHECK-NEXT: [[TMP10:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP9]]) +; CHECK-NEXT: [[TMP11:%.*]] = fdiv fast <4 x float> [[TMP10]], [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[DOTNOT8]], <4 x float> , <4 x float> [[TMP11]] +; CHECK-NEXT: [[PREDPHI]] = fadd fast <4 x float> [[VEC_PHI]], [[TMP12]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[PREDPHI]]) +; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> 
[[PREDPHI]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[BLOCKSIZE]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi float* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[PA]], [[WHILE_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi float* [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[PB]], [[WHILE_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[PA]], [[WHILE_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[PB]], [[WHILE_BODY_PREHEADER]] ] ; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[BLOCKSIZE]], [[WHILE_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[WHILE_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[WHILE_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK: while.body: -; CHECK-NEXT: [[PA_ADDR_020:%.*]] = phi float* [ [[INCDEC_PTR:%.*]], [[IF_END:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[PB_ADDR_019:%.*]] = phi float* [ [[INCDEC_PTR1:%.*]], [[IF_END]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[PA_ADDR_020:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[IF_END:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[PB_ADDR_019:%.*]] = phi ptr [ [[INCDEC_PTR1:%.*]], [[IF_END]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[BLOCKSIZE_ADDR_018:%.*]] = phi i32 [ [[DEC:%.*]], [[IF_END]] ], [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ACCUM_017:%.*]] = phi float [ [[ACCUM_1:%.*]], [[IF_END]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds float, float* [[PA_ADDR_020]], i32 1 -; CHECK-NEXT: [[TMP14:%.*]] = load float, float* [[PA_ADDR_020]], align 4 -; CHECK-NEXT: 
[[INCDEC_PTR1]] = getelementptr inbounds float, float* [[PB_ADDR_019]], i32 1 -; CHECK-NEXT: [[TMP15:%.*]] = load float, float* [[PB_ADDR_019]], align 4 -; CHECK-NEXT: [[CMP2:%.*]] = fcmp fast une float [[TMP14]], 0.000000e+00 -; CHECK-NEXT: [[CMP3:%.*]] = fcmp fast une float [[TMP15]], 0.000000e+00 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds float, ptr [[PA_ADDR_020]], i32 1 +; CHECK-NEXT: [[TMP15:%.*]] = load float, ptr [[PA_ADDR_020]], align 4 +; CHECK-NEXT: [[INCDEC_PTR1]] = getelementptr inbounds float, ptr [[PB_ADDR_019]], i32 1 +; CHECK-NEXT: [[TMP16:%.*]] = load float, ptr [[PB_ADDR_019]], align 4 +; CHECK-NEXT: [[CMP2:%.*]] = fcmp fast une float [[TMP15]], 0.000000e+00 +; CHECK-NEXT: [[CMP3:%.*]] = fcmp fast une float [[TMP16]], 0.000000e+00 ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP2]], i1 true, i1 [[CMP3]] ; CHECK-NEXT: br i1 [[OR_COND]], label [[IF_THEN:%.*]], label [[IF_END]] ; CHECK: if.then: -; CHECK-NEXT: [[TMP16:%.*]] = tail call fast float @llvm.fabs.f32(float [[TMP14]]) ; CHECK-NEXT: [[TMP17:%.*]] = tail call fast float @llvm.fabs.f32(float [[TMP15]]) -; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP17]], [[TMP16]] -; CHECK-NEXT: [[SUB:%.*]] = fsub fast float [[TMP14]], [[TMP15]] -; CHECK-NEXT: [[TMP18:%.*]] = tail call fast float @llvm.fabs.f32(float [[SUB]]) -; CHECK-NEXT: [[DIV:%.*]] = fdiv fast float [[TMP18]], [[ADD]] +; CHECK-NEXT: [[TMP18:%.*]] = tail call fast float @llvm.fabs.f32(float [[TMP16]]) +; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP18]], [[TMP17]] +; CHECK-NEXT: [[SUB:%.*]] = fsub fast float [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP19:%.*]] = tail call fast float @llvm.fabs.f32(float [[SUB]]) +; CHECK-NEXT: [[DIV:%.*]] = fdiv fast float [[TMP19]], [[ADD]] ; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float [[DIV]], [[ACCUM_017]] ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: @@ -86,7 +88,7 @@ define float @test(float* nocapture readonly %pA, float* nocapture readonly %pB, ; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp 
eq i32 [[DEC]], 0 ; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END]], label [[WHILE_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: while.end: -; CHECK-NEXT: [[ACCUM_0_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[ACCUM_1]], [[IF_END]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[ACCUM_0_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[ACCUM_1]], [[IF_END]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret float [[ACCUM_0_LCSSA]] ; entry: @@ -94,14 +96,14 @@ entry: br i1 %cmp.not16, label %while.end, label %while.body while.body: ; preds = %entry, %if.end - %pA.addr.020 = phi float* [ %incdec.ptr, %if.end ], [ %pA, %entry ] - %pB.addr.019 = phi float* [ %incdec.ptr1, %if.end ], [ %pB, %entry ] + %pA.addr.020 = phi ptr [ %incdec.ptr, %if.end ], [ %pA, %entry ] + %pB.addr.019 = phi ptr [ %incdec.ptr1, %if.end ], [ %pB, %entry ] %blockSize.addr.018 = phi i32 [ %dec, %if.end ], [ %blockSize, %entry ] %accum.017 = phi float [ %accum.1, %if.end ], [ 0.000000e+00, %entry ] - %incdec.ptr = getelementptr inbounds float, float* %pA.addr.020, i32 1 - %0 = load float, float* %pA.addr.020, align 4 - %incdec.ptr1 = getelementptr inbounds float, float* %pB.addr.019, i32 1 - %1 = load float, float* %pB.addr.019, align 4 + %incdec.ptr = getelementptr inbounds float, ptr %pA.addr.020, i32 1 + %0 = load float, ptr %pA.addr.020, align 4 + %incdec.ptr1 = getelementptr inbounds float, ptr %pB.addr.019, i32 1 + %1 = load float, ptr %pB.addr.019, align 4 %cmp2 = fcmp fast une float %0, 0.000000e+00 %cmp3 = fcmp fast une float %1, 0.000000e+00 %or.cond = select i1 %cmp2, i1 true, i1 %cmp3 @@ -131,3 +133,5 @@ while.end: ; preds = %if.end, %entry declare float @llvm.fabs.f32(float) attributes #0 = { "target-features"="+mve.fp" } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK-COST: {{.*}} diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-vldn.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-vldn.ll index ca95279..3589aaa0 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-vldn.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-vldn.ll @@ -9,7 +9,7 @@ target triple = "thumbv8.1m.main-none-none-eabi" ; CHECK-LABEL: vld2 ; CHECK-2: vector.body ; CHECK-NO2-NOT: vector.body -define void @vld2(half* nocapture readonly %pIn, half* nocapture %pOut, i32 %numRows, i32 %numCols, i32 %scale.coerce) #0 { +define void @vld2(ptr nocapture readonly %pIn, ptr nocapture %pOut, i32 %numRows, i32 %numCols, i32 %scale.coerce) #0 { entry: %tmp.0.extract.trunc = trunc i32 %scale.coerce to i16 %0 = bitcast i16 %tmp.0.extract.trunc to half @@ -19,19 +19,19 @@ entry: br i1 %cmp26, label %while.end, label %while.body while.body: ; preds = %entry, %while.body - %pIn.addr.029 = phi half* [ %add.ptr, %while.body ], [ %pIn, %entry ] - %pOut.addr.028 = phi half* [ %add.ptr7, %while.body ], [ %pOut, %entry ] + %pIn.addr.029 = phi ptr [ %add.ptr, %while.body ], [ %pIn, %entry ] + %pOut.addr.028 = phi ptr [ %add.ptr7, %while.body ], [ %pOut, %entry ] %blkCnt.027 = phi i32 [ %dec, %while.body ], [ %shr, %entry ] - %1 = load half, half* %pIn.addr.029, align 2 - %arrayidx2 = getelementptr inbounds half, half* %pIn.addr.029, i32 1 - %2 = load half, half* %arrayidx2, align 2 + %1 = load half, ptr %pIn.addr.029, align 2 + %arrayidx2 = getelementptr inbounds half, ptr %pIn.addr.029, i32 1 + %2 = load half, ptr %arrayidx2, align 2 %mul3 = fmul half %1, %0 %mul4 = fmul half %2, %0 - store half %mul3, half* %pOut.addr.028, align 2 - %arrayidx6 = getelementptr inbounds half, half* %pOut.addr.028, i32 1 - store half %mul4, half* %arrayidx6, align 2 - %add.ptr = getelementptr inbounds half, half* %pIn.addr.029, i32 2 - %add.ptr7 = getelementptr inbounds half, half* %pOut.addr.028, i32 2 + store half %mul3, ptr %pOut.addr.028, 
align 2 + %arrayidx6 = getelementptr inbounds half, ptr %pOut.addr.028, i32 1 + store half %mul4, ptr %arrayidx6, align 2 + %add.ptr = getelementptr inbounds half, ptr %pIn.addr.029, i32 2 + %add.ptr7 = getelementptr inbounds half, ptr %pOut.addr.028, i32 2 %dec = add nsw i32 %blkCnt.027, -1 %cmp = icmp eq i32 %dec, 0 br i1 %cmp, label %while.end, label %while.body @@ -43,7 +43,7 @@ while.end: ; preds = %while.body, %entry ; CHECK-LABEL: vld4 ; CHECK-4: vector.body ; CHECK-NO4-NOT: vector.body -define void @vld4(half* nocapture readonly %pIn, half* nocapture %pOut, i32 %numRows, i32 %numCols, i32 %scale.coerce) #0 { +define void @vld4(ptr nocapture readonly %pIn, ptr nocapture %pOut, i32 %numRows, i32 %numCols, i32 %scale.coerce) #0 { entry: %tmp.0.extract.trunc = trunc i32 %scale.coerce to i16 %0 = bitcast i16 %tmp.0.extract.trunc to half @@ -53,29 +53,29 @@ entry: br i1 %cmp38, label %while.end, label %while.body while.body: ; preds = %entry, %while.body - %pIn.addr.041 = phi half* [ %add.ptr, %while.body ], [ %pIn, %entry ] - %pOut.addr.040 = phi half* [ %add.ptr13, %while.body ], [ %pOut, %entry ] + %pIn.addr.041 = phi ptr [ %add.ptr, %while.body ], [ %pIn, %entry ] + %pOut.addr.040 = phi ptr [ %add.ptr13, %while.body ], [ %pOut, %entry ] %blkCnt.039 = phi i32 [ %dec, %while.body ], [ %shr, %entry ] - %1 = load half, half* %pIn.addr.041, align 2 - %arrayidx2 = getelementptr inbounds half, half* %pIn.addr.041, i32 1 - %2 = load half, half* %arrayidx2, align 2 - %arrayidx3 = getelementptr inbounds half, half* %pIn.addr.041, i32 2 - %3 = load half, half* %arrayidx3, align 2 - %arrayidx4 = getelementptr inbounds half, half* %pIn.addr.041, i32 3 - %4 = load half, half* %arrayidx4, align 2 + %1 = load half, ptr %pIn.addr.041, align 2 + %arrayidx2 = getelementptr inbounds half, ptr %pIn.addr.041, i32 1 + %2 = load half, ptr %arrayidx2, align 2 + %arrayidx3 = getelementptr inbounds half, ptr %pIn.addr.041, i32 2 + %3 = load half, ptr %arrayidx3, align 2 + %arrayidx4 = 
getelementptr inbounds half, ptr %pIn.addr.041, i32 3 + %4 = load half, ptr %arrayidx4, align 2 %mul5 = fmul half %1, %0 %mul6 = fmul half %2, %0 %mul7 = fmul half %3, %0 %mul8 = fmul half %4, %0 - store half %mul5, half* %pOut.addr.040, align 2 - %arrayidx10 = getelementptr inbounds half, half* %pOut.addr.040, i32 1 - store half %mul6, half* %arrayidx10, align 2 - %arrayidx11 = getelementptr inbounds half, half* %pOut.addr.040, i32 2 - store half %mul7, half* %arrayidx11, align 2 - %arrayidx12 = getelementptr inbounds half, half* %pOut.addr.040, i32 3 - store half %mul8, half* %arrayidx12, align 2 - %add.ptr = getelementptr inbounds half, half* %pIn.addr.041, i32 4 - %add.ptr13 = getelementptr inbounds half, half* %pOut.addr.040, i32 4 + store half %mul5, ptr %pOut.addr.040, align 2 + %arrayidx10 = getelementptr inbounds half, ptr %pOut.addr.040, i32 1 + store half %mul6, ptr %arrayidx10, align 2 + %arrayidx11 = getelementptr inbounds half, ptr %pOut.addr.040, i32 2 + store half %mul7, ptr %arrayidx11, align 2 + %arrayidx12 = getelementptr inbounds half, ptr %pOut.addr.040, i32 3 + store half %mul8, ptr %arrayidx12, align 2 + %add.ptr = getelementptr inbounds half, ptr %pIn.addr.041, i32 4 + %add.ptr13 = getelementptr inbounds half, ptr %pOut.addr.040, i32 4 %dec = add nsw i32 %blkCnt.039, -1 %cmp = icmp eq i32 %dec, 0 br i1 %cmp, label %while.end, label %while.body diff --git a/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll b/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll index d394fda..7277d6d 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll @@ -4,7 +4,7 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv8.1m.main-none-none-eabi" -define hidden void @pointer_phi_v4i32_add1(i32* noalias nocapture readonly %A, i32* noalias nocapture %B, i32 %s, i32%y) { +define hidden void @pointer_phi_v4i32_add1(ptr noalias nocapture readonly 
%A, ptr noalias nocapture %B, i32 %s, i32%y) { ; CHECK-LABEL: @pointer_phi_v4i32_add1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i64 0 @@ -12,13 +12,13 @@ define hidden void @pointer_phi_v4i32_add1(i32* noalias nocapture readonly %A, i ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i32, i32* [[A:%.*]], i32 [[INDEX]] -; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i32, i32* [[B:%.*]], i32 [[INDEX]] -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[NEXT_GEP]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[NEXT_GEP4]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = shl i32 [[INDEX]], 2 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[INDEX]], 2 +; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 [[TMP1]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[NEXT_GEP]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: store <4 x i32> [[TMP2]], ptr [[NEXT_GEP4]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP3]], label [[END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -28,14 +28,14 @@ define hidden void @pointer_phi_v4i32_add1(i32* noalias nocapture readonly %A, i entry: br label %for.body for.body: - %A.addr.09 = phi i32* [ %add.ptr, %for.body ], [ %A, %entry ] + %A.addr.09 = phi ptr [ %add.ptr, %for.body ], [ %A, %entry ] %i.08 = phi 
i32 [ %inc, %for.body ], [ 0, %entry ] - %B.addr.07 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ] - %0 = load i32, i32* %A.addr.09, align 4 - %add.ptr = getelementptr inbounds i32, i32* %A.addr.09, i32 1 + %B.addr.07 = phi ptr [ %incdec.ptr, %for.body ], [ %B, %entry ] + %0 = load i32, ptr %A.addr.09, align 4 + %add.ptr = getelementptr inbounds i32, ptr %A.addr.09, i32 1 %add = add nsw i32 %0, %y - store i32 %add, i32* %B.addr.07, align 4 - %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.07, i32 1 + store i32 %add, ptr %B.addr.07, align 4 + %incdec.ptr = getelementptr inbounds i32, ptr %B.addr.07, i32 1 %inc = add nuw nsw i32 %i.08, 1 %exitcond = icmp eq i32 %inc, 1000 br i1 %exitcond, label %end, label %for.body @@ -43,37 +43,36 @@ end: ret void } -define hidden void @pointer_phi_v4i32_add2(i32* noalias nocapture readonly %A, i32* noalias nocapture %B, i32 %y) { +define hidden void @pointer_phi_v4i32_add2(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, i32 %y) { ; CHECK-LABEL: @pointer_phi_v4i32_add2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i32, i32* [[A:%.*]], i32 1992 -; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i32, i32* [[B:%.*]], i32 996 +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 7968 +; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 3984 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = shl i32 [[INDEX]], 1 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i32, i32* [[A]], i32 [[TMP0]] -; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i32, i32* [[B]], i32 [[INDEX]] -; CHECK-NEXT: 
[[TMP1:%.*]] = bitcast i32* [[NEXT_GEP]] to <8 x i32>* -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = shl i32 [[INDEX]], 3 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[INDEX]], 2 +; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B]], i32 [[TMP1]] +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[NEXT_GEP]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[STRIDED_VEC]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[NEXT_GEP4]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* [[TMP3]], align 4 +; CHECK-NEXT: store <4 x i32> [[TMP2]], ptr [[NEXT_GEP4]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 996 -; CHECK-NEXT: br i1 [[TMP4]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 996 +; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: for.body: -; CHECK-NEXT: [[A_ADDR_09:%.*]] = phi i32* [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[A_ADDR_09:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 996, [[VECTOR_BODY]] ] -; CHECK-NEXT: [[B_ADDR_07:%.*]] = phi i32* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[A_ADDR_09]], align 4 -; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i32, i32* [[A_ADDR_09]], i32 2 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], [[Y]] -; CHECK-NEXT: store i32 [[ADD]], i32* [[B_ADDR_07]], align 4 -; CHECK-NEXT: 
[[INCDEC_PTR]] = getelementptr inbounds i32, i32* [[B_ADDR_07]], i32 1 +; CHECK-NEXT: [[B_ADDR_07:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR_09]], align 4 +; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i32, ptr [[A_ADDR_09]], i32 2 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[Y]] +; CHECK-NEXT: store i32 [[ADD]], ptr [[B_ADDR_07]], align 4 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i32, ptr [[B_ADDR_07]], i32 1 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] @@ -83,14 +82,14 @@ define hidden void @pointer_phi_v4i32_add2(i32* noalias nocapture readonly %A, i entry: br label %for.body for.body: - %A.addr.09 = phi i32* [ %add.ptr, %for.body ], [ %A, %entry ] + %A.addr.09 = phi ptr [ %add.ptr, %for.body ], [ %A, %entry ] %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ] - %B.addr.07 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ] - %0 = load i32, i32* %A.addr.09, align 4 - %add.ptr = getelementptr inbounds i32, i32* %A.addr.09, i32 2 + %B.addr.07 = phi ptr [ %incdec.ptr, %for.body ], [ %B, %entry ] + %0 = load i32, ptr %A.addr.09, align 4 + %add.ptr = getelementptr inbounds i32, ptr %A.addr.09, i32 2 %add = add nsw i32 %0, %y - store i32 %add, i32* %B.addr.07, align 4 - %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.07, i32 1 + store i32 %add, ptr %B.addr.07, align 4 + %incdec.ptr = getelementptr inbounds i32, ptr %B.addr.07, i32 1 %inc = add nuw nsw i32 %i.08, 1 %exitcond = icmp eq i32 %inc, 1000 br i1 %exitcond, label %end, label %for.body @@ -98,36 +97,36 @@ end: ret void } -define hidden void @pointer_phi_v4i32_add3(i32* noalias nocapture readonly %A, i32* noalias nocapture %B, i32 %y) { +define hidden void @pointer_phi_v4i32_add3(ptr noalias nocapture readonly %A, 
ptr noalias nocapture %B, i32 %y) { ; CHECK-LABEL: @pointer_phi_v4i32_add3( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i32, i32* [[A:%.*]], i32 2988 -; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i32, i32* [[B:%.*]], i32 996 +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 11952 +; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 3984 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i32* [ [[A]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, i32* [[POINTER_PHI]], <4 x i32> -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i32, i32* [[B]], i32 [[INDEX]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP0]], i32 4, <4 x i1> , <4 x i32> poison) -; CHECK-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[NEXT_GEP]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[INDEX]], 2 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i32 [[TMP1]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> , <4 x i32> poison) +; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER]], 
[[BROADCAST_SPLAT]] +; CHECK-NEXT: store <4 x i32> [[TMP2]], ptr [[NEXT_GEP]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[PTR_IND]] = getelementptr i32, i32* [[POINTER_PHI]], i32 12 +; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i32 48 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 996 ; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: for.body: -; CHECK-NEXT: [[A_ADDR_09:%.*]] = phi i32* [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[A_ADDR_09:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 996, [[VECTOR_BODY]] ] -; CHECK-NEXT: [[B_ADDR_07:%.*]] = phi i32* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_ADDR_09]], align 4 -; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i32, i32* [[A_ADDR_09]], i32 3 +; CHECK-NEXT: [[B_ADDR_07:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR_09]], align 4 +; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i32, ptr [[A_ADDR_09]], i32 3 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[Y]] -; CHECK-NEXT: store i32 [[ADD]], i32* [[B_ADDR_07]], align 4 -; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i32, i32* [[B_ADDR_07]], i32 1 +; CHECK-NEXT: store i32 [[ADD]], ptr [[B_ADDR_07]], align 4 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i32, ptr [[B_ADDR_07]], i32 1 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -137,14 +136,14 @@ define hidden void @pointer_phi_v4i32_add3(i32* noalias nocapture readonly %A, i 
entry: br label %for.body for.body: - %A.addr.09 = phi i32* [ %add.ptr, %for.body ], [ %A, %entry ] + %A.addr.09 = phi ptr [ %add.ptr, %for.body ], [ %A, %entry ] %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ] - %B.addr.07 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ] - %0 = load i32, i32* %A.addr.09, align 4 - %add.ptr = getelementptr inbounds i32, i32* %A.addr.09, i32 3 + %B.addr.07 = phi ptr [ %incdec.ptr, %for.body ], [ %B, %entry ] + %0 = load i32, ptr %A.addr.09, align 4 + %add.ptr = getelementptr inbounds i32, ptr %A.addr.09, i32 3 %add = add nsw i32 %0, %y - store i32 %add, i32* %B.addr.07, align 4 - %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.07, i32 1 + store i32 %add, ptr %B.addr.07, align 4 + %incdec.ptr = getelementptr inbounds i32, ptr %B.addr.07, i32 1 %inc = add nuw nsw i32 %i.08, 1 %exitcond = icmp eq i32 %inc, 1000 br i1 %exitcond, label %end, label %for.body @@ -152,7 +151,7 @@ end: ret void } -define hidden void @pointer_phi_v8i16_add1(i16* noalias nocapture readonly %A, i16* noalias nocapture %B, i32 %y) { +define hidden void @pointer_phi_v8i16_add1(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, i32 %y) { ; CHECK-LABEL: @pointer_phi_v8i16_add1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[Y:%.*]] to i16 @@ -161,13 +160,13 @@ define hidden void @pointer_phi_v8i16_add1(i16* noalias nocapture readonly %A, i ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i16, i16* [[A:%.*]], i32 [[INDEX]] -; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i16, i16* [[B:%.*]], i32 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[NEXT_GEP]] to <8 x i16>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2 -; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i16> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP3:%.*]] = bitcast 
i16* [[NEXT_GEP4]] to <8 x i16>* -; CHECK-NEXT: store <8 x i16> [[TMP2]], <8 x i16>* [[TMP3]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[INDEX]], 1 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[INDEX]], 1 +; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 [[TMP2]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[NEXT_GEP]], align 2 +; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i16> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: store <8 x i16> [[TMP3]], ptr [[NEXT_GEP4]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP4]], label [[END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] @@ -178,14 +177,14 @@ entry: %0 = trunc i32 %y to i16 br label %for.body for.body: ; preds = %for.body, %for.body.lr.ph - %A.addr.011 = phi i16* [ %A, %entry ], [ %add.ptr, %for.body ] + %A.addr.011 = phi ptr [ %A, %entry ], [ %add.ptr, %for.body ] %i.010 = phi i32 [ 0, %entry ], [ %inc, %for.body ] - %B.addr.09 = phi i16* [ %B, %entry ], [ %incdec.ptr, %for.body ] - %l1 = load i16, i16* %A.addr.011, align 2 - %add.ptr = getelementptr inbounds i16, i16* %A.addr.011, i32 1 + %B.addr.09 = phi ptr [ %B, %entry ], [ %incdec.ptr, %for.body ] + %l1 = load i16, ptr %A.addr.011, align 2 + %add.ptr = getelementptr inbounds i16, ptr %A.addr.011, i32 1 %conv1 = add i16 %l1, %0 - store i16 %conv1, i16* %B.addr.09, align 2 - %incdec.ptr = getelementptr inbounds i16, i16* %B.addr.09, i32 1 + store i16 %conv1, ptr %B.addr.09, align 2 + %incdec.ptr = getelementptr inbounds i16, ptr %B.addr.09, i32 1 %inc = add nuw nsw i32 %i.010, 1 %exitcond = icmp eq i32 %inc, 1000 br i1 %exitcond, label %end, label %for.body @@ -193,38 +192,37 @@ end: ret void } -define hidden void @pointer_phi_v8i16_add2(i16* noalias nocapture readonly %A, i16* noalias nocapture %B, i32 %y) { +define hidden void 
@pointer_phi_v8i16_add2(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, i32 %y) { ; CHECK-LABEL: @pointer_phi_v8i16_add2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[Y:%.*]] to i16 -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i16, i16* [[A:%.*]], i32 1984 -; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i16, i16* [[B:%.*]], i32 992 +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 3968 +; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 1984 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[INDEX]], 1 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i16, i16* [[A]], i32 [[TMP1]] -; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i16, i16* [[B]], i32 [[INDEX]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[NEXT_GEP]] to <16 x i16>* -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i16>, <16 x i16>* [[TMP2]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[INDEX]], 2 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i32 [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[INDEX]], 1 +; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B]], i32 [[TMP2]] +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i16>, ptr [[NEXT_GEP]], align 2 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i16> [[WIDE_VEC]], <16 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> ; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i16> [[STRIDED_VEC]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[NEXT_GEP4]] to <8 x i16>* -; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2 +; CHECK-NEXT: store <8 x i16> [[TMP3]], ptr [[NEXT_GEP4]], align
2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992 -; CHECK-NEXT: br i1 [[TMP5]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992 +; CHECK-NEXT: br i1 [[TMP4]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: for.body: -; CHECK-NEXT: [[A_ADDR_011:%.*]] = phi i16* [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[A_ADDR_011:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[I_010:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 992, [[VECTOR_BODY]] ] -; CHECK-NEXT: [[B_ADDR_09:%.*]] = phi i16* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[L1:%.*]] = load i16, i16* [[A_ADDR_011]], align 2 -; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i16, i16* [[A_ADDR_011]], i32 2 +; CHECK-NEXT: [[B_ADDR_09:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[L1:%.*]] = load i16, ptr [[A_ADDR_011]], align 2 +; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i16, ptr [[A_ADDR_011]], i32 2 ; CHECK-NEXT: [[CONV1:%.*]] = add i16 [[L1]], [[TMP0]] -; CHECK-NEXT: store i16 [[CONV1]], i16* [[B_ADDR_09]], align 2 -; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i16, i16* [[B_ADDR_09]], i32 1 +; CHECK-NEXT: store i16 [[CONV1]], ptr [[B_ADDR_09]], align 2 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i16, ptr [[B_ADDR_09]], i32 1 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_010]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] @@ -235,14 +233,14 @@ entry: %0 = trunc i32 %y to i16 br label %for.body for.body: ; preds = %for.body, %for.body.lr.ph - %A.addr.011 = phi i16* [ %A, %entry ], [ 
%add.ptr, %for.body ] + %A.addr.011 = phi ptr [ %A, %entry ], [ %add.ptr, %for.body ] %i.010 = phi i32 [ 0, %entry ], [ %inc, %for.body ] - %B.addr.09 = phi i16* [ %B, %entry ], [ %incdec.ptr, %for.body ] - %l1 = load i16, i16* %A.addr.011, align 2 - %add.ptr = getelementptr inbounds i16, i16* %A.addr.011, i32 2 + %B.addr.09 = phi ptr [ %B, %entry ], [ %incdec.ptr, %for.body ] + %l1 = load i16, ptr %A.addr.011, align 2 + %add.ptr = getelementptr inbounds i16, ptr %A.addr.011, i32 2 %conv1 = add i16 %l1, %0 - store i16 %conv1, i16* %B.addr.09, align 2 - %incdec.ptr = getelementptr inbounds i16, i16* %B.addr.09, i32 1 + store i16 %conv1, ptr %B.addr.09, align 2 + %incdec.ptr = getelementptr inbounds i16, ptr %B.addr.09, i32 1 %inc = add nuw nsw i32 %i.010, 1 %exitcond = icmp eq i32 %inc, 1000 br i1 %exitcond, label %end, label %for.body @@ -250,20 +248,20 @@ end: ret void } -define hidden void @pointer_phi_v8i16_add3(i16* noalias nocapture readonly %A, i16* noalias nocapture %B, i32 %y) { +define hidden void @pointer_phi_v8i16_add3(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, i32 %y) { ; CHECK-LABEL: @pointer_phi_v8i16_add3( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[Y:%.*]] to i16 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[A_ADDR_011:%.*]] = phi i16* [ [[A:%.*]], [[ENTRY:%.*]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[A_ADDR_011:%.*]] = phi ptr [ [[A:%.*]], [[ENTRY:%.*]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[I_010:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[B_ADDR_09:%.*]] = phi i16* [ [[B:%.*]], [[ENTRY]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[L1:%.*]] = load i16, i16* [[A_ADDR_011]], align 2 -; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i16, i16* [[A_ADDR_011]], i32 3 +; CHECK-NEXT: [[B_ADDR_09:%.*]] = phi ptr [ [[B:%.*]], [[ENTRY]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[L1:%.*]] = load i16, 
ptr [[A_ADDR_011]], align 2 +; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i16, ptr [[A_ADDR_011]], i32 3 ; CHECK-NEXT: [[CONV1:%.*]] = add i16 [[L1]], [[TMP0]] -; CHECK-NEXT: store i16 [[CONV1]], i16* [[B_ADDR_09]], align 2 -; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i16, i16* [[B_ADDR_09]], i32 1 +; CHECK-NEXT: store i16 [[CONV1]], ptr [[B_ADDR_09]], align 2 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i16, ptr [[B_ADDR_09]], i32 1 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_010]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[END:%.*]], label [[FOR_BODY]] @@ -274,14 +272,14 @@ entry: %0 = trunc i32 %y to i16 br label %for.body for.body: ; preds = %for.body, %for.body.lr.ph - %A.addr.011 = phi i16* [ %A, %entry ], [ %add.ptr, %for.body ] + %A.addr.011 = phi ptr [ %A, %entry ], [ %add.ptr, %for.body ] %i.010 = phi i32 [ 0, %entry ], [ %inc, %for.body ] - %B.addr.09 = phi i16* [ %B, %entry ], [ %incdec.ptr, %for.body ] - %l1 = load i16, i16* %A.addr.011, align 2 - %add.ptr = getelementptr inbounds i16, i16* %A.addr.011, i32 3 + %B.addr.09 = phi ptr [ %B, %entry ], [ %incdec.ptr, %for.body ] + %l1 = load i16, ptr %A.addr.011, align 2 + %add.ptr = getelementptr inbounds i16, ptr %A.addr.011, i32 3 %conv1 = add i16 %l1, %0 - store i16 %conv1, i16* %B.addr.09, align 2 - %incdec.ptr = getelementptr inbounds i16, i16* %B.addr.09, i32 1 + store i16 %conv1, ptr %B.addr.09, align 2 + %incdec.ptr = getelementptr inbounds i16, ptr %B.addr.09, i32 1 %inc = add nuw nsw i32 %i.010, 1 %exitcond = icmp eq i32 %inc, 1000 br i1 %exitcond, label %end, label %for.body @@ -289,36 +287,34 @@ end: ret void } -define hidden void @pointer_phi_v16i8_add1(i8* noalias nocapture readonly %A, i8* noalias nocapture %B, i32 %y) { +define hidden void @pointer_phi_v16i8_add1(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, i32 %y) { ; CHECK-LABEL: @pointer_phi_v16i8_add1( ; CHECK-NEXT: entry: ; 
CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[Y:%.*]] to i8 -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, i8* [[A:%.*]], i32 992 -; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, i8* [[B:%.*]], i32 992 +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 992 +; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 992 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[A]], i32 [[INDEX]] -; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, i8* [[B]], i32 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[NEXT_GEP]] to <16 x i8>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1 -; CHECK-NEXT: [[TMP2:%.*]] = add <16 x i8> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[NEXT_GEP4]] to <16 x i8>* -; CHECK-NEXT: store <16 x i8> [[TMP2]], <16 x i8>* [[TMP3]], align 1 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i32 [[INDEX]] +; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B]], i32 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = add <16 x i8> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: store <16 x i8> [[TMP1]], ptr [[NEXT_GEP4]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992 -; CHECK-NEXT: br i1 [[TMP4]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992 +; CHECK-NEXT: br i1 [[TMP2]], label [[FOR_BODY:%.*]], 
label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: for.body: -; CHECK-NEXT: [[A_ADDR_010:%.*]] = phi i8* [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[A_ADDR_010:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[I_09:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 992, [[VECTOR_BODY]] ] -; CHECK-NEXT: [[B_ADDR_08:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = load i8, i8* [[A_ADDR_010]], align 1 -; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i8, i8* [[A_ADDR_010]], i32 1 -; CHECK-NEXT: [[CONV1:%.*]] = add i8 [[TMP5]], [[TMP0]] -; CHECK-NEXT: store i8 [[CONV1]], i8* [[B_ADDR_08]], align 1 -; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[B_ADDR_08]], i32 1 +; CHECK-NEXT: [[B_ADDR_08:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[A_ADDR_010]], align 1 +; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i8, ptr [[A_ADDR_010]], i32 1 +; CHECK-NEXT: [[CONV1:%.*]] = add i8 [[TMP3]], [[TMP0]] +; CHECK-NEXT: store i8 [[CONV1]], ptr [[B_ADDR_08]], align 1 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[B_ADDR_08]], i32 1 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_09]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] @@ -330,14 +326,14 @@ entry: br label %for.body for.body: - %A.addr.010 = phi i8* [ %A, %entry ], [ %add.ptr, %for.body ] + %A.addr.010 = phi ptr [ %A, %entry ], [ %add.ptr, %for.body ] %i.09 = phi i32 [ 0, %entry ], [ %inc, %for.body ] - %B.addr.08 = phi i8* [ %B, %entry ], [ %incdec.ptr, %for.body ] - %1 = load i8, i8* %A.addr.010, align 1 - %add.ptr = getelementptr inbounds i8, i8* %A.addr.010, i32 1 + %B.addr.08 = phi ptr [ %B, %entry ], [ %incdec.ptr, 
%for.body ] + %1 = load i8, ptr %A.addr.010, align 1 + %add.ptr = getelementptr inbounds i8, ptr %A.addr.010, i32 1 %conv1 = add i8 %1, %0 - store i8 %conv1, i8* %B.addr.08, align 1 - %incdec.ptr = getelementptr inbounds i8, i8* %B.addr.08, i32 1 + store i8 %conv1, ptr %B.addr.08, align 1 + %incdec.ptr = getelementptr inbounds i8, ptr %B.addr.08, i32 1 %inc = add nuw nsw i32 %i.09, 1 %exitcond = icmp eq i32 %inc, 1000 br i1 %exitcond, label %end, label %for.body @@ -345,38 +341,36 @@ end: ret void } -define hidden void @pointer_phi_v16i8_add2(i8* noalias nocapture readonly %A, i8* noalias nocapture %B, i32 %y) { +define hidden void @pointer_phi_v16i8_add2(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, i32 %y) { ; CHECK-LABEL: @pointer_phi_v16i8_add2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[Y:%.*]] to i8 -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, i8* [[A:%.*]], i32 1984 -; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, i8* [[B:%.*]], i32 992 +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 1984 +; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 992 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[INDEX]], 1 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[A]], i32 [[TMP1]] -; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, i8* [[B]], i32 [[INDEX]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[NEXT_GEP]] to <32 x i8>* -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <32 x i8>, <32 x i8>* [[TMP2]], align 1 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i32 [[TMP1]] +; CHECK-NEXT: 
[[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B]], i32 [[INDEX]] +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <32 x i8>, ptr [[NEXT_GEP]], align 1 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> -; CHECK-NEXT: [[TMP3:%.*]] = add <16 x i8> [[STRIDED_VEC]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[NEXT_GEP4]] to <16 x i8>* -; CHECK-NEXT: store <16 x i8> [[TMP3]], <16 x i8>* [[TMP4]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = add <16 x i8> [[STRIDED_VEC]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: store <16 x i8> [[TMP2]], ptr [[NEXT_GEP4]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992 -; CHECK-NEXT: br i1 [[TMP5]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992 +; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: for.body: -; CHECK-NEXT: [[A_ADDR_010:%.*]] = phi i8* [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[A_ADDR_010:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[I_09:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 992, [[VECTOR_BODY]] ] -; CHECK-NEXT: [[B_ADDR_08:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP6:%.*]] = load i8, i8* [[A_ADDR_010]], align 1 -; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i8, i8* [[A_ADDR_010]], i32 2 -; CHECK-NEXT: [[CONV1:%.*]] = add i8 [[TMP6]], [[TMP0]] -; CHECK-NEXT: store i8 [[CONV1]], i8* [[B_ADDR_08]], align 1 -; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[B_ADDR_08]], i32 1 +; CHECK-NEXT: [[B_ADDR_08:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[A_ADDR_010]], align 1 +;
CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i8, ptr [[A_ADDR_010]], i32 2 +; CHECK-NEXT: [[CONV1:%.*]] = add i8 [[TMP4]], [[TMP0]] +; CHECK-NEXT: store i8 [[CONV1]], ptr [[B_ADDR_08]], align 1 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[B_ADDR_08]], i32 1 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_09]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] @@ -388,14 +382,14 @@ entry: br label %for.body for.body: - %A.addr.010 = phi i8* [ %A, %entry ], [ %add.ptr, %for.body ] + %A.addr.010 = phi ptr [ %A, %entry ], [ %add.ptr, %for.body ] %i.09 = phi i32 [ 0, %entry ], [ %inc, %for.body ] - %B.addr.08 = phi i8* [ %B, %entry ], [ %incdec.ptr, %for.body ] - %1 = load i8, i8* %A.addr.010, align 1 - %add.ptr = getelementptr inbounds i8, i8* %A.addr.010, i32 2 + %B.addr.08 = phi ptr [ %B, %entry ], [ %incdec.ptr, %for.body ] + %1 = load i8, ptr %A.addr.010, align 1 + %add.ptr = getelementptr inbounds i8, ptr %A.addr.010, i32 2 %conv1 = add i8 %1, %0 - store i8 %conv1, i8* %B.addr.08, align 1 - %incdec.ptr = getelementptr inbounds i8, i8* %B.addr.08, i32 1 + store i8 %conv1, ptr %B.addr.08, align 1 + %incdec.ptr = getelementptr inbounds i8, ptr %B.addr.08, i32 1 %inc = add nuw nsw i32 %i.09, 1 %exitcond = icmp eq i32 %inc, 1000 br i1 %exitcond, label %end, label %for.body @@ -403,20 +397,20 @@ end: ret void } -define hidden void @pointer_phi_v16i8_add3(i8* noalias nocapture readonly %A, i8* noalias nocapture %B, i32 %y) { +define hidden void @pointer_phi_v16i8_add3(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, i32 %y) { ; CHECK-LABEL: @pointer_phi_v16i8_add3( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[Y:%.*]] to i8 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[A_ADDR_010:%.*]] = phi i8* [ [[A:%.*]], [[ENTRY:%.*]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: 
[[A_ADDR_010:%.*]] = phi ptr [ [[A:%.*]], [[ENTRY:%.*]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[I_09:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[B_ADDR_08:%.*]] = phi i8* [ [[B:%.*]], [[ENTRY]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = load i8, i8* [[A_ADDR_010]], align 1 -; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i8, i8* [[A_ADDR_010]], i32 3 +; CHECK-NEXT: [[B_ADDR_08:%.*]] = phi ptr [ [[B:%.*]], [[ENTRY]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[A_ADDR_010]], align 1 +; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i8, ptr [[A_ADDR_010]], i32 3 ; CHECK-NEXT: [[CONV1:%.*]] = add i8 [[TMP1]], [[TMP0]] -; CHECK-NEXT: store i8 [[CONV1]], i8* [[B_ADDR_08]], align 1 -; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[B_ADDR_08]], i32 1 +; CHECK-NEXT: store i8 [[CONV1]], ptr [[B_ADDR_08]], align 1 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[B_ADDR_08]], i32 1 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_09]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[END:%.*]], label [[FOR_BODY]] @@ -428,14 +422,14 @@ entry: br label %for.body for.body: - %A.addr.010 = phi i8* [ %A, %entry ], [ %add.ptr, %for.body ] + %A.addr.010 = phi ptr [ %A, %entry ], [ %add.ptr, %for.body ] %i.09 = phi i32 [ 0, %entry ], [ %inc, %for.body ] - %B.addr.08 = phi i8* [ %B, %entry ], [ %incdec.ptr, %for.body ] - %1 = load i8, i8* %A.addr.010, align 1 - %add.ptr = getelementptr inbounds i8, i8* %A.addr.010, i32 3 + %B.addr.08 = phi ptr [ %B, %entry ], [ %incdec.ptr, %for.body ] + %1 = load i8, ptr %A.addr.010, align 1 + %add.ptr = getelementptr inbounds i8, ptr %A.addr.010, i32 3 %conv1 = add i8 %1, %0 - store i8 %conv1, i8* %B.addr.08, align 1 - %incdec.ptr = getelementptr inbounds i8, i8* %B.addr.08, i32 1 + store i8 %conv1, ptr %B.addr.08, align 1 + %incdec.ptr = getelementptr 
inbounds i8, ptr %B.addr.08, i32 1 %inc = add nuw nsw i32 %i.09, 1 %exitcond = icmp eq i32 %inc, 1000 br i1 %exitcond, label %end, label %for.body @@ -443,7 +437,7 @@ end: ret void } -define hidden void @pointer_phi_v4f32_add1(float* noalias nocapture readonly %A, float* noalias nocapture %B, float %y) { +define hidden void @pointer_phi_v4f32_add1(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, float %y) { ; CHECK-LABEL: @pointer_phi_v4f32_add1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[Y:%.*]], i64 0 @@ -451,13 +445,13 @@ define hidden void @pointer_phi_v4f32_add1(float* noalias nocapture readonly %A, ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr float, float* [[A:%.*]], i32 [[INDEX]] -; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr float, float* [[B:%.*]], i32 [[INDEX]] -; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[NEXT_GEP]] to <4 x float>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[NEXT_GEP4]] to <4 x float>* -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float>* [[TMP2]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = shl i32 [[INDEX]], 2 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[INDEX]], 2 +; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 [[TMP1]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[NEXT_GEP]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: store <4 x float> [[TMP2]], ptr [[NEXT_GEP4]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: 
[[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP3]], label [[END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] @@ -467,14 +461,14 @@ define hidden void @pointer_phi_v4f32_add1(float* noalias nocapture readonly %A, entry: br label %for.body for.body: - %A.addr.09 = phi float* [ %add.ptr, %for.body ], [ %A, %entry ] + %A.addr.09 = phi ptr [ %add.ptr, %for.body ], [ %A, %entry ] %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ] - %B.addr.07 = phi float* [ %incdec.ptr, %for.body ], [ %B, %entry ] - %0 = load float, float* %A.addr.09, align 4 - %add.ptr = getelementptr inbounds float, float* %A.addr.09, i32 1 + %B.addr.07 = phi ptr [ %incdec.ptr, %for.body ], [ %B, %entry ] + %0 = load float, ptr %A.addr.09, align 4 + %add.ptr = getelementptr inbounds float, ptr %A.addr.09, i32 1 %add = fadd fast float %0, %y - store float %add, float* %B.addr.07, align 4 - %incdec.ptr = getelementptr inbounds float, float* %B.addr.07, i32 1 + store float %add, ptr %B.addr.07, align 4 + %incdec.ptr = getelementptr inbounds float, ptr %B.addr.07, i32 1 %inc = add nuw nsw i32 %i.08, 1 %exitcond = icmp eq i32 %inc, 1000 br i1 %exitcond, label %end, label %for.body @@ -482,37 +476,36 @@ end: ret void } -define hidden void @pointer_phi_v4f32_add2(float* noalias nocapture readonly %A, float* noalias nocapture %B, float %y) { +define hidden void @pointer_phi_v4f32_add2(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, float %y) { ; CHECK-LABEL: @pointer_phi_v4f32_add2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr float, float* [[A:%.*]], i32 1992 -; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr float, float* [[B:%.*]], i32 996 +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 7968 +; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 3984 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[Y:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = 
shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = shl i32 [[INDEX]], 1 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr float, float* [[A]], i32 [[TMP0]] -; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr float, float* [[B]], i32 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[NEXT_GEP]] to <8 x float>* -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x float>, <8 x float>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = shl i32 [[INDEX]], 3 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[INDEX]], 2 +; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B]], i32 [[TMP1]] +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x float>, ptr [[NEXT_GEP]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x float> [[WIDE_VEC]], <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> ; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <4 x float> [[STRIDED_VEC]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[NEXT_GEP4]] to <4 x float>* -; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4 +; CHECK-NEXT: store <4 x float> [[TMP2]], ptr [[NEXT_GEP4]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 996 -; CHECK-NEXT: br i1 [[TMP4]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 996 +; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: for.body: -; CHECK-NEXT: [[A_ADDR_09:%.*]] = phi float* [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[A_ADDR_09:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [
[[IND_END]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 996, [[VECTOR_BODY]] ] -; CHECK-NEXT: [[B_ADDR_07:%.*]] = phi float* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = load float, float* [[A_ADDR_09]], align 4 -; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds float, float* [[A_ADDR_09]], i32 2 -; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP5]], [[Y]] -; CHECK-NEXT: store float [[ADD]], float* [[B_ADDR_07]], align 4 -; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds float, float* [[B_ADDR_07]], i32 1 +; CHECK-NEXT: [[B_ADDR_07:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[A_ADDR_09]], align 4 +; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds float, ptr [[A_ADDR_09]], i32 2 +; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP4]], [[Y]] +; CHECK-NEXT: store float [[ADD]], ptr [[B_ADDR_07]], align 4 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds float, ptr [[B_ADDR_07]], i32 1 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] @@ -522,14 +515,14 @@ define hidden void @pointer_phi_v4f32_add2(float* noalias nocapture readonly %A, entry: br label %for.body for.body: - %A.addr.09 = phi float* [ %add.ptr, %for.body ], [ %A, %entry ] + %A.addr.09 = phi ptr [ %add.ptr, %for.body ], [ %A, %entry ] %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ] - %B.addr.07 = phi float* [ %incdec.ptr, %for.body ], [ %B, %entry ] - %0 = load float, float* %A.addr.09, align 4 - %add.ptr = getelementptr inbounds float, float* %A.addr.09, i32 2 + %B.addr.07 = phi ptr [ %incdec.ptr, %for.body ], [ %B, %entry ] + %0 = load float, ptr %A.addr.09, align 4 + %add.ptr = getelementptr inbounds float, ptr %A.addr.09, i32 2 %add = fadd 
fast float %0, %y - store float %add, float* %B.addr.07, align 4 - %incdec.ptr = getelementptr inbounds float, float* %B.addr.07, i32 1 + store float %add, ptr %B.addr.07, align 4 + %incdec.ptr = getelementptr inbounds float, ptr %B.addr.07, i32 1 %inc = add nuw nsw i32 %i.08, 1 %exitcond = icmp eq i32 %inc, 1000 br i1 %exitcond, label %end, label %for.body @@ -537,36 +530,36 @@ end: ret void } -define hidden void @pointer_phi_v4f32_add3(float* noalias nocapture readonly %A, float* noalias nocapture %B, float %y) { +define hidden void @pointer_phi_v4f32_add3(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, float %y) { ; CHECK-LABEL: @pointer_phi_v4f32_add3( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr float, float* [[A:%.*]], i32 2988 -; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr float, float* [[B:%.*]], i32 996 +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 11952 +; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 3984 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[Y:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi float* [ [[A]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr float, float* [[POINTER_PHI]], <4 x i32> <i32 0, i32 3, i32 6, i32 9> -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr float, float* [[B]], i32 [[INDEX]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> [[TMP0]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> poison) -; CHECK-NEXT: [[TMP1:%.*]] = fadd fast
<4 x float> [[WIDE_MASKED_GATHER]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[NEXT_GEP]] to <4 x float>* -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float>* [[TMP2]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[INDEX]], 2 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i32 [[TMP1]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> , <4 x float> poison) +; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <4 x float> [[WIDE_MASKED_GATHER]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: store <4 x float> [[TMP2]], ptr [[NEXT_GEP]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[PTR_IND]] = getelementptr float, float* [[POINTER_PHI]], i32 12 +; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i32 48 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 996 ; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: for.body: -; CHECK-NEXT: [[A_ADDR_09:%.*]] = phi float* [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[A_ADDR_09:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 996, [[VECTOR_BODY]] ] -; CHECK-NEXT: [[B_ADDR_07:%.*]] = phi float* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = load float, float* [[A_ADDR_09]], align 4 -; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds float, float* [[A_ADDR_09]], i32 3 +; CHECK-NEXT: [[B_ADDR_07:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[A_ADDR_09]], align 4 +; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds float, ptr [[A_ADDR_09]], i32 3 ; 
CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP4]], [[Y]] -; CHECK-NEXT: store float [[ADD]], float* [[B_ADDR_07]], align 4 -; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds float, float* [[B_ADDR_07]], i32 1 +; CHECK-NEXT: store float [[ADD]], ptr [[B_ADDR_07]], align 4 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds float, ptr [[B_ADDR_07]], i32 1 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] @@ -576,14 +569,14 @@ define hidden void @pointer_phi_v4f32_add3(float* noalias nocapture readonly %A, entry: br label %for.body for.body: - %A.addr.09 = phi float* [ %add.ptr, %for.body ], [ %A, %entry ] + %A.addr.09 = phi ptr [ %add.ptr, %for.body ], [ %A, %entry ] %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ] - %B.addr.07 = phi float* [ %incdec.ptr, %for.body ], [ %B, %entry ] - %0 = load float, float* %A.addr.09, align 4 - %add.ptr = getelementptr inbounds float, float* %A.addr.09, i32 3 + %B.addr.07 = phi ptr [ %incdec.ptr, %for.body ], [ %B, %entry ] + %0 = load float, ptr %A.addr.09, align 4 + %add.ptr = getelementptr inbounds float, ptr %A.addr.09, i32 3 %add = fadd fast float %0, %y - store float %add, float* %B.addr.07, align 4 - %incdec.ptr = getelementptr inbounds float, float* %B.addr.07, i32 1 + store float %add, ptr %B.addr.07, align 4 + %incdec.ptr = getelementptr inbounds float, ptr %B.addr.07, i32 1 %inc = add nuw nsw i32 %i.08, 1 %exitcond = icmp eq i32 %inc, 1000 br i1 %exitcond, label %end, label %for.body @@ -591,7 +584,7 @@ end: ret void } -define hidden void @pointer_phi_v4half_add1(half* noalias nocapture readonly %A, half* noalias nocapture %B, half %y) { +define hidden void @pointer_phi_v4half_add1(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, half %y) { ; CHECK-LABEL: @pointer_phi_v4half_add1( ; CHECK-NEXT: entry: ; CHECK-NEXT: 
[[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[Y:%.*]], i64 0 @@ -599,13 +592,13 @@ define hidden void @pointer_phi_v4half_add1(half* noalias nocapture readonly %A, ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr half, half* [[A:%.*]], i32 [[INDEX]] -; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr half, half* [[B:%.*]], i32 [[INDEX]] -; CHECK-NEXT: [[TMP0:%.*]] = bitcast half* [[NEXT_GEP]] to <8 x half>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x half>, <8 x half>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = fadd fast <8 x half> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast half* [[NEXT_GEP4]] to <8 x half>* -; CHECK-NEXT: store <8 x half> [[TMP1]], <8 x half>* [[TMP2]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = shl i32 [[INDEX]], 1 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[INDEX]], 1 +; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 [[TMP1]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x half>, ptr [[NEXT_GEP]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <8 x half> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: store <8 x half> [[TMP2]], ptr [[NEXT_GEP4]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP3]], label [[END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] @@ -615,14 +608,14 @@ define hidden void @pointer_phi_v4half_add1(half* noalias nocapture readonly %A, entry: br label %for.body for.body: - %A.addr.09 = phi half* [ %add.ptr, %for.body ], [ %A, %entry ] + %A.addr.09 = phi ptr [ %add.ptr, %for.body ], [ %A, %entry ] %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ] - %B.addr.07 = phi half* [ %incdec.ptr, %for.body ], [ 
%B, %entry ] - %0 = load half, half* %A.addr.09, align 4 - %add.ptr = getelementptr inbounds half, half* %A.addr.09, i32 1 + %B.addr.07 = phi ptr [ %incdec.ptr, %for.body ], [ %B, %entry ] + %0 = load half, ptr %A.addr.09, align 4 + %add.ptr = getelementptr inbounds half, ptr %A.addr.09, i32 1 %add = fadd fast half %0, %y - store half %add, half* %B.addr.07, align 4 - %incdec.ptr = getelementptr inbounds half, half* %B.addr.07, i32 1 + store half %add, ptr %B.addr.07, align 4 + %incdec.ptr = getelementptr inbounds half, ptr %B.addr.07, i32 1 %inc = add nuw nsw i32 %i.08, 1 %exitcond = icmp eq i32 %inc, 1000 br i1 %exitcond, label %end, label %for.body @@ -630,37 +623,36 @@ end: ret void } -define hidden void @pointer_phi_v4half_add2(half* noalias nocapture readonly %A, half* noalias nocapture %B, half %y) { +define hidden void @pointer_phi_v4half_add2(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, half %y) { ; CHECK-LABEL: @pointer_phi_v4half_add2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr half, half* [[A:%.*]], i32 1984 -; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr half, half* [[B:%.*]], i32 992 +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 3968 +; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 1984 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[Y:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x half> [[BROADCAST_SPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = shl i32 [[INDEX]], 1 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr half, half* [[A]], i32 [[TMP0]] -; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr half, half* [[B]], i32 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[NEXT_GEP]] to <16 x half>* -; 
CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x half>, <16 x half>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = shl i32 [[INDEX]], 2 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[INDEX]], 1 +; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B]], i32 [[TMP1]] +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x half>, ptr [[NEXT_GEP]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x half> [[WIDE_VEC]], <16 x half> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <8 x half> [[STRIDED_VEC]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP3:%.*]] = bitcast half* [[NEXT_GEP4]] to <8 x half>* -; CHECK-NEXT: store <8 x half> [[TMP2]], <8 x half>* [[TMP3]], align 4 +; CHECK-NEXT: store <8 x half> [[TMP2]], ptr [[NEXT_GEP4]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992 -; CHECK-NEXT: br i1 [[TMP4]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992 +; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: for.body: -; CHECK-NEXT: [[A_ADDR_09:%.*]] = phi half* [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[A_ADDR_09:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 992, [[VECTOR_BODY]] ] -; CHECK-NEXT: [[B_ADDR_07:%.*]] = phi half* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = load half, half* [[A_ADDR_09]], align 4 -; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds half, half* [[A_ADDR_09]], i32 2 -; CHECK-NEXT: [[ADD:%.*]] = fadd fast half [[TMP5]], [[Y]] -; CHECK-NEXT: store half [[ADD]], half* [[B_ADDR_07]], align 4 -; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr 
inbounds half, half* [[B_ADDR_07]], i32 1 +; CHECK-NEXT: [[B_ADDR_07:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = load half, ptr [[A_ADDR_09]], align 4 +; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds half, ptr [[A_ADDR_09]], i32 2 +; CHECK-NEXT: [[ADD:%.*]] = fadd fast half [[TMP4]], [[Y]] +; CHECK-NEXT: store half [[ADD]], ptr [[B_ADDR_07]], align 4 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds half, ptr [[B_ADDR_07]], i32 1 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] @@ -670,14 +662,14 @@ define hidden void @pointer_phi_v4half_add2(half* noalias nocapture readonly %A, entry: br label %for.body for.body: - %A.addr.09 = phi half* [ %add.ptr, %for.body ], [ %A, %entry ] + %A.addr.09 = phi ptr [ %add.ptr, %for.body ], [ %A, %entry ] %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ] - %B.addr.07 = phi half* [ %incdec.ptr, %for.body ], [ %B, %entry ] - %0 = load half, half* %A.addr.09, align 4 - %add.ptr = getelementptr inbounds half, half* %A.addr.09, i32 2 + %B.addr.07 = phi ptr [ %incdec.ptr, %for.body ], [ %B, %entry ] + %0 = load half, ptr %A.addr.09, align 4 + %add.ptr = getelementptr inbounds half, ptr %A.addr.09, i32 2 %add = fadd fast half %0, %y - store half %add, half* %B.addr.07, align 4 - %incdec.ptr = getelementptr inbounds half, half* %B.addr.07, i32 1 + store half %add, ptr %B.addr.07, align 4 + %incdec.ptr = getelementptr inbounds half, ptr %B.addr.07, i32 1 %inc = add nuw nsw i32 %i.08, 1 %exitcond = icmp eq i32 %inc, 1000 br i1 %exitcond, label %end, label %for.body @@ -685,37 +677,36 @@ end: ret void } -define hidden void @pointer_phi_v4half_add3(half* noalias nocapture readonly %A, half* noalias nocapture %B, half %y) { +define hidden void @pointer_phi_v4half_add3(ptr noalias nocapture readonly 
%A, ptr noalias nocapture %B, half %y) { ; CHECK-LABEL: @pointer_phi_v4half_add3( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr half, half* [[A:%.*]], i32 2976 -; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr half, half* [[B:%.*]], i32 992 +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 5952 +; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 1984 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[Y:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x half> [[BROADCAST_SPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[INDEX]], 3 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr half, half* [[A]], i32 [[TMP0]] -; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr half, half* [[B]], i32 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[NEXT_GEP]] to <24 x half>* -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <24 x half>, <24 x half>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[INDEX]], 6 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[INDEX]], 1 +; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B]], i32 [[TMP1]] +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <24 x half>, ptr [[NEXT_GEP]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <24 x half> [[WIDE_VEC]], <24 x half> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <8 x half> [[STRIDED_VEC]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP3:%.*]] = bitcast half* [[NEXT_GEP4]] to <8 x half>* -; CHECK-NEXT: store <8 x half> [[TMP2]], <8 x half>* [[TMP3]], align 4 +; CHECK-NEXT: store <8 x half> [[TMP2]], ptr [[NEXT_GEP4]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; CHECK-NEXT: 
[[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992 -; CHECK-NEXT: br i1 [[TMP4]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992 +; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: for.body: -; CHECK-NEXT: [[A_ADDR_09:%.*]] = phi half* [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[A_ADDR_09:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 992, [[VECTOR_BODY]] ] -; CHECK-NEXT: [[B_ADDR_07:%.*]] = phi half* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = load half, half* [[A_ADDR_09]], align 4 -; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds half, half* [[A_ADDR_09]], i32 3 -; CHECK-NEXT: [[ADD:%.*]] = fadd fast half [[TMP5]], [[Y]] -; CHECK-NEXT: store half [[ADD]], half* [[B_ADDR_07]], align 4 -; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds half, half* [[B_ADDR_07]], i32 1 +; CHECK-NEXT: [[B_ADDR_07:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = load half, ptr [[A_ADDR_09]], align 4 +; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds half, ptr [[A_ADDR_09]], i32 3 +; CHECK-NEXT: [[ADD:%.*]] = fadd fast half [[TMP4]], [[Y]] +; CHECK-NEXT: store half [[ADD]], ptr [[B_ADDR_07]], align 4 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds half, ptr [[B_ADDR_07]], i32 1 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] @@ -725,14 +716,14 @@ define hidden void @pointer_phi_v4half_add3(half* noalias nocapture readonly %A, entry: br label %for.body for.body: - %A.addr.09 = phi half* [ 
%add.ptr, %for.body ], [ %A, %entry ] + %A.addr.09 = phi ptr [ %add.ptr, %for.body ], [ %A, %entry ] %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ] - %B.addr.07 = phi half* [ %incdec.ptr, %for.body ], [ %B, %entry ] - %0 = load half, half* %A.addr.09, align 4 - %add.ptr = getelementptr inbounds half, half* %A.addr.09, i32 3 + %B.addr.07 = phi ptr [ %incdec.ptr, %for.body ], [ %B, %entry ] + %0 = load half, ptr %A.addr.09, align 4 + %add.ptr = getelementptr inbounds half, ptr %A.addr.09, i32 3 %add = fadd fast half %0, %y - store half %add, half* %B.addr.07, align 4 - %incdec.ptr = getelementptr inbounds half, half* %B.addr.07, i32 1 + store half %add, ptr %B.addr.07, align 4 + %incdec.ptr = getelementptr inbounds half, ptr %B.addr.07, i32 1 %inc = add nuw nsw i32 %i.08, 1 %exitcond = icmp eq i32 %inc, 1000 br i1 %exitcond, label %end, label %for.body @@ -743,46 +734,45 @@ end: !0 = distinct !{!0, !1} !1 = !{!"llvm.loop.interleave.count", i32 2} -define hidden void @pointer_phi_v4i32_uf2(i32* noalias nocapture readonly %A, i32* noalias nocapture %B, i32 %n, i32 %y) { +define hidden void @pointer_phi_v4i32_uf2(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, i32 %n, i32 %y) { ; CHECK-LABEL: @pointer_phi_v4i32_uf2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i32, i32* [[A:%.*]], i32 59952 -; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i32, i32* [[B:%.*]], i32 9992 +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 239808 +; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 39968 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i32> 
[[BROADCAST_SPLATINSERT6]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i32* [ [[A]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, i32* [[POINTER_PHI]], <4 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[POINTER_PHI]], <4 x i32> -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i32, i32* [[B]], i32 [[INDEX]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP0]], i32 4, <4 x i1> , <4 x i32> poison) -; CHECK-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP1]], i32 4, <4 x i1> , <4 x i32> poison) -; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER5]], [[BROADCAST_SPLAT7]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[NEXT_GEP]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* [[TMP4]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, i32* [[NEXT_GEP]], i32 4 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP6]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[INDEX]], 2 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i32 [[TMP2]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> , <4 x i32> poison) +; CHECK-NEXT: 
[[WIDE_MASKED_GATHER5:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP1]], i32 4, <4 x i1> , <4 x i32> poison) +; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER5]], [[BROADCAST_SPLAT7]] +; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[NEXT_GEP]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 4 +; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; CHECK-NEXT: [[PTR_IND]] = getelementptr i32, i32* [[POINTER_PHI]], i32 48 -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 9992 -; CHECK-NEXT: br i1 [[TMP7]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i32 192 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 9992 +; CHECK-NEXT: br i1 [[TMP6]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void ; CHECK: for.body: -; CHECK-NEXT: [[A_ADDR_08:%.*]] = phi i32* [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[A_ADDR_08:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[I_07:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 9992, [[VECTOR_BODY]] ] -; CHECK-NEXT: [[B_ADDR_06:%.*]] = phi i32* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR_08]], align 4 -; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i32, i32* [[A_ADDR_08]], i32 6 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[Y]] -; CHECK-NEXT: store i32 [[ADD]], i32* [[B_ADDR_06]], align 4 -; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i32, i32* [[B_ADDR_06]], i32 1 +; CHECK-NEXT: [[B_ADDR_06:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], 
[[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[A_ADDR_08]], align 4 +; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i32, ptr [[A_ADDR_08]], i32 6 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], [[Y]] +; CHECK-NEXT: store i32 [[ADD]], ptr [[B_ADDR_06]], align 4 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i32, ptr [[B_ADDR_06]], i32 1 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_07]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 10000 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] @@ -795,14 +785,14 @@ for.cond.cleanup: ret void for.body: - %A.addr.08 = phi i32* [ %A, %entry ], [ %add.ptr, %for.body ] + %A.addr.08 = phi ptr [ %A, %entry ], [ %add.ptr, %for.body ] %i.07 = phi i32 [ 0, %entry ], [ %inc, %for.body ] - %B.addr.06 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] - %0 = load i32, i32* %A.addr.08, align 4 - %add.ptr = getelementptr inbounds i32, i32* %A.addr.08, i32 6 + %B.addr.06 = phi ptr [ %B, %entry ], [ %incdec.ptr, %for.body ] + %0 = load i32, ptr %A.addr.08, align 4 + %add.ptr = getelementptr inbounds i32, ptr %A.addr.08, i32 6 %add = add nsw i32 %0, %y - store i32 %add, i32* %B.addr.06, align 4 - %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.06, i32 1 + store i32 %add, ptr %B.addr.06, align 4 + %incdec.ptr = getelementptr inbounds i32, ptr %B.addr.06, i32 1 %inc = add nuw nsw i32 %i.07, 1 %exitcond = icmp eq i32 %inc, 10000 br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !0 @@ -811,11 +801,11 @@ for.body: !2 = distinct !{!2, !3} !3 = !{!"llvm.loop.interleave.count", i32 4} -define hidden void @pointer_phi_v4i32_uf4(i32* noalias nocapture readonly %A, i32* noalias nocapture %B, i32 %n, i32 %y) { +define hidden void @pointer_phi_v4i32_uf4(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, i32 %n, i32 %y) { ; CHECK-LABEL: @pointer_phi_v4i32_uf4( ; CHECK-NEXT: 
entry: -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i32, i32* [[A:%.*]], i32 59904 -; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i32, i32* [[B:%.*]], i32 9984 +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 239616 +; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 39936 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i64 0 @@ -826,47 +816,44 @@ define hidden void @pointer_phi_v4i32_uf4(i32* noalias nocapture readonly %A, i3 ; CHECK-NEXT: [[BROADCAST_SPLAT15:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT14]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i32* [ [[A]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, i32* [[POINTER_PHI]], <4 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[POINTER_PHI]], <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[POINTER_PHI]], <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, i32* [[POINTER_PHI]], <4 x i32> -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i32, i32* [[B]], i32 [[INDEX]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP0]], i32 4, <4 x i1> , <4 x i32> poison) -; CHECK-NEXT: [[WIDE_MASKED_GATHER7:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP1]], i32 4, <4 x i1> , <4 x i32> poison) -; CHECK-NEXT: 
[[WIDE_MASKED_GATHER8:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP2]], i32 4, <4 x i1> , <4 x i32> poison) -; CHECK-NEXT: [[WIDE_MASKED_GATHER9:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP3]], i32 4, <4 x i1> , <4 x i32> poison) -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER7]], [[BROADCAST_SPLAT11]] -; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER8]], [[BROADCAST_SPLAT13]] -; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER9]], [[BROADCAST_SPLAT15]] -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[NEXT_GEP]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP8]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, i32* [[NEXT_GEP]], i32 4 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP10]], align 4 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[NEXT_GEP]], i32 8 -; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[TMP12]], align 4 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[NEXT_GEP]], i32 12 -; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP14]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shl i32 [[INDEX]], 2 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i32 [[TMP4]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP0]], 
i32 4, <4 x i1> , <4 x i32> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER7:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP1]], i32 4, <4 x i1> , <4 x i32> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER8:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP2]], i32 4, <4 x i1> , <4 x i32> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER9:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP3]], i32 4, <4 x i1> , <4 x i32> poison) +; CHECK-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER7]], [[BROADCAST_SPLAT11]] +; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER8]], [[BROADCAST_SPLAT13]] +; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER9]], [[BROADCAST_SPLAT15]] +; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[NEXT_GEP]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 4 +; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP9]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 8 +; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP10]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 12 +; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr [[TMP11]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 -; CHECK-NEXT: [[PTR_IND]] = getelementptr i32, i32* [[POINTER_PHI]], i32 96 -; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], 9984 -; CHECK-NEXT: br i1 [[TMP15]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i32 384 +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 9984 +; CHECK-NEXT: br i1 [[TMP12]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void ; CHECK: for.body: -; CHECK-NEXT: [[A_ADDR_08:%.*]] = phi 
i32* [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[A_ADDR_08:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[I_07:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 9984, [[VECTOR_BODY]] ] -; CHECK-NEXT: [[B_ADDR_06:%.*]] = phi i32* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[A_ADDR_08]], align 4 -; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i32, i32* [[A_ADDR_08]], i32 6 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[Y]] -; CHECK-NEXT: store i32 [[ADD]], i32* [[B_ADDR_06]], align 4 -; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i32, i32* [[B_ADDR_06]], i32 1 +; CHECK-NEXT: [[B_ADDR_06:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[A_ADDR_08]], align 4 +; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i32, ptr [[A_ADDR_08]], i32 6 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[Y]] +; CHECK-NEXT: store i32 [[ADD]], ptr [[B_ADDR_06]], align 4 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i32, ptr [[B_ADDR_06]], i32 1 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_07]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 10000 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] @@ -878,75 +865,75 @@ for.cond.cleanup: ret void for.body: - %A.addr.08 = phi i32* [ %A, %entry ], [ %add.ptr, %for.body ] + %A.addr.08 = phi ptr [ %A, %entry ], [ %add.ptr, %for.body ] %i.07 = phi i32 [ 0, %entry ], [ %inc, %for.body ] - %B.addr.06 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ] - %0 = load i32, i32* %A.addr.08, align 4 - %add.ptr = getelementptr inbounds i32, i32* %A.addr.08, i32 6 + %B.addr.06 = phi ptr [ %B, %entry ], [ %incdec.ptr, %for.body ] + %0 = load i32, ptr %A.addr.08, align 4 + %add.ptr = getelementptr 
inbounds i32, ptr %A.addr.08, i32 6 %add = add nsw i32 %0, %y - store i32 %add, i32* %B.addr.06, align 4 - %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.06, i32 1 + store i32 %add, ptr %B.addr.06, align 4 + %incdec.ptr = getelementptr inbounds i32, ptr %B.addr.06, i32 1 %inc = add nuw nsw i32 %i.07, 1 %exitcond = icmp eq i32 %inc, 10000 br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !2 } -define hidden void @mult_ptr_iv(i8* noalias nocapture readonly %x, i8* noalias nocapture %z) { +define hidden void @mult_ptr_iv(ptr noalias nocapture readonly %x, ptr noalias nocapture %z) { ; CHECK-LABEL: @mult_ptr_iv( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[Z:%.*]], i32 3000 -; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, i8* [[X:%.*]], i32 3000 -; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i8* [[SCEVGEP1]], [[Z]] -; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt i8* [[SCEVGEP]], [[X]] +; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[Z:%.*]], i32 3000 +; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[X:%.*]], i32 3000 +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt ptr [[UGLYGEP1]], [[Z]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt ptr [[UGLYGEP]], [[X]] ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, i8* [[X]], i32 3000 -; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, i8* [[Z]], i32 3000 +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[X]], i32 3000 +; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, ptr [[Z]], i32 3000 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i8* [ [[X]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[POINTER_PHI5:%.*]] = phi i8* [ [[Z]], [[VECTOR_PH]] ], [ [[PTR_IND6:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: 
[[POINTER_PHI:%.*]] = phi ptr [ [[X]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[POINTER_PHI5:%.*]] = phi ptr [ [[Z]], [[VECTOR_PH]] ], [ [[PTR_IND6:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, i8* [[POINTER_PHI]], <4 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[POINTER_PHI5]], <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, <4 x i8*> [[TMP0]], i32 1 -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> [[TMP0]], i32 1, <4 x i1> , <4 x i8> poison), !alias.scope !28 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, <4 x i8*> [[TMP0]], i32 2 -; CHECK-NEXT: [[WIDE_MASKED_GATHER7:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> [[TMP2]], i32 1, <4 x i1> , <4 x i8> poison), !alias.scope !28 -; CHECK-NEXT: [[WIDE_MASKED_GATHER8:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> [[TMP3]], i32 1, <4 x i1> , <4 x i8> poison), !alias.scope !28 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[POINTER_PHI5]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, <4 x ptr> [[TMP0]], i32 1 +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> [[TMP0]], i32 1, <4 x i1> , <4 x i8> poison), !alias.scope !28 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, <4 x ptr> [[TMP0]], i32 2 +; CHECK-NEXT: [[WIDE_MASKED_GATHER7:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> [[TMP2]], i32 1, <4 x i1> , <4 x i8> poison), !alias.scope !28 +; CHECK-NEXT: [[WIDE_MASKED_GATHER8:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> [[TMP3]], i32 1, <4 x i1> , <4 x i8> poison), !alias.scope !28 ; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], ; 
CHECK-NEXT: [[TMP5:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER7]] ; CHECK-NEXT: [[TMP6:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER8]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, <4 x i8*> [[TMP1]], i32 1 -; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> [[TMP4]], <4 x i8*> [[TMP1]], i32 1, <4 x i1> ), !alias.scope !31, !noalias !28 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, <4 x i8*> [[TMP1]], i32 2 -; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> [[TMP5]], <4 x i8*> [[TMP7]], i32 1, <4 x i1> ), !alias.scope !31, !noalias !28 -; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> [[TMP6]], <4 x i8*> [[TMP8]], i32 1, <4 x i1> ), !alias.scope !31, !noalias !28 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, <4 x ptr> [[TMP1]], i32 1 +; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP4]], <4 x ptr> [[TMP1]], i32 1, <4 x i1> ), !alias.scope !31, !noalias !28 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, <4 x ptr> [[TMP1]], i32 2 +; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP5]], <4 x ptr> [[TMP7]], i32 1, <4 x i1> ), !alias.scope !31, !noalias !28 +; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP6]], <4 x ptr> [[TMP8]], i32 1, <4 x i1> ), !alias.scope !31, !noalias !28 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, i8* [[POINTER_PHI]], i32 12 -; CHECK-NEXT: [[PTR_IND6]] = getelementptr i8, i8* [[POINTER_PHI5]], i32 12 +; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i32 12 +; CHECK-NEXT: [[PTR_IND6]] = getelementptr i8, ptr [[POINTER_PHI5]], i32 12 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP9]], label [[END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]] ; CHECK: for.body: -; CHECK-NEXT: [[X_ADDR_050:%.*]] = phi i8* [ [[INCDEC_PTR2:%.*]], 
[[FOR_BODY]] ], [ [[X]], [[ENTRY:%.*]] ] -; CHECK-NEXT: [[Z_ADDR_049:%.*]] = phi i8* [ [[INCDEC_PTR34:%.*]], [[FOR_BODY]] ], [ [[Z]], [[ENTRY]] ] +; CHECK-NEXT: [[X_ADDR_050:%.*]] = phi ptr [ [[INCDEC_PTR2:%.*]], [[FOR_BODY]] ], [ [[X]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[Z_ADDR_049:%.*]] = phi ptr [ [[INCDEC_PTR34:%.*]], [[FOR_BODY]] ], [ [[Z]], [[ENTRY]] ] ; CHECK-NEXT: [[I_048:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ] -; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i8, i8* [[X_ADDR_050]], i32 1 -; CHECK-NEXT: [[TMP10:%.*]] = load i8, i8* [[X_ADDR_050]], align 1 -; CHECK-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds i8, i8* [[X_ADDR_050]], i32 2 -; CHECK-NEXT: [[TMP11:%.*]] = load i8, i8* [[INCDEC_PTR]], align 1 -; CHECK-NEXT: [[INCDEC_PTR2]] = getelementptr inbounds i8, i8* [[X_ADDR_050]], i32 3 -; CHECK-NEXT: [[TMP12:%.*]] = load i8, i8* [[INCDEC_PTR1]], align 1 +; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i8, ptr [[X_ADDR_050]], i32 1 +; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[X_ADDR_050]], align 1 +; CHECK-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds i8, ptr [[X_ADDR_050]], i32 2 +; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[INCDEC_PTR]], align 1 +; CHECK-NEXT: [[INCDEC_PTR2]] = getelementptr inbounds i8, ptr [[X_ADDR_050]], i32 3 +; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[INCDEC_PTR1]], align 1 ; CHECK-NEXT: [[MUL:%.*]] = mul i8 [[TMP10]], 10 ; CHECK-NEXT: [[MUL1:%.*]] = mul i8 [[TMP10]], [[TMP11]] ; CHECK-NEXT: [[MUL2:%.*]] = mul i8 [[TMP10]], [[TMP12]] -; CHECK-NEXT: [[INCDEC_PTR32:%.*]] = getelementptr inbounds i8, i8* [[Z_ADDR_049]], i32 1 -; CHECK-NEXT: store i8 [[MUL]], i8* [[Z_ADDR_049]], align 1 -; CHECK-NEXT: [[INCDEC_PTR33:%.*]] = getelementptr inbounds i8, i8* [[Z_ADDR_049]], i32 2 -; CHECK-NEXT: store i8 [[MUL1]], i8* [[INCDEC_PTR32]], align 1 -; CHECK-NEXT: [[INCDEC_PTR34]] = getelementptr inbounds i8, i8* [[Z_ADDR_049]], i32 3 -; CHECK-NEXT: store i8 [[MUL2]], i8* [[INCDEC_PTR33]], 
align 1 +; CHECK-NEXT: [[INCDEC_PTR32:%.*]] = getelementptr inbounds i8, ptr [[Z_ADDR_049]], i32 1 +; CHECK-NEXT: store i8 [[MUL]], ptr [[Z_ADDR_049]], align 1 +; CHECK-NEXT: [[INCDEC_PTR33:%.*]] = getelementptr inbounds i8, ptr [[Z_ADDR_049]], i32 2 +; CHECK-NEXT: store i8 [[MUL1]], ptr [[INCDEC_PTR32]], align 1 +; CHECK-NEXT: [[INCDEC_PTR34]] = getelementptr inbounds i8, ptr [[Z_ADDR_049]], i32 3 +; CHECK-NEXT: store i8 [[MUL2]], ptr [[INCDEC_PTR33]], align 1 ; CHECK-NEXT: [[INC]] = add nuw i32 [[I_048]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[END]], label [[FOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] @@ -957,15 +944,15 @@ entry: br label %for.body for.body: - %x.addr.050 = phi i8* [ %incdec.ptr2, %for.body ], [ %x, %entry ] - %z.addr.049 = phi i8* [ %incdec.ptr34, %for.body ], [ %z, %entry ] + %x.addr.050 = phi ptr [ %incdec.ptr2, %for.body ], [ %x, %entry ] + %z.addr.049 = phi ptr [ %incdec.ptr34, %for.body ], [ %z, %entry ] %i.048 = phi i32 [ %inc, %for.body ], [ 0, %entry ] - %incdec.ptr = getelementptr inbounds i8, i8* %x.addr.050, i32 1 - %0 = load i8, i8* %x.addr.050, align 1 - %incdec.ptr1 = getelementptr inbounds i8, i8* %x.addr.050, i32 2 - %1 = load i8, i8* %incdec.ptr, align 1 - %incdec.ptr2 = getelementptr inbounds i8, i8* %x.addr.050, i32 3 - %2 = load i8, i8* %incdec.ptr1, align 1 + %incdec.ptr = getelementptr inbounds i8, ptr %x.addr.050, i32 1 + %0 = load i8, ptr %x.addr.050, align 1 + %incdec.ptr1 = getelementptr inbounds i8, ptr %x.addr.050, i32 2 + %1 = load i8, ptr %incdec.ptr, align 1 + %incdec.ptr2 = getelementptr inbounds i8, ptr %x.addr.050, i32 3 + %2 = load i8, ptr %incdec.ptr1, align 1 %conv = zext i8 %0 to i32 %mul = mul nuw nsw i32 %conv, 10 %conv1 = zext i8 %1 to i32 @@ -975,12 +962,12 @@ for.body: %conv3 = trunc i32 %mul to i8 %conv4 = trunc i32 %mul1 to i8 %conv5 = trunc i32 %mul2 to i8 - %incdec.ptr32 = getelementptr inbounds i8, i8* %z.addr.049, i32 1 - store i8 
%conv3, i8* %z.addr.049, align 1 - %incdec.ptr33 = getelementptr inbounds i8, i8* %z.addr.049, i32 2 - store i8 %conv4, i8* %incdec.ptr32, align 1 - %incdec.ptr34 = getelementptr inbounds i8, i8* %z.addr.049, i32 3 - store i8 %conv5, i8* %incdec.ptr33, align 1 + %incdec.ptr32 = getelementptr inbounds i8, ptr %z.addr.049, i32 1 + store i8 %conv3, ptr %z.addr.049, align 1 + %incdec.ptr33 = getelementptr inbounds i8, ptr %z.addr.049, i32 2 + store i8 %conv4, ptr %incdec.ptr32, align 1 + %incdec.ptr34 = getelementptr inbounds i8, ptr %z.addr.049, i32 3 + store i8 %conv5, ptr %incdec.ptr33, align 1 %inc = add nuw i32 %i.048, 1 %exitcond = icmp eq i32 %inc, 1000 br i1 %exitcond, label %end, label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-allowed.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-allowed.ll index 571d649..82ac429 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-allowed.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-allowed.ll @@ -5,7 +5,7 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" ; Test that ARMTTIImpl::preferPredicateOverEpilogue triggers tail-folding. 
-define dso_local void @f1(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32 %N) { +define dso_local void @f1(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, i32 %N) { ; CHECK-LABEL: f1( ; CHECK: entry: ; CHECK: @llvm.get.active.lane.mask @@ -25,19 +25,19 @@ for.cond.cleanup: ; preds = %for.cond.cleanup.lo for.body: ; preds = %for.body.preheader, %for.body %i.09 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds i32, i32* %B, i32 %i.09 - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx1 = getelementptr inbounds i32, i32* %C, i32 %i.09 - %1 = load i32, i32* %arrayidx1, align 4 + %arrayidx = getelementptr inbounds i32, ptr %B, i32 %i.09 + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32, ptr %C, i32 %i.09 + %1 = load i32, ptr %arrayidx1, align 4 %add = add nsw i32 %1, %0 - %arrayidx2 = getelementptr inbounds i32, i32* %A, i32 %i.09 - store i32 %add, i32* %arrayidx2, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %A, i32 %i.09 + store i32 %add, ptr %arrayidx2, align 4 %inc = add nuw nsw i32 %i.09, 1 %exitcond.not = icmp eq i32 %inc, %N br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body } -define dso_local void @f32_reduction(float* nocapture readonly %Input, i32 %N, float* nocapture %Output) { +define dso_local void @f32_reduction(ptr nocapture readonly %Input, i32 %N, ptr nocapture %Output) { ; CHECK-LABEL: f32_reduction( ; CHECK: vector.body: ; CHECK: @llvm.masked.load @@ -52,9 +52,9 @@ while.body.preheader: ; preds = %entry while.body: ; preds = %while.body.preheader, %while.body %blkCnt.09 = phi i32 [ %dec, %while.body ], [ %N, %while.body.preheader ] %sum.08 = phi float [ %add, %while.body ], [ 0.000000e+00, %while.body.preheader ] - %Input.addr.07 = phi float* [ %incdec.ptr, %while.body ], [ %Input, %while.body.preheader ] - %incdec.ptr = getelementptr 
inbounds float, float* %Input.addr.07, i32 1 - %0 = load float, float* %Input.addr.07, align 4 + %Input.addr.07 = phi ptr [ %incdec.ptr, %while.body ], [ %Input, %while.body.preheader ] + %incdec.ptr = getelementptr inbounds float, ptr %Input.addr.07, i32 1 + %0 = load float, ptr %Input.addr.07, align 4 %add = fadd fast float %0, %sum.08 %dec = add i32 %blkCnt.09, -1 %cmp = icmp eq i32 %dec, 0 @@ -68,11 +68,11 @@ while.end: ; preds = %while.end.loopexit, %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add.lcssa, %while.end.loopexit ] %conv = uitofp i32 %N to float %div = fdiv fast float %sum.0.lcssa, %conv - store float %div, float* %Output, align 4 + store float %div, ptr %Output, align 4 ret void } -define dso_local void @f16_reduction(half* nocapture readonly %Input, i32 %N, half* nocapture %Output) { +define dso_local void @f16_reduction(ptr nocapture readonly %Input, i32 %N, ptr nocapture %Output) { ; CHECK-LABEL: f16_reduction( ; CHECK: vector.body: ; CHECK: @llvm.masked.load @@ -87,9 +87,9 @@ while.body.preheader: ; preds = %entry while.body: ; preds = %while.body.preheader, %while.body %blkCnt.09 = phi i32 [ %dec, %while.body ], [ %N, %while.body.preheader ] %sum.08 = phi half [ %add, %while.body ], [ 0.000000e+00, %while.body.preheader ] - %Input.addr.07 = phi half* [ %incdec.ptr, %while.body ], [ %Input, %while.body.preheader ] - %incdec.ptr = getelementptr inbounds half, half* %Input.addr.07, i32 1 - %0 = load half, half* %Input.addr.07, align 2 + %Input.addr.07 = phi ptr [ %incdec.ptr, %while.body ], [ %Input, %while.body.preheader ] + %incdec.ptr = getelementptr inbounds half, ptr %Input.addr.07, i32 1 + %0 = load half, ptr %Input.addr.07, align 2 %add = fadd fast half %0, %sum.08 %dec = add i32 %blkCnt.09, -1 %cmp = icmp eq i32 %dec, 0 @@ -103,11 +103,11 @@ while.end: ; preds = %while.end.loopexit, %sum.0.lcssa = phi half [ 0.000000e+00, %entry ], [ %add.lcssa, %while.end.loopexit ] %conv = uitofp i32 %N to half %div = fdiv fast half 
%sum.0.lcssa, %conv - store half %div, half* %Output, align 2 + store half %div, ptr %Output, align 2 ret void } -define dso_local void @mixed_f32_i32_reduction(float* nocapture readonly %fInput, i32* nocapture readonly %iInput, i32 %N, float* nocapture %fOutput, i32* nocapture %iOutput) { +define dso_local void @mixed_f32_i32_reduction(ptr nocapture readonly %fInput, ptr nocapture readonly %iInput, i32 %N, ptr nocapture %fOutput, ptr nocapture %iOutput) { ; CHECK-LABEL: mixed_f32_i32_reduction( ; CHECK: vector.body: ; CHECK: @llvm.masked.load @@ -123,13 +123,13 @@ while.body: %blkCnt.020 = phi i32 [ %dec, %while.body ], [ %N, %while.body.preheader ] %isum.019 = phi i32 [ %add2, %while.body ], [ 0, %while.body.preheader ] %fsum.018 = phi float [ %add, %while.body ], [ 0.000000e+00, %while.body.preheader ] - %fInput.addr.017 = phi float* [ %incdec.ptr, %while.body ], [ %fInput, %while.body.preheader ] - %iInput.addr.016 = phi i32* [ %incdec.ptr1, %while.body ], [ %iInput, %while.body.preheader ] - %incdec.ptr = getelementptr inbounds float, float* %fInput.addr.017, i32 1 - %incdec.ptr1 = getelementptr inbounds i32, i32* %iInput.addr.016, i32 1 - %0 = load i32, i32* %iInput.addr.016, align 4 + %fInput.addr.017 = phi ptr [ %incdec.ptr, %while.body ], [ %fInput, %while.body.preheader ] + %iInput.addr.016 = phi ptr [ %incdec.ptr1, %while.body ], [ %iInput, %while.body.preheader ] + %incdec.ptr = getelementptr inbounds float, ptr %fInput.addr.017, i32 1 + %incdec.ptr1 = getelementptr inbounds i32, ptr %iInput.addr.016, i32 1 + %0 = load i32, ptr %iInput.addr.016, align 4 %add2 = add nsw i32 %0, %isum.019 - %1 = load float, float* %fInput.addr.017, align 4 + %1 = load float, ptr %fInput.addr.017, align 4 %add = fadd fast float %1, %fsum.018 %dec = add i32 %blkCnt.020, -1 %cmp = icmp eq i32 %dec, 0 @@ -146,14 +146,14 @@ while.end: %isum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %phitmp, %while.end.loopexit ] %conv = uitofp i32 %N to float %div = fdiv fast float 
%fsum.0.lcssa, %conv - store float %div, float* %fOutput, align 4 + store float %div, ptr %fOutput, align 4 %div5 = fdiv fast float %isum.0.lcssa, %conv %conv6 = fptosi float %div5 to i32 - store i32 %conv6, i32* %iOutput, align 4 + store i32 %conv6, ptr %iOutput, align 4 ret void } -define dso_local i32 @i32_mul_reduction(i32* noalias nocapture readonly %B, i32 %N) { +define dso_local i32 @i32_mul_reduction(ptr noalias nocapture readonly %B, i32 %N) { ; CHECK-LABEL: i32_mul_reduction( ; CHECK: vector.body: ; CHECK: @llvm.masked.load @@ -176,15 +176,15 @@ for.cond.cleanup: for.body: %i.08 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] %S.07 = phi i32 [ %mul, %for.body ], [ 1, %for.body.preheader ] - %arrayidx = getelementptr inbounds i32, i32* %B, i32 %i.08 - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %B, i32 %i.08 + %0 = load i32, ptr %arrayidx, align 4 %mul = mul nsw i32 %0, %S.07 %inc = add nuw nsw i32 %i.08, 1 %exitcond = icmp eq i32 %inc, %N br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body } -define dso_local i32 @i32_or_reduction(i32* noalias nocapture readonly %B, i32 %N) { +define dso_local i32 @i32_or_reduction(ptr noalias nocapture readonly %B, i32 %N) { ; CHECK-LABEL: i32_or_reduction( ; CHECK: vector.body: ; CHECK: @llvm.masked.load @@ -207,15 +207,15 @@ for.cond.cleanup: ; preds = %for.cond.cleanup.lo for.body: ; preds = %for.body.preheader, %for.body %i.08 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] %S.07 = phi i32 [ %or, %for.body ], [ 1, %for.body.preheader ] - %arrayidx = getelementptr inbounds i32, i32* %B, i32 %i.08 - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %B, i32 %i.08 + %0 = load i32, ptr %arrayidx, align 4 %or = or i32 %0, %S.07 %inc = add nuw nsw i32 %i.08, 1 %exitcond = icmp eq i32 %inc, %N br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body } -define dso_local i32 @i32_and_reduction(i32* noalias 
nocapture readonly %A, i32 %N, i32 %S) { +define dso_local i32 @i32_and_reduction(ptr noalias nocapture readonly %A, i32 %N, i32 %S) { ; CHECK-LABEL: i32_and_reduction( ; CHECK: vector.body: ; CHECK: @llvm.masked.load @@ -238,8 +238,8 @@ for.cond.cleanup: ; preds = %for.cond.cleanup.lo for.body: ; preds = %for.body.preheader, %for.body %i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] %S.addr.06 = phi i32 [ %and, %for.body ], [ %S, %for.body.preheader ] - %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.07 - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.07 + %0 = load i32, ptr %arrayidx, align 4 %and = and i32 %0, %S.addr.06 %inc = add nuw nsw i32 %i.07, 1 %exitcond = icmp eq i32 %inc, %N diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-scalar-epilogue-fallback.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-scalar-epilogue-fallback.ll index ee9c903..571d93a 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-scalar-epilogue-fallback.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-scalar-epilogue-fallback.ll @@ -9,68 +9,66 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv8.1m.main-arm-unknown-eabihf" -define void @outside_user_blocks_tail_folding(i8* nocapture readonly %ptr, i32 %size, i8** %pos) { +define void @outside_user_blocks_tail_folding(ptr nocapture readonly %ptr, i32 %size, ptr %pos) { ; CHECK-LABEL: @outside_user_blocks_tail_folding( ; CHECK-NEXT: header: -; CHECK-NEXT: [[PTR0:%.*]] = load i8*, i8** [[POS:%.*]], align 4 +; CHECK-NEXT: [[PTR0:%.*]] = load ptr, ptr [[POS:%.*]], align 4 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[SIZE:%.*]], 16 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[SIZE]], 16 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[SIZE]], [[N_MOD_VF]] ; 
CHECK-NEXT: [[IND_END:%.*]] = sub i32 [[SIZE]], [[N_VEC]] -; CHECK-NEXT: [[IND_END1:%.*]] = getelementptr i8, i8* [[PTR:%.*]], i32 [[N_VEC]] +; CHECK-NEXT: [[IND_END1:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i32 [[N_VEC]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[PTR]], i32 [[TMP0]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, <16 x i8>* [[TMP3]], align 1 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[NEXT_GEP]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <16 x i8>* -; CHECK-NEXT: store <16 x i8> [[WIDE_LOAD]], <16 x i8>* [[TMP5]], align 1 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0 +; CHECK-NEXT: store <16 x i8> [[WIDE_LOAD]], ptr [[TMP3]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[SIZE]], [[N_VEC]] ; CHECK-NEXT: br 
i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[SIZE]], [[HEADER:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i8* [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[PTR]], [[HEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[PTR]], [[HEADER]] ] ; CHECK-NEXT: br label [[BODY:%.*]] ; CHECK: body: ; CHECK-NEXT: [[DEC66:%.*]] = phi i32 [ [[DEC:%.*]], [[BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[BUFF:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[BODY]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[BUFF]], i32 1 +; CHECK-NEXT: [[BUFF:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[BODY]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[BUFF]], i32 1 ; CHECK-NEXT: [[DEC]] = add nsw i32 [[DEC66]], -1 -; CHECK-NEXT: [[TMP7:%.*]] = load i8, i8* [[INCDEC_PTR]], align 1 -; CHECK-NEXT: store i8 [[TMP7]], i8* [[BUFF]], align 1 +; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[INCDEC_PTR]], align 1 +; CHECK-NEXT: store i8 [[TMP5]], ptr [[BUFF]], align 1 ; CHECK-NEXT: [[TOBOOL11:%.*]] = icmp eq i32 [[DEC]], 0 ; CHECK-NEXT: br i1 [[TOBOOL11]], label [[END]], label [[BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: end: -; CHECK-NEXT: [[INCDEC_PTR_LCSSA:%.*]] = phi i8* [ [[INCDEC_PTR]], [[BODY]] ], [ [[IND_END1]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: store i8* [[INCDEC_PTR_LCSSA]], i8** [[POS]], align 4 +; CHECK-NEXT: [[INCDEC_PTR_LCSSA:%.*]] = phi ptr [ [[INCDEC_PTR]], [[BODY]] ], [ [[IND_END1]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: store ptr [[INCDEC_PTR_LCSSA]], ptr [[POS]], align 4 ; CHECK-NEXT: ret void ; header: - %ptr0 = load i8*, i8** %pos, align 4 + %ptr0 = load ptr, ptr %pos, align 4 br label %body body: %dec66 = phi i32 [ %dec, %body ], [ %size, %header ] - %buff = phi i8* [ %incdec.ptr, %body ], [ %ptr, %header 
] - %incdec.ptr = getelementptr inbounds i8, i8* %buff, i32 1 + %buff = phi ptr [ %incdec.ptr, %body ], [ %ptr, %header ] + %incdec.ptr = getelementptr inbounds i8, ptr %buff, i32 1 %dec = add nsw i32 %dec66, -1 - %0 = load i8, i8* %incdec.ptr, align 1 - store i8 %0, i8* %buff, align 1 + %0 = load i8, ptr %incdec.ptr, align 1 + store i8 %0, ptr %buff, align 1 %tobool11 = icmp eq i32 %dec, 0 br i1 %tobool11, label %end, label %body end: - store i8* %incdec.ptr, i8** %pos, align 4 + store ptr %incdec.ptr, ptr %pos, align 4 ret void } diff --git a/llvm/test/Transforms/LoopVectorize/ARM/vector_cast.ll b/llvm/test/Transforms/LoopVectorize/ARM/vector_cast.ll index aededbe..ebdf0a1 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/vector_cast.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/vector_cast.ll @@ -7,21 +7,20 @@ target triple = "armv7--linux-gnueabi" ; for the stores to the struct. Here we need to perform a bitcast from a vector ; of pointers to a vector i32s. -%class.A = type { i8*, i32 } +%class.A = type { ptr, i32 } ; CHECK-LABEL: test0 -define void @test0(%class.A* %StartPtr, %class.A* %APtr) { +define void @test0(ptr %StartPtr, ptr %APtr) { entry: br label %for.body.i for.body.i: - %addr = phi %class.A* [ %StartPtr, %entry ], [ %incdec.ptr.i, %for.body.i ] - %Data.i.i = getelementptr inbounds %class.A, %class.A* %addr, i32 0, i32 0 - store i8* null, i8** %Data.i.i, align 4, !tbaa !8 - %Length.i.i = getelementptr inbounds %class.A, %class.A* %addr, i32 0, i32 1 - store i32 0, i32* %Length.i.i, align 4, !tbaa !11 - %incdec.ptr.i = getelementptr inbounds %class.A, %class.A* %addr, i32 1 - %cmp.i = icmp eq %class.A* %incdec.ptr.i, %APtr + %addr = phi ptr [ %StartPtr, %entry ], [ %incdec.ptr.i, %for.body.i ] + store ptr null, ptr %addr, align 4, !tbaa !8 + %Length.i.i = getelementptr inbounds %class.A, ptr %addr, i32 0, i32 1 + store i32 0, ptr %Length.i.i, align 4, !tbaa !11 + %incdec.ptr.i = getelementptr inbounds %class.A, ptr %addr, i32 1 + %cmp.i = icmp 
eq ptr %incdec.ptr.i, %APtr br i1 %cmp.i, label %exit, label %for.body.i exit: diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/pr30990.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/pr30990.ll index d0908cc..3d6e074 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/pr30990.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/pr30990.ll @@ -2,10 +2,10 @@ target triple = "powerpc64-unknown-linux-gnu" -define signext i32 @foo(i8* readonly %ptr, i32 signext %l) { +define signext i32 @foo(ptr readonly %ptr, i32 signext %l) { entry: %idx.ext = sext i32 %l to i64 - %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 %idx.ext + %add.ptr = getelementptr inbounds i8, ptr %ptr, i64 %idx.ext %cmp7 = icmp sgt i32 %l, 0 br i1 %cmp7, label %while.body.preheader, label %while.end @@ -14,13 +14,13 @@ while.body.preheader: ; preds = %entry while.body: ; preds = %while.body.preheader, %while.body %count.09 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ] - %ptr.addr.08 = phi i8* [ %incdec.ptr, %while.body ], [ %ptr, %while.body.preheader ] - %0 = load i8, i8* %ptr.addr.08, align 1 + %ptr.addr.08 = phi ptr [ %incdec.ptr, %while.body ], [ %ptr, %while.body.preheader ] + %0 = load i8, ptr %ptr.addr.08, align 1 %cmp1 = icmp slt i8 %0, -64 %cond = zext i1 %cmp1 to i32 %add = add nsw i32 %cond, %count.09 - %incdec.ptr = getelementptr inbounds i8, i8* %ptr.addr.08, i64 1 - %cmp = icmp ult i8* %incdec.ptr, %add.ptr + %incdec.ptr = getelementptr inbounds i8, ptr %ptr.addr.08, i64 1 + %cmp = icmp ult ptr %incdec.ptr, %add.ptr br i1 %cmp, label %while.body, label %while.end.loopexit while.end.loopexit: ; preds = %while.body @@ -36,10 +36,10 @@ while.end: ; preds = %while.end.loopexit, } -define signext i16 @foo2(i8* readonly %ptr, i32 signext %l) { +define signext i16 @foo2(ptr readonly %ptr, i32 signext %l) { entry: %idx.ext = sext i32 %l to i64 - %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 %idx.ext + %add.ptr = getelementptr inbounds i8, ptr %ptr, i64 
%idx.ext %cmp7 = icmp sgt i32 %l, 0 br i1 %cmp7, label %while.body.preheader, label %while.end @@ -48,13 +48,13 @@ while.body.preheader: ; preds = %entry while.body: ; preds = %while.body.preheader, %while.body %count.09 = phi i16 [ %add, %while.body ], [ 0, %while.body.preheader ] - %ptr.addr.08 = phi i8* [ %incdec.ptr, %while.body ], [ %ptr, %while.body.preheader ] - %0 = load i8, i8* %ptr.addr.08, align 1 + %ptr.addr.08 = phi ptr [ %incdec.ptr, %while.body ], [ %ptr, %while.body.preheader ] + %0 = load i8, ptr %ptr.addr.08, align 1 %cmp1 = icmp slt i8 %0, -64 %cond = zext i1 %cmp1 to i16 %add = add nsw i16 %cond, %count.09 - %incdec.ptr = getelementptr inbounds i8, i8* %ptr.addr.08, i64 1 - %cmp = icmp ult i8* %incdec.ptr, %add.ptr + %incdec.ptr = getelementptr inbounds i8, ptr %ptr.addr.08, i64 1 + %cmp = icmp ult ptr %incdec.ptr, %add.ptr br i1 %cmp, label %while.body, label %while.end.loopexit while.end.loopexit: ; preds = %while.body @@ -70,10 +70,10 @@ while.end: ; preds = %while.end.loopexit, ; CHECK: icmp slt <8 x i8> } -define signext i32 @foo3(i16* readonly %ptr, i32 signext %l) { +define signext i32 @foo3(ptr readonly %ptr, i32 signext %l) { entry: %idx.ext = sext i32 %l to i64 - %add.ptr = getelementptr inbounds i16, i16* %ptr, i64 %idx.ext + %add.ptr = getelementptr inbounds i16, ptr %ptr, i64 %idx.ext %cmp7 = icmp sgt i32 %l, 0 br i1 %cmp7, label %while.body.preheader, label %while.end @@ -82,13 +82,13 @@ while.body.preheader: ; preds = %entry while.body: ; preds = %while.body.preheader, %while.body %count.09 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ] - %ptr.addr.16 = phi i16* [ %incdec.ptr, %while.body ], [ %ptr, %while.body.preheader ] - %0 = load i16, i16* %ptr.addr.16, align 1 + %ptr.addr.16 = phi ptr [ %incdec.ptr, %while.body ], [ %ptr, %while.body.preheader ] + %0 = load i16, ptr %ptr.addr.16, align 1 %cmp1 = icmp slt i16 %0, -64 %cond = zext i1 %cmp1 to i32 %add = add nsw i32 %cond, %count.09 - %incdec.ptr = getelementptr 
inbounds i16, i16* %ptr.addr.16, i64 1 - %cmp = icmp ult i16* %incdec.ptr, %add.ptr + %incdec.ptr = getelementptr inbounds i16, ptr %ptr.addr.16, i64 1 + %cmp = icmp ult ptr %incdec.ptr, %add.ptr br i1 %cmp, label %while.body, label %while.end.loopexit while.end.loopexit: ; preds = %while.body @@ -104,10 +104,10 @@ while.end: ; preds = %while.end.loopexit, ; CHECK: icmp slt <4 x i16> } -define i64 @foo4(i16* readonly %ptr, i32 signext %l) { +define i64 @foo4(ptr readonly %ptr, i32 signext %l) { entry: %idx.ext = sext i32 %l to i64 - %add.ptr = getelementptr inbounds i16, i16* %ptr, i64 %idx.ext + %add.ptr = getelementptr inbounds i16, ptr %ptr, i64 %idx.ext %cmp7 = icmp sgt i32 %l, 0 br i1 %cmp7, label %while.body.preheader, label %while.end @@ -116,13 +116,13 @@ while.body.preheader: ; preds = %entry while.body: ; preds = %while.body.preheader, %while.body %count.09 = phi i64 [ %add, %while.body ], [ 0, %while.body.preheader ] - %ptr.addr.16 = phi i16* [ %incdec.ptr, %while.body ], [ %ptr, %while.body.preheader ] - %0 = load i16, i16* %ptr.addr.16, align 1 + %ptr.addr.16 = phi ptr [ %incdec.ptr, %while.body ], [ %ptr, %while.body.preheader ] + %0 = load i16, ptr %ptr.addr.16, align 1 %cmp1 = icmp slt i16 %0, -64 %cond = zext i1 %cmp1 to i64 %add = add nsw i64 %cond, %count.09 - %incdec.ptr = getelementptr inbounds i16, i16* %ptr.addr.16, i64 1 - %cmp = icmp ult i16* %incdec.ptr, %add.ptr + %incdec.ptr = getelementptr inbounds i16, ptr %ptr.addr.16, i64 1 + %cmp = icmp ult ptr %incdec.ptr, %add.ptr br i1 %cmp, label %while.body, label %while.end.loopexit while.end.loopexit: ; preds = %while.body diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll index e3285e6..b63f2cf 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll @@ -23,11 +23,11 @@ for.cond.cleanup: for.body: %indvars.iv = phi i64 [ 0, %entry ], [ 
%indvars.iv.next, %for.body ] %s.015 = phi i32 [ 0, %entry ], [ %add, %for.body ] - %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %indvars.iv - %0 = load i8, i8* %arrayidx, align 1 + %arrayidx = getelementptr inbounds [1024 x i8], ptr @a, i64 0, i64 %indvars.iv + %0 = load i8, ptr %arrayidx, align 1 %conv = zext i8 %0 to i32 - %arrayidx2 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %indvars.iv - %1 = load i8, i8* %arrayidx2, align 1 + %arrayidx2 = getelementptr inbounds [1024 x i8], ptr @b, i64 0, i64 %indvars.iv + %1 = load i8, ptr %arrayidx2, align 1 %conv3 = zext i8 %1 to i32 %sub = sub nsw i32 %conv, %conv3 %ispos = icmp sgt i32 %sub, -1 @@ -59,12 +59,12 @@ for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %s.015 = phi i32 [ 0, %entry ], [ %add, %for.body ] %tmp1 = add nsw i64 %indvars.iv, 3 - %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %tmp1 - %tmp = load i8, i8* %arrayidx, align 1 + %arrayidx = getelementptr inbounds [1024 x i8], ptr @a, i64 0, i64 %tmp1 + %tmp = load i8, ptr %arrayidx, align 1 %conv = zext i8 %tmp to i32 %tmp2 = add nsw i64 %indvars.iv, 2 - %arrayidx2 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %tmp2 - %tmp3 = load i8, i8* %arrayidx2, align 1 + %arrayidx2 = getelementptr inbounds [1024 x i8], ptr @b, i64 0, i64 %tmp2 + %tmp3 = load i8, ptr %arrayidx2, align 1 %conv3 = zext i8 %tmp3 to i32 %sub = sub nsw i32 %conv, %conv3 %ispos = icmp sgt i32 %sub, -1 @@ -76,7 +76,7 @@ for.body: ; preds = %for.body, %entry br i1 %exitcond, label %for.cond.cleanup, label %for.body } -define i64 @bar(i64* nocapture %a) { +define i64 @bar(ptr nocapture %a) { ; CHECK-LABEL: bar ; CHECK: Executing best plan with VF=2, UF=12 @@ -91,10 +91,10 @@ for.cond.cleanup: for.body: %i.012 = phi i64 [ 0, %entry ], [ %inc, %for.body ] %s.011 = phi i64 [ 0, %entry ], [ %add2, %for.body ] - %arrayidx = getelementptr 
inbounds i64, i64* %a, i64 %i.012 - %0 = load i64, i64* %arrayidx, align 8 + %arrayidx = getelementptr inbounds i64, ptr %a, i64 %i.012 + %0 = load i64, ptr %arrayidx, align 8 %add = add nsw i64 %0, %i.012 - store i64 %add, i64* %arrayidx, align 8 + store i64 %add, ptr %arrayidx, align 8 %add2 = add nsw i64 %add, %s.011 %inc = add nuw nsw i64 %i.012, 1 %exitcond = icmp eq i64 %inc, 1024 @@ -114,12 +114,12 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds [0 x i64], [0 x i64]* @d, i64 0, i64 %indvars.iv - %tmp = load i64, i64* %arrayidx, align 8 - %arrayidx1 = getelementptr inbounds [0 x i32], [0 x i32]* @e, i64 0, i64 %tmp - %tmp1 = load i32, i32* %arrayidx1, align 4 - %arrayidx3 = getelementptr inbounds [0 x i32], [0 x i32]* @c, i64 0, i64 %indvars.iv - store i32 %tmp1, i32* %arrayidx3, align 4 + %arrayidx = getelementptr inbounds [0 x i64], ptr @d, i64 0, i64 %indvars.iv + %tmp = load i64, ptr %arrayidx, align 8 + %arrayidx1 = getelementptr inbounds [0 x i32], ptr @e, i64 0, i64 %tmp + %tmp1 = load i32, ptr %arrayidx1, align 4 + %arrayidx3 = getelementptr inbounds [0 x i32], ptr @c, i64 0, i64 %indvars.iv + store i32 %tmp1, ptr %arrayidx3, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 10000 br i1 %exitcond, label %for.end, label %for.body @@ -128,7 +128,7 @@ for.end: ; preds = %for.body ret void } -define float @float_(float* nocapture readonly %a, float* nocapture readonly %b, i32 %n) { +define float @float_(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %n) { ;CHECK-LABEL: float_ ;CHECK: LV(REG): VF = 1 ;CHECK: LV(REG): Found max usage: 2 item @@ -148,10 +148,10 @@ preheader: for: %indvars.iv = phi i64 [ 0, %preheader ], [ %indvars.iv.next, %for ] %s.02 = phi float [ 0.0, %preheader ], [ %add4, %for ] - %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv - %t1 = load float, 
float* %arrayidx, align 4 - %arrayidx3 = getelementptr inbounds float, float* %b, i64 %indvars.iv - %t2 = load float, float* %arrayidx3, align 4 + %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv + %t1 = load float, ptr %arrayidx, align 4 + %arrayidx3 = getelementptr inbounds float, ptr %b, i64 %indvars.iv + %t2 = load float, ptr %arrayidx3, align 4 %add = fadd fast float %t1, %s.02 %add4 = fadd fast float %add, %t2 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 32 @@ -168,7 +168,7 @@ for.end: } -define void @double_(double* nocapture %A, i32 %n) nounwind uwtable ssp { +define void @double_(ptr nocapture %A, i32 %n) nounwind uwtable ssp { ;CHECK-LABEL: double_ ;CHECK-PWR8: LV(REG): VF = 2 ;CHECK-PWR8: LV(REG): Found max usage: 2 item @@ -189,8 +189,8 @@ define void @double_(double* nocapture %A, i32 %n) nounwind uwtable ssp { ;