From ff302f850242b7f5e1fc48235471b8273c421236 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 20 Dec 2022 11:30:34 +0100 Subject: [PATCH] [AArch64] Convert some tests to opaque pointers (NFC) --- .../AArch64/GlobalISel/select-gv-cmodel-large.mir | 14 +- .../AArch64/GlobalISel/select-gv-cmodel-tiny.mir | 14 +- llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll | 73 +++--- .../AArch64/aarch64-interleaved-ld-combine.ll | 255 +++++++++---------- llvm/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll | 20 +- .../regalloc-last-chance-recolor-with-split.mir | 16 +- .../CodeGen/AArch64/scalable-vector-promotion.ll | 10 +- llvm/test/CodeGen/AArch64/stack-tagging-ex-2.ll | 110 ++++----- .../AArch64/stack-tagging-initializer-merge.ll | 274 +++++++++------------ .../sve-extract-vector-to-predicate-store.ll | 37 ++- .../AArch64/sve-insert-vector-to-predicate-load.ll | 48 ++-- .../sve-lsr-scaled-index-addressing-mode.ll | 54 ++-- 12 files changed, 424 insertions(+), 501 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-large.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-large.mir index d503074..b2aadd7 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-large.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-large.mir @@ -9,9 +9,9 @@ define dso_local i32 @gv_large() { entry: %retval = alloca i32, align 4 - store i32 0, i32* %retval, align 4 - %0 = load i32, i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo1, i64 0, i64 0), align 4 - %1 = load i32, i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo2, i64 0, i64 0), align 4 + store i32 0, ptr %retval, align 4 + %0 = load i32, ptr @foo1, align 4 + %1 = load i32, ptr @foo2, align 4 %add = add nsw i32 %0, %1 ret i32 %add } @@ -41,8 +41,8 @@ body: | ; CHECK: [[MOVKXi5:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi4]], target-flags(aarch64-g3) @foo2, 48 ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[MOVKXi5]] ; CHECK: STRWui $wzr, %stack.0.retval, 0 :: (store (s32) into %ir.retval) - ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load (s32) from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo1, i64 0, i64 0)`) - ; CHECK: [[LDRWui1:%[0-9]+]]:gpr32 = LDRWui [[COPY1]], 0 :: (load (s32) from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo2, i64 0, i64 0)`) + ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load (s32) from @foo1) + ; CHECK: [[LDRWui1:%[0-9]+]]:gpr32 = LDRWui [[COPY1]], 0 :: (load (s32) from @foo2) ; CHECK: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRWui]], [[LDRWui1]] ; CHECK: $w0 = COPY [[ADDWrr]] ; CHECK: RET_ReallyLR implicit $w0 @@ -53,8 +53,8 @@ body: | %6:gpr(p0) = COPY %7(p0) %0:gpr(p0) = G_FRAME_INDEX %stack.0.retval G_STORE %1(s32), %0(p0) :: (store (s32) into %ir.retval) - %2:gpr(s32) = G_LOAD %3(p0) :: (load (s32) from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo1, i64 0, i64 0)`) - %5:gpr(s32) = G_LOAD %6(p0) :: (load (s32) from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo2, i64 0, i64 0)`) + %2:gpr(s32) = G_LOAD %3(p0) :: (load (s32) from @foo1) + %5:gpr(s32) = G_LOAD %6(p0) :: (load (s32) from @foo2) %8:gpr(s32) = G_ADD %2, %5 $w0 = COPY %8(s32) RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-tiny.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-tiny.mir index 64aee2d..e14c43a 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-tiny.mir 
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-tiny.mir @@ -9,9 +9,9 @@ define dso_local i32 @gv_tiny() { entry: %retval = alloca i32, align 4 - store i32 0, i32* %retval, align 4 - %0 = load i32, i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo1, i64 0, i64 0), align 4 - %1 = load i32, i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo2, i64 0, i64 0), align 4 + store i32 0, ptr %retval, align 4 + %0 = load i32, ptr @foo1, align 4 + %1 = load i32, ptr @foo2, align 4 %add = add nsw i32 %0, %1 ret i32 %add } @@ -35,8 +35,8 @@ body: | ; CHECK: [[ADR1:%[0-9]+]]:gpr64 = ADR @foo2 ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[ADR1]] ; CHECK: STRWui $wzr, %stack.0.retval, 0 :: (store (s32) into %ir.retval) - ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load (s32) from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo1, i64 0, i64 0)`) - ; CHECK: [[LDRWui1:%[0-9]+]]:gpr32 = LDRWui [[COPY1]], 0 :: (load (s32) from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo2, i64 0, i64 0)`) + ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load (s32) from @foo1) + ; CHECK: [[LDRWui1:%[0-9]+]]:gpr32 = LDRWui [[COPY1]], 0 :: (load (s32) from @foo2) ; CHECK: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRWui]], [[LDRWui1]] ; CHECK: $w0 = COPY [[ADDWrr]] ; CHECK: RET_ReallyLR implicit $w0 @@ -47,8 +47,8 @@ body: | %6:gpr(p0) = COPY %7(p0) %0:gpr(p0) = G_FRAME_INDEX %stack.0.retval G_STORE %1(s32), %0(p0) :: (store (s32) into %ir.retval) - %2:gpr(s32) = G_LOAD %3(p0) :: (load (s32) from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo1, i64 0, i64 0)`) - %5:gpr(s32) = G_LOAD %6(p0) :: (load (s32) from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo2, i64 0, i64 0)`) + %2:gpr(s32) = G_LOAD %3(p0) :: (load (s32) from @foo1) + %5:gpr(s32) = G_LOAD %6(p0) :: (load (s32) from @foo2) %8:gpr(s32) = G_ADD %2, %5 $w0 = COPY %8(s32) RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll b/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll index 59b0b09..c2a3acb 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll @@ -16,16 +16,16 @@ target triple = "aarch64-linux-gnueabi" ; Check that when two complex GEPs are used in two basic blocks, LLVM can ; eliminate the common subexpression for the second use. 
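; A worked sketch of where the CHECK constants in @test_GEP_CSE below come from. The
; %struct body is not part of this hunk, so its 96-byte size and the field offsets are
; inferred from the "mul ... 96" and the 23052/23048 constants in the CHECK lines:
;   %liberties: gep [240 x %struct], ptr %string, i64 1, i64 %idxprom, i32 3
;     constant part = 1*(240*96) + offsetof(field 3) = 23040 + 12 = 23052
;     variable part = %idxprom * 96
;   %origin:    gep [240 x %struct], ptr %string, i64 1, i64 %idxprom, i32 2
;     constant part = 23040 + 8 = 23048, same variable part, hence the CSE
; After the offsets are split out (the -aarch64-use-aa form checked below), both GEPs
; share one base (register names here are illustrative, not from the test):
;   %idx  = mul i64 %idxprom, 96
;   %base = getelementptr i8, ptr %string, i64 %idx
;   %liberties = getelementptr i8, ptr %base, i64 23052
;   %origin    = getelementptr i8, ptr %base, i64 23048   ; in if.then, reusing %base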
-define void @test_GEP_CSE([240 x %struct]* %string, i32* %adj, i32 %lib, i64 %idxprom) { - %liberties = getelementptr [240 x %struct], [240 x %struct]* %string, i64 1, i64 %idxprom, i32 3 - %1 = load i32, i32* %liberties, align 4 +define void @test_GEP_CSE(ptr %string, ptr %adj, i32 %lib, i64 %idxprom) { + %liberties = getelementptr [240 x %struct], ptr %string, i64 1, i64 %idxprom, i32 3 + %1 = load i32, ptr %liberties, align 4 %cmp = icmp eq i32 %1, %lib br i1 %cmp, label %if.then, label %if.end if.then: ; preds = %entry - %origin = getelementptr [240 x %struct], [240 x %struct]* %string, i64 1, i64 %idxprom, i32 2 - %2 = load i32, i32* %origin, align 4 - store i32 %2, i32* %adj, align 4 + %origin = getelementptr [240 x %struct], ptr %string, i64 1, i64 %idxprom, i32 2 + %2 = load i32, ptr %origin, align 4 + store i32 %2, ptr %adj, align 4 br label %if.end if.end: ; preds = %if.then, %entry @@ -39,7 +39,7 @@ if.end: ; preds = %if.then, %entry ; CHECK:ldr ; CHECK-NoAA-LABEL: @test_GEP_CSE( -; CHECK-NoAA: [[PTR0:%[a-zA-Z0-9]+]] = ptrtoint [240 x %struct]* %string to i64 +; CHECK-NoAA: [[PTR0:%[a-zA-Z0-9]+]] = ptrtoint ptr %string to i64 ; CHECK-NoAA: [[PTR1:%[a-zA-Z0-9]+]] = mul i64 %idxprom, 96 ; CHECK-NoAA: [[PTR2:%[a-zA-Z0-9]+]] = add i64 [[PTR0]], [[PTR1]] ; CHECK-NoAA: add i64 [[PTR2]], 23052 @@ -51,38 +51,35 @@ if.end: ; preds = %if.then, %entry ; CHECK-NoAA: inttoptr ; CHECK-UseAA-LABEL: @test_GEP_CSE( -; CHECK-UseAA: [[PTR0:%[a-zA-Z0-9]+]] = bitcast [240 x %struct]* %string to i8* ; CHECK-UseAA: [[IDX:%[a-zA-Z0-9]+]] = mul i64 %idxprom, 96 -; CHECK-UseAA: [[PTR1:%[a-zA-Z0-9]+]] = getelementptr i8, i8* [[PTR0]], i64 [[IDX]] -; CHECK-UseAA: getelementptr i8, i8* [[PTR1]], i64 23052 -; CHECK-UseAA: bitcast +; CHECK-UseAA: [[PTR1:%[a-zA-Z0-9]+]] = getelementptr i8, ptr %string, i64 [[IDX]] +; CHECK-UseAA: getelementptr i8, ptr [[PTR1]], i64 23052 ; CHECK-UseAA: if.then: -; CHECK-UseAA: getelementptr i8, i8* [[PTR1]], i64 23048 -; CHECK-UseAA: bitcast +; CHECK-UseAA: getelementptr i8, ptr [[PTR1]], i64 23048 %class.my = type { i32, [128 x i32], i32, [256 x %struct.pt]} -%struct.pt = type { %struct.point*, i32, i32 } +%struct.pt = type { ptr, i32, i32 } %struct.point = type { i32, i32 } ; Check when a GEP is used across two basic block, LLVM can sink the address ; calculation and code gen can generate a better addressing mode for the second ; use. 
-define void @test_GEP_across_BB(%class.my* %this, i64 %idx) { - %1 = getelementptr %class.my, %class.my* %this, i64 0, i32 3, i64 %idx, i32 1 - %2 = load i32, i32* %1, align 4 - %3 = getelementptr %class.my, %class.my* %this, i64 0, i32 3, i64 %idx, i32 2 - %4 = load i32, i32* %3, align 4 +define void @test_GEP_across_BB(ptr %this, i64 %idx) { + %1 = getelementptr %class.my, ptr %this, i64 0, i32 3, i64 %idx, i32 1 + %2 = load i32, ptr %1, align 4 + %3 = getelementptr %class.my, ptr %this, i64 0, i32 3, i64 %idx, i32 2 + %4 = load i32, ptr %3, align 4 %5 = icmp eq i32 %2, %4 br i1 %5, label %if.true, label %exit if.true: %6 = shl i32 %4, 1 - store i32 %6, i32* %3, align 4 + store i32 %6, ptr %3, align 4 br label %exit exit: %7 = add nsw i32 %4, 1 - store i32 %7, i32* %1, align 4 + store i32 %7, ptr %1, align 4 ret void } ; CHECK-LABEL: test_GEP_across_BB: @@ -97,21 +94,19 @@ exit: ; CHECK-NoAA: add i64 [[TMP]], 532 ; CHECK-NoAA: if.true: ; CHECK-NoAA: inttoptr -; CHECK-NoAA: bitcast ; CHECK-NoAA: {{%sunk[a-zA-Z0-9]+}} = getelementptr i8, {{.*}}, i64 532 ; CHECK-NoAA: exit: ; CHECK-NoAA: inttoptr -; CHECK-NoAA: bitcast ; CHECK-NoAA: {{%sunk[a-zA-Z0-9]+}} = getelementptr i8, {{.*}}, i64 528 ; CHECK-UseAA-LABEL: test_GEP_across_BB( ; CHECK-UseAA: [[PTR0:%[a-zA-Z0-9]+]] = getelementptr -; CHECK-UseAA: getelementptr i8, i8* [[PTR0]], i64 528 -; CHECK-UseAA: getelementptr i8, i8* [[PTR0]], i64 532 +; CHECK-UseAA: getelementptr i8, ptr [[PTR0]], i64 528 +; CHECK-UseAA: getelementptr i8, ptr [[PTR0]], i64 532 ; CHECK-UseAA: if.true: -; CHECK-UseAA: {{%sunk[a-zA-Z0-9]+}} = getelementptr i8, i8* [[PTR0]], i64 532 +; CHECK-UseAA: {{%sunk[a-zA-Z0-9]+}} = getelementptr i8, ptr [[PTR0]], i64 532 ; CHECK-UseAA: exit: -; CHECK-UseAA: {{%sunk[a-zA-Z0-9]+}} = getelementptr i8, i8* [[PTR0]], i64 528 +; CHECK-UseAA: {{%sunk[a-zA-Z0-9]+}} = getelementptr i8, ptr [[PTR0]], i64 528 %struct.S = type { float, double } @struct_array = global [1024 x %struct.S] zeroinitializer, align 16 @@ -121,49 +116,49 @@ exit: ; The constant offsets are from indices "i64 %idxprom" and "i32 1". As the ; alloca size of %struct.S is 16, and "i32 1" is the 2rd element whose field ; offset is 8, the total constant offset is (5 * 16 + 8) = 88. -define double* @test-struct_1(i32 %i) { +define ptr @test-struct_1(i32 %i) { entry: %add = add nsw i32 %i, 5 %idxprom = sext i32 %add to i64 - %p = getelementptr [1024 x %struct.S], [1024 x %struct.S]* @struct_array, i64 0, i64 %idxprom, i32 1 - ret double* %p + %p = getelementptr [1024 x %struct.S], ptr @struct_array, i64 0, i64 %idxprom, i32 1 + ret ptr %p } ; CHECK-NoAA-LABEL: @test-struct_1( ; CHECK-NoAA-NOT: getelementptr ; CHECK-NoAA: add i64 %{{[a-zA-Z0-9]+}}, 88 ; CHECK-UseAA-LABEL: @test-struct_1( -; CHECK-UseAA: getelementptr i8, i8* %{{[a-zA-Z0-9]+}}, i64 88 +; CHECK-UseAA: getelementptr i8, ptr %{{[a-zA-Z0-9]+}}, i64 88 %struct3 = type { i64, i32 } %struct2 = type { %struct3, i32 } %struct1 = type { i64, %struct2 } -%struct0 = type { i32, i32, i64*, [100 x %struct1] } +%struct0 = type { i32, i32, ptr, [100 x %struct1] } ; The constant offsets are from indices "i32 3", "i64 %arrayidx" and "i32 1". ; "i32 3" is the 4th element whose field offset is 16. The alloca size of ; %struct1 is 32. "i32 1" is the 2rd element whose field offset is 8. 
So the ; total constant offset is 16 + (-2 * 32) + 8 = -40 -define %struct2* @test-struct_2(%struct0* %ptr, i64 %idx) { +define ptr @test-struct_2(ptr %ptr, i64 %idx) { entry: %arrayidx = add nsw i64 %idx, -2 - %ptr2 = getelementptr %struct0, %struct0* %ptr, i64 0, i32 3, i64 %arrayidx, i32 1 - ret %struct2* %ptr2 + %ptr2 = getelementptr %struct0, ptr %ptr, i64 0, i32 3, i64 %arrayidx, i32 1 + ret ptr %ptr2 } ; CHECK-NoAA-LABEL: @test-struct_2( ; CHECK-NoAA-NOT: = getelementptr ; CHECK-NoAA: add i64 %{{[a-zA-Z0-9]+}}, -40 ; CHECK-UseAA-LABEL: @test-struct_2( -; CHECK-UseAA: getelementptr i8, i8* %{{[a-zA-Z0-9]+}}, i64 -40 +; CHECK-UseAA: getelementptr i8, ptr %{{[a-zA-Z0-9]+}}, i64 -40 ; Test that when a index is added from two constant, SeparateConstOffsetFromGEP ; pass does not generate incorrect result. -define void @test_const_add([3 x i32]* %in) { +define void @test_const_add(ptr %in) { %inc = add nsw i32 2, 1 %idxprom = sext i32 %inc to i64 - %arrayidx = getelementptr [3 x i32], [3 x i32]* %in, i64 %idxprom, i64 2 - store i32 0, i32* %arrayidx, align 4 + %arrayidx = getelementptr [3 x i32], ptr %in, i64 %idxprom, i64 2 + store i32 0, ptr %arrayidx, align 4 ret void } ; CHECK-LABEL: test_const_add: diff --git a/llvm/test/CodeGen/AArch64/aarch64-interleaved-ld-combine.ll b/llvm/test/CodeGen/AArch64/aarch64-interleaved-ld-combine.ll index 6ced29d..a5d94c1 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-interleaved-ld-combine.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-interleaved-ld-combine.ll @@ -6,14 +6,13 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" target triple = "arm64--linux-gnu" ; This should be lowered into LD4 -define void @aarch64_ilc_const(<4 x float>* %ptr) { +define void @aarch64_ilc_const(ptr %ptr) { entry: ;;; Check LLVM transformation ; CHECK-LABEL: @aarch64_ilc_const( -; CHECK-DAG: [[GEP:%.+]] = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 2 -; CHECK-DAG: [[CAST:%.+]] = bitcast <4 x float>* [[GEP]] to <16 x float>* -; CHECK-DAG: [[LOAD:%.+]] = load <16 x float>, <16 x float>* [[CAST]], align 16 +; CHECK-DAG: [[GEP:%.+]] = getelementptr inbounds <4 x float>, ptr %ptr, i64 2 +; CHECK-DAG: [[LOAD:%.+]] = load <16 x float>, ptr [[GEP]], align 16 ; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> ; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> ; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> @@ -25,14 +24,14 @@ entry: ; AS: ld4 ; AS: ret - %gep1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 2 - %gep2 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 3 - %gep3 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 4 - %gep4 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 5 - %ld1 = load <4 x float>, <4 x float>* %gep1, align 16 - %ld2 = load <4 x float>, <4 x float>* %gep2, align 16 - %ld3 = load <4 x float>, <4 x float>* %gep3, align 16 - %ld4 = load <4 x float>, <4 x float>* %gep4, align 16 + %gep1 = getelementptr inbounds <4 x float>, ptr %ptr, i64 2 + %gep2 = getelementptr inbounds <4 x float>, ptr %ptr, i64 3 + %gep3 = getelementptr inbounds <4 x float>, ptr %ptr, i64 4 + %gep4 = getelementptr inbounds <4 x float>, ptr %ptr, i64 5 + %ld1 = load <4 x float>, ptr %gep1, align 16 + %ld2 = load <4 x float>, ptr %gep2, align 16 + %ld3 = load <4 x float>, ptr %gep3, align 16 + %ld4 = load <4 x float>, ptr %gep4, align 16 %sv1 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x 
i32> %sv2 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> %sv3 = shufflevector <4 x float> %ld3, <4 x float> %ld4, <4 x i32> @@ -42,24 +41,23 @@ entry: %m8_11 = shufflevector <4 x float> %sv2, <4 x float> %sv4, <4 x i32> %m12_15 = shufflevector <4 x float> %sv2, <4 x float> %sv4, <4 x i32> - store <4 x float> %m0_3, <4 x float>* %gep1, align 16 - store <4 x float> %m4_7, <4 x float>* %gep2, align 16 - store <4 x float> %m8_11, <4 x float>* %gep3, align 16 - store <4 x float> %m12_15, <4 x float>* %gep4, align 16 + store <4 x float> %m0_3, ptr %gep1, align 16 + store <4 x float> %m4_7, ptr %gep2, align 16 + store <4 x float> %m8_11, ptr %gep3, align 16 + store <4 x float> %m12_15, ptr %gep4, align 16 ret void } ; This should be lowered into LD4 -define void @aarch64_ilc_idx(<4 x float>* %ptr, i64 %idx) { +define void @aarch64_ilc_idx(ptr %ptr, i64 %idx) { entry: ;;; Check LLVM transformation ; CHECK-LABEL: @aarch64_ilc_idx( ; CHECK-DAG: [[ADD:%.+]] = add i64 %idx, 16 ; CHECK-DAG: [[LSHR:%.+]] = lshr i64 [[ADD]], 2 -; CHECK-DAG: [[GEP:%.+]] = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 [[LSHR]] -; CHECK-DAG: [[CAST:%.+]] = bitcast <4 x float>* [[GEP]] to <16 x float>* -; CHECK-DAG: [[LOAD:%.+]] = load <16 x float>, <16 x float>* [[CAST]], align 16 +; CHECK-DAG: [[GEP:%.+]] = getelementptr inbounds <4 x float>, ptr %ptr, i64 [[LSHR]] +; CHECK-DAG: [[LOAD:%.+]] = load <16 x float>, ptr [[GEP]], align 16 ; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> ; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> ; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> @@ -87,14 +85,14 @@ entry: %a4 = add i64 %idx, 28 %idx4 = lshr i64 %a4, 2 - %gep2 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 %idx2 - %gep4 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 %idx4 - %gep1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 %idx1 - %gep3 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 %idx3 - %ld1 = load <4 x float>, <4 x float>* %gep1, align 16 - %ld2 = load <4 x float>, <4 x float>* %gep2, align 16 - %ld3 = load <4 x float>, <4 x float>* %gep3, align 16 - %ld4 = load <4 x float>, <4 x float>* %gep4, align 16 + %gep2 = getelementptr inbounds <4 x float>, ptr %ptr, i64 %idx2 + %gep4 = getelementptr inbounds <4 x float>, ptr %ptr, i64 %idx4 + %gep1 = getelementptr inbounds <4 x float>, ptr %ptr, i64 %idx1 + %gep3 = getelementptr inbounds <4 x float>, ptr %ptr, i64 %idx3 + %ld1 = load <4 x float>, ptr %gep1, align 16 + %ld2 = load <4 x float>, ptr %gep2, align 16 + %ld3 = load <4 x float>, ptr %gep3, align 16 + %ld4 = load <4 x float>, ptr %gep4, align 16 %sv1 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> %sv2 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> %sv3 = shufflevector <4 x float> %ld3, <4 x float> %ld4, <4 x i32> @@ -104,24 +102,23 @@ entry: %m8_11 = shufflevector <4 x float> %sv2, <4 x float> %sv4, <4 x i32> %m12_15 = shufflevector <4 x float> %sv2, <4 x float> %sv4, <4 x i32> - store <4 x float> %m0_3, <4 x float>* %gep1, align 16 - store <4 x float> %m4_7, <4 x float>* %gep2, align 16 - store <4 x float> %m8_11, <4 x float>* %gep3, align 16 - store <4 x float> %m12_15, <4 x float>* %gep4, align 16 + store <4 x float> %m0_3, ptr %gep1, align 16 + store <4 x float> %m4_7, ptr %gep2, align 16 + store <4 x float> %m8_11, ptr %gep3, align 16 + store <4 x float> %m12_15, ptr %gep4, 
align 16 ret void } ; This should be lowered into LD4, a offset of has to be taken into account %struct.ilc = type <{ float, [0 x <4 x float>] }> -define void @aarch64_ilc_struct(%struct.ilc* %ptr, i64 %idx) { +define void @aarch64_ilc_struct(ptr %ptr, i64 %idx) { entry: ;;; Check LLVM transformation ; CHECK-LABEL: @aarch64_ilc_struct( ; CHECK-DAG: [[LSHR:%.+]] = lshr i64 %idx, 2 -; CHECK-DAG: [[GEP:%.+]] = getelementptr %struct.ilc, %struct.ilc* %ptr, i32 0, i32 1, i64 [[LSHR]] -; CHECK-DAG: [[CAST:%.+]] = bitcast <4 x float>* [[GEP]] to <16 x float>* -; CHECK-DAG: [[LOAD:%.+]] = load <16 x float>, <16 x float>* [[CAST]], align 4 +; CHECK-DAG: [[GEP:%.+]] = getelementptr %struct.ilc, ptr %ptr, i32 0, i32 1, i64 [[LSHR]] +; CHECK-DAG: [[LOAD:%.+]] = load <16 x float>, ptr [[GEP]], align 4 ; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> ; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> ; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> @@ -147,15 +144,15 @@ entry: %a3 = add i64 %idx, 12 %idx4 = lshr i64 %a3, 2 - %gep2 = getelementptr %struct.ilc, %struct.ilc* %ptr, i32 0, i32 1, i64 %idx2 - %gep3 = getelementptr %struct.ilc, %struct.ilc* %ptr, i32 0, i32 1, i64 %idx3 - %gep4 = getelementptr %struct.ilc, %struct.ilc* %ptr, i32 0, i32 1, i64 %idx4 + %gep2 = getelementptr %struct.ilc, ptr %ptr, i32 0, i32 1, i64 %idx2 + %gep3 = getelementptr %struct.ilc, ptr %ptr, i32 0, i32 1, i64 %idx3 + %gep4 = getelementptr %struct.ilc, ptr %ptr, i32 0, i32 1, i64 %idx4 %idx1 = lshr i64 %idx, 2 - %gep1 = getelementptr %struct.ilc, %struct.ilc* %ptr, i32 0, i32 1, i64 %idx1 - %ld1 = load <4 x float>, <4 x float>* %gep1, align 4 - %ld2 = load <4 x float>, <4 x float>* %gep2, align 4 - %ld3 = load <4 x float>, <4 x float>* %gep3, align 4 - %ld4 = load <4 x float>, <4 x float>* %gep4, align 4 + %gep1 = getelementptr %struct.ilc, ptr %ptr, i32 0, i32 1, i64 %idx1 + %ld1 = load <4 x float>, ptr %gep1, align 4 + %ld2 = load <4 x float>, ptr %gep2, align 4 + %ld3 = load <4 x float>, ptr %gep3, align 4 + %ld4 = load <4 x float>, ptr %gep4, align 4 %sv1 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> %sv2 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> %sv3 = shufflevector <4 x float> %ld3, <4 x float> %ld4, <4 x i32> @@ -165,21 +162,20 @@ entry: %m8_11 = shufflevector <4 x float> %sv2, <4 x float> %sv4, <4 x i32> %m12_15 = shufflevector <4 x float> %sv2, <4 x float> %sv4, <4 x i32> - store <4 x float> %m0_3, <4 x float>* %gep1, align 16 - store <4 x float> %m4_7, <4 x float>* %gep2, align 16 - store <4 x float> %m8_11, <4 x float>* %gep3, align 16 - store <4 x float> %m12_15, <4 x float>* %gep4, align 16 + store <4 x float> %m0_3, ptr %gep1, align 16 + store <4 x float> %m4_7, ptr %gep2, align 16 + store <4 x float> %m8_11, ptr %gep3, align 16 + store <4 x float> %m12_15, ptr %gep4, align 16 ret void } ; This should be lowered into LD2 -define void @aarch64_ilc_idx_ld2(<4 x float>* %ptr, i64 %idx) { +define void @aarch64_ilc_idx_ld2(ptr %ptr, i64 %idx) { entry: ; CHECK-LABEL: @aarch64_ilc_idx_ld2( ; CHECK-DAG: [[LSHR:%.+]] = lshr i64 %idx, 2 -; CHECK-DAG: [[GEP:%.+]] = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 [[LSHR]] -; CHECK-DAG: [[CAST:%.+]] = bitcast <4 x float>* [[GEP]] to <8 x float>* -; CHECK-DAG: [[LOAD:%.+]] = load <8 x float>, <8 x float>* [[CAST]], align 16 +; CHECK-DAG: [[GEP:%.+]] = getelementptr inbounds <4 x float>, ptr %ptr, i64 
[[LSHR]] +; CHECK-DAG: [[LOAD:%.+]] = load <8 x float>, ptr [[GEP]], align 16 ; CHECK: %{{.* }}= shufflevector <8 x float> [[LOAD]], <8 x float> poison, <4 x i32> ; CHECK: %{{.* }}= shufflevector <8 x float> [[LOAD]], <8 x float> poison, <4 x i32> ; CHECK-DAG: ret void @@ -192,26 +188,25 @@ entry: %a1 = add i64 %idx, 4 %idx2 = lshr i64 %a1, 2 - %gep1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 %idx1 - %gep2 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 %idx2 - %ld1 = load <4 x float>, <4 x float>* %gep1, align 16 - %ld2 = load <4 x float>, <4 x float>* %gep2, align 16 + %gep1 = getelementptr inbounds <4 x float>, ptr %ptr, i64 %idx1 + %gep2 = getelementptr inbounds <4 x float>, ptr %ptr, i64 %idx2 + %ld1 = load <4 x float>, ptr %gep1, align 16 + %ld2 = load <4 x float>, ptr %gep2, align 16 %m0_3 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> %m4_7 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> - store <4 x float> %m0_3, <4 x float>* %gep1 - store <4 x float> %m4_7, <4 x float>* %gep2 + store <4 x float> %m0_3, ptr %gep1 + store <4 x float> %m4_7, ptr %gep2 ret void } ; This should be lowered into LD3 -define void @aarch64_ilc_idx_ld3(<4 x float>* %ptr, i64 %idx) { +define void @aarch64_ilc_idx_ld3(ptr %ptr, i64 %idx) { entry: ; CHECK-LABEL: @aarch64_ilc_idx_ld3( ; CHECK-DAG: [[LSHR:%.+]] = lshr i64 %idx, 2 -; CHECK-DAG: [[GEP:%.+]] = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 [[LSHR]] -; CHECK-DAG: [[CAST:%.+]] = bitcast <4 x float>* [[GEP]] to <12 x float>* -; CHECK-DAG: [[LOAD:%.+]] = load <12 x float>, <12 x float>* [[CAST]], align 16 +; CHECK-DAG: [[GEP:%.+]] = getelementptr inbounds <4 x float>, ptr %ptr, i64 [[LSHR]] +; CHECK-DAG: [[LOAD:%.+]] = load <12 x float>, ptr [[GEP]], align 16 ; CHECK: %{{.* }}= shufflevector <12 x float> [[LOAD]], <12 x float> poison, <4 x i32> ; CHECK: %{{.* }}= shufflevector <12 x float> [[LOAD]], <12 x float> poison, <4 x i32> ; CHECK: %{{.* }}= shufflevector <12 x float> [[LOAD]], <12 x float> poison, <4 x i32> @@ -227,12 +222,12 @@ entry: %a2 = add i64 %idx, 8 %idx3 = lshr i64 %a2, 2 - %gep1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 %idx1 - %gep2 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 %idx2 - %gep3 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 %idx3 - %ld1 = load <4 x float>, <4 x float>* %gep1, align 16 - %ld2 = load <4 x float>, <4 x float>* %gep2, align 16 - %ld3 = load <4 x float>, <4 x float>* %gep3, align 16 + %gep1 = getelementptr inbounds <4 x float>, ptr %ptr, i64 %idx1 + %gep2 = getelementptr inbounds <4 x float>, ptr %ptr, i64 %idx2 + %gep3 = getelementptr inbounds <4 x float>, ptr %ptr, i64 %idx3 + %ld1 = load <4 x float>, ptr %gep1, align 16 + %ld2 = load <4 x float>, ptr %gep2, align 16 + %ld3 = load <4 x float>, ptr %gep3, align 16 %sv1 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> %sv2 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> @@ -241,27 +236,27 @@ entry: %m4_7 = shufflevector <4 x float> %sv2, <4 x float> %ld3, <4 x i32> %m8_11 = shufflevector <4 x float> %sv3, <4 x float> %ld3, <4 x i32> - store <4 x float> %m0_3, <4 x float>* %gep1, align 16 - store <4 x float> %m4_7, <4 x float>* %gep2, align 16 - store <4 x float> %m8_11, <4 x float>* %gep3, align 16 + store <4 x float> %m0_3, ptr %gep1, align 16 + store <4 x float> %m4_7, ptr %gep2, align 16 + store <4 x float> %m8_11, ptr %gep3, align 16 ret void } ; %sv3 = shufflevector <4 x float> %ld3, <4 x float> %ld4, <4 x 
i32> ; This must not be lowered -define void @aarch64_ilc_i32_idx(<4 x float>* %ptr, i32 %idx) { +define void @aarch64_ilc_i32_idx(ptr %ptr, i32 %idx) { ; CHECK-LABEL: @aarch64_ilc_i32_idx( ; CHECK: %idx1 = lshr i32 %idx, 2 ; CHECK-NEXT: %a1 = add i32 %idx, 4 ; CHECK-NEXT: %idx2 = lshr i32 %a1, 2 -; CHECK-NEXT: %gep1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i32 %idx1 -; CHECK-NEXT: %gep2 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i32 %idx2 -; CHECK-NEXT: %ld1 = load <4 x float>, <4 x float>* %gep1, align 16 -; CHECK-NEXT: %ld2 = load <4 x float>, <4 x float>* %gep2, align 16 +; CHECK-NEXT: %gep1 = getelementptr inbounds <4 x float>, ptr %ptr, i32 %idx1 +; CHECK-NEXT: %gep2 = getelementptr inbounds <4 x float>, ptr %ptr, i32 %idx2 +; CHECK-NEXT: %ld1 = load <4 x float>, ptr %gep1, align 16 +; CHECK-NEXT: %ld2 = load <4 x float>, ptr %gep2, align 16 ; CHECK-NEXT: %m0_3 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> ; CHECK-NEXT: %m4_7 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> -; CHECK-NEXT: store <4 x float> %m0_3, <4 x float>* %gep1, align 16 -; CHECK-NEXT: store <4 x float> %m4_7, <4 x float>* %gep2, align 16 +; CHECK-NEXT: store <4 x float> %m0_3, ptr %gep1, align 16 +; CHECK-NEXT: store <4 x float> %m4_7, ptr %gep2, align 16 ; CHECK-NEXT: ret void ; AS-LABEL: aarch64_ilc_i32_idx @@ -276,29 +271,28 @@ entry: %a1 = add i32 %idx, 4 %idx2 = lshr i32 %a1, 2 - %gep1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i32 %idx1 - %gep2 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i32 %idx2 - %ld1 = load <4 x float>, <4 x float>* %gep1, align 16 - %ld2 = load <4 x float>, <4 x float>* %gep2, align 16 + %gep1 = getelementptr inbounds <4 x float>, ptr %ptr, i32 %idx1 + %gep2 = getelementptr inbounds <4 x float>, ptr %ptr, i32 %idx2 + %ld1 = load <4 x float>, ptr %gep1, align 16 + %ld2 = load <4 x float>, ptr %gep2, align 16 %m0_3 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> %m4_7 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> - store <4 x float> %m0_3, <4 x float>* %gep1, align 16 - store <4 x float> %m4_7, <4 x float>* %gep2, align 16 + store <4 x float> %m0_3, ptr %gep1, align 16 + store <4 x float> %m4_7, ptr %gep2, align 16 ret void } ; Volatile loads must not be lowered -define void @aarch64_ilc_volatile(<4 x float>* %ptr) { +define void @aarch64_ilc_volatile(ptr %ptr) { ; CHECK-LABEL: @aarch64_ilc_volatile( -; CHECK: %gep1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i32 0 -; CHECK-NEXT: %gep2 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i32 1 -; CHECK-NEXT: %ld1 = load volatile <4 x float>, <4 x float>* %gep1, align 16 -; CHECK-NEXT: %ld2 = load <4 x float>, <4 x float>* %gep2, align 16 +; CHECK: %gep2 = getelementptr inbounds <4 x float>, ptr %ptr, i32 1 +; CHECK-NEXT: %ld1 = load volatile <4 x float>, ptr %ptr, align 16 +; CHECK-NEXT: %ld2 = load <4 x float>, ptr %gep2, align 16 ; CHECK-NEXT: %m0_3 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> ; CHECK-NEXT: %m4_7 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> -; CHECK-NEXT: store <4 x float> %m0_3, <4 x float>* %gep1, align 16 -; CHECK-NEXT: store <4 x float> %m4_7, <4 x float>* %gep2, align 16 +; CHECK-NEXT: store <4 x float> %m0_3, ptr %ptr, align 16 +; CHECK-NEXT: store <4 x float> %m4_7, ptr %gep2, align 16 ; CHECK-NEXT: ret void ; AS-LABEL: aarch64_ilc_volatile @@ -309,30 +303,28 @@ define void @aarch64_ilc_volatile(<4 x float>* %ptr) { ; AS-DAG: ret entry: - %gep1 = 
getelementptr inbounds <4 x float>, <4 x float>* %ptr, i32 0 - %gep2 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i32 1 - %ld1 = load volatile <4 x float>, <4 x float>* %gep1, align 16 - %ld2 = load <4 x float>, <4 x float>* %gep2, align 16 + %gep2 = getelementptr inbounds <4 x float>, ptr %ptr, i32 1 + %ld1 = load volatile <4 x float>, ptr %ptr, align 16 + %ld2 = load <4 x float>, ptr %gep2, align 16 %m0_3 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> %m4_7 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> - store <4 x float> %m0_3, <4 x float>* %gep1, align 16 - store <4 x float> %m4_7, <4 x float>* %gep2, align 16 + store <4 x float> %m0_3, ptr %ptr, align 16 + store <4 x float> %m4_7, ptr %gep2, align 16 ret void } ; This must not be lowered -define void @aarch64_ilc_depmem(<4 x float>* %ptr, i32 %idx) { +define void @aarch64_ilc_depmem(ptr %ptr, i32 %idx) { entry: ; CHECK-LABEL: @aarch64_ilc_depmem( -; CHECK: %gep1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i32 0 -; CHECK-NEXT: %gep2 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i32 1 -; CHECK-NEXT: %ld1 = load <4 x float>, <4 x float>* %gep1, align 16 -; CHECK-NEXT: store <4 x float> %ld1, <4 x float>* %gep2, align 16 -; CHECK-NEXT: %ld2 = load <4 x float>, <4 x float>* %gep2, align 16 +; CHECK: %gep2 = getelementptr inbounds <4 x float>, ptr %ptr, i32 1 +; CHECK-NEXT: %ld1 = load <4 x float>, ptr %ptr, align 16 +; CHECK-NEXT: store <4 x float> %ld1, ptr %gep2, align 16 +; CHECK-NEXT: %ld2 = load <4 x float>, ptr %gep2, align 16 ; CHECK-NEXT: %m0_3 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> ; CHECK-NEXT: %m4_7 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> -; CHECK-NEXT: store <4 x float> %m0_3, <4 x float>* %gep1, align 16 -; CHECK-NEXT: store <4 x float> %m4_7, <4 x float>* %gep2, align 16 +; CHECK-NEXT: store <4 x float> %m0_3, ptr %ptr, align 16 +; CHECK-NEXT: store <4 x float> %m4_7, ptr %gep2, align 16 ; CHECK-NEXT: ret void ; AS-LABEL: aarch64_ilc_depmem @@ -342,39 +334,32 @@ entry: ; AS-NOT: ld4 ; AS-DAG: ret - %gep1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i32 0 - %gep2 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i32 1 - %ld1 = load <4 x float>, <4 x float>* %gep1, align 16 - store <4 x float> %ld1, <4 x float>* %gep2, align 16 - %ld2 = load <4 x float>, <4 x float>* %gep2, align 16 + %gep2 = getelementptr inbounds <4 x float>, ptr %ptr, i32 1 + %ld1 = load <4 x float>, ptr %ptr, align 16 + store <4 x float> %ld1, ptr %gep2, align 16 + %ld2 = load <4 x float>, ptr %gep2, align 16 %m0_3 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> %m4_7 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> - store <4 x float> %m0_3, <4 x float>* %gep1, align 16 - store <4 x float> %m4_7, <4 x float>* %gep2, align 16 + store <4 x float> %m0_3, ptr %ptr, align 16 + store <4 x float> %m4_7, ptr %gep2, align 16 ret void } ; This cannot be converted - insertion position cannot be determined -define void @aarch64_no_insertion_pos(float* %ptr) { +define void @aarch64_no_insertion_pos(ptr %ptr) { entry: ; CHECK-LABEL: @aarch64_no_insertion_pos( -; CHECK: %p0 = getelementptr inbounds float, float* %ptr, i32 0 -; CHECK-NEXT: %p1 = getelementptr inbounds float, float* %ptr, i32 4 -; CHECK-NEXT: %b0 = bitcast float* %p0 to <5 x float>* -; CHECK-NEXT: %b1 = bitcast float* %p1 to <5 x float>* -; CHECK-NEXT: %l0 = load <5 x float>, <5 x float>* %b0 -; CHECK-NEXT: %l1 = load <5 x float>, <5 x float>* %b1 +; 
CHECK: %p1 = getelementptr inbounds float, ptr %ptr, i32 4 +; CHECK-NEXT: %l0 = load <5 x float>, ptr %ptr +; CHECK-NEXT: %l1 = load <5 x float>, ptr %p1 ; CHECK-NEXT: %s0 = shufflevector <5 x float> %l0, <5 x float> %l1, <4 x i32> ; CHECK-NEXT: %s1 = shufflevector <5 x float> %l0, <5 x float> %l1, <4 x i32> ; CHECK-NEXT: ret void - %p0 = getelementptr inbounds float, float* %ptr, i32 0 - %p1 = getelementptr inbounds float, float* %ptr, i32 4 - %b0 = bitcast float* %p0 to <5 x float>* - %b1 = bitcast float* %p1 to <5 x float>* - %l0 = load <5 x float>, <5 x float>* %b0 - %l1 = load <5 x float>, <5 x float>* %b1 + %p1 = getelementptr inbounds float, ptr %ptr, i32 4 + %l0 = load <5 x float>, ptr %ptr + %l1 = load <5 x float>, ptr %p1 %s0 = shufflevector <5 x float> %l0, <5 x float> %l1, <4 x i32> %s1 = shufflevector <5 x float> %l0, <5 x float> %l1, <4 x i32> ret void @@ -382,25 +367,19 @@ entry: ; This cannot be converted - the insertion position does not dominate all ; uses -define void @aarch64_insertpos_does_not_dominate(float* %ptr) { +define void @aarch64_insertpos_does_not_dominate(ptr %ptr) { entry: ; CHECK-LABEL: @aarch64_insertpos_does_not_dominate( -; CHECK: %p0 = getelementptr inbounds float, float* %ptr, i32 0 -; CHECK-NEXT: %p1 = getelementptr inbounds float, float* %ptr, i32 1 -; CHECK-NEXT: %b0 = bitcast float* %p0 to <7 x float>* -; CHECK-NEXT: %b1 = bitcast float* %p1 to <7 x float>* -; CHECK-NEXT: %l1 = load <7 x float>, <7 x float>* %b1 +; CHECK: %p1 = getelementptr inbounds float, ptr %ptr, i32 1 +; CHECK-NEXT: %l1 = load <7 x float>, ptr %p1 ; CHECK-NEXT: %s1 = shufflevector <7 x float> %l1, <7 x float> poison, <4 x i32> -; CHECK-NEXT: %l0 = load <7 x float>, <7 x float>* %b0 +; CHECK-NEXT: %l0 = load <7 x float>, ptr %ptr ; CHECK-NEXT: %s0 = shufflevector <7 x float> %l0, <7 x float> poison, <4 x i32> ; CHECK-NEXT: ret void - %p0 = getelementptr inbounds float, float* %ptr, i32 0 - %p1 = getelementptr inbounds float, float* %ptr, i32 1 - %b0 = bitcast float* %p0 to <7 x float>* - %b1 = bitcast float* %p1 to <7 x float>* - %l1 = load <7 x float>, <7 x float>* %b1 + %p1 = getelementptr inbounds float, ptr %ptr, i32 1 + %l1 = load <7 x float>, ptr %p1 %s1 = shufflevector <7 x float> %l1, <7 x float> poison, <4 x i32> - %l0 = load <7 x float>, <7 x float>* %b0 + %l0 = load <7 x float>, ptr %ptr %s0 = shufflevector <7 x float> %l0, <7 x float> poison, <4 x i32> ret void } diff --git a/llvm/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll b/llvm/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll index 1b2ed4b..c32e0b9 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll @@ -5,23 +5,21 @@ target triple = "aarch64--linux-android" %typeD = type { i32, i32, [256 x i32], [257 x i32] } ; Function Attrs: noreturn nounwind uwtable -define i32 @test1(%typeD* nocapture %s) { +define i32 @test1(ptr nocapture %s) { entry: ; CHECK-LABEL: entry: -; CHECK: %uglygep = getelementptr i8, i8* %0, i64 1032 +; CHECK: %uglygep = getelementptr i8, ptr %s, i64 1032 ; CHECK: br label %do.body.i - %tPos = getelementptr inbounds %typeD, %typeD* %s, i64 0, i32 0 - %k0 = getelementptr inbounds %typeD, %typeD* %s, i64 0, i32 1 - %.pre = load i32, i32* %tPos, align 4 + %k0 = getelementptr inbounds %typeD, ptr %s, i64 0, i32 1 + %.pre = load i32, ptr %s, align 4 br label %do.body.i do.body.i: ; CHECK-LABEL: do.body.i: -; CHECK: %uglygep2 = getelementptr i8, i8* %uglygep, i64 %3 -; CHECK-NEXT: %4 = bitcast i8* %uglygep2 to i32* -; CHECK-NOT: %uglygep2 
= getelementptr i8, i8* %uglygep, i64 1032 +; CHECK: %uglygep2 = getelementptr i8, ptr %uglygep, i64 %2 +; CHECK-NOT: %uglygep2 = getelementptr i8, ptr %uglygep, i64 1032 %0 = phi i32 [ 256, %entry ], [ %.be, %do.body.i.backedge ] @@ -29,8 +27,8 @@ do.body.i: %add.i = add nsw i32 %1, %0 %shr.i = ashr i32 %add.i, 1 %idxprom.i = sext i32 %shr.i to i64 - %arrayidx.i = getelementptr inbounds %typeD, %typeD* %s, i64 0, i32 3, i64 %idxprom.i - %2 = load i32, i32* %arrayidx.i, align 4 + %arrayidx.i = getelementptr inbounds %typeD, ptr %s, i64 0, i32 3, i64 %idxprom.i + %2 = load i32, ptr %arrayidx.i, align 4 %cmp.i = icmp sle i32 %2, %.pre %na.1.i = select i1 %cmp.i, i32 %0, i32 %shr.i %nb.1.i = select i1 %cmp.i, i32 %shr.i, i32 %1 @@ -44,7 +42,7 @@ do.body.i.backedge: br label %do.body.i fooo.exit: ; preds = %do.body.i - store i32 %nb.1.i, i32* %k0, align 4 + store i32 %nb.1.i, ptr %k0, align 4 br label %do.body.i.backedge } diff --git a/llvm/test/CodeGen/AArch64/regalloc-last-chance-recolor-with-split.mir b/llvm/test/CodeGen/AArch64/regalloc-last-chance-recolor-with-split.mir index 8ae00a2..a5a421e 100644 --- a/llvm/test/CodeGen/AArch64/regalloc-last-chance-recolor-with-split.mir +++ b/llvm/test/CodeGen/AArch64/regalloc-last-chance-recolor-with-split.mir @@ -7,7 +7,7 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64-none-linux-gnu" - define void @ham(i8 addrspace(1)* %arg) gc "statepoint-example" { + define void @ham(ptr addrspace(1) %arg) gc "statepoint-example" { bb: br i1 undef, label %bb27.preheader, label %bb23 @@ -66,7 +66,7 @@ unreachable } - declare i8 addrspace(1)* @bar(i64, i64, i64, i32*) + declare ptr addrspace(1) @bar(i64, i64, i64, ptr) declare void @wombat() declare void @blam.1() declare void @blam(i32) @@ -303,7 +303,7 @@ body: | ; CHECK-NEXT: bb.1.bb27.preheader: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $x24 = LDRXui undef renamable $x8, 0 :: (load unordered (s64) from `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: renamable $x24 = LDRXui undef renamable $x8, 0 :: (load unordered (s64) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: renamable $w21 = MOVi32imm -8280 ; CHECK-NEXT: renamable $w23 = MOVi32imm -6 ; CHECK-NEXT: renamable $w25 = MOVi32imm 3, implicit-def $x25 @@ -479,13 +479,13 @@ body: | ; CHECK-NEXT: liveins: $fp, $w20, $w23, $x10, $x19, $x22, $x24, $x25, $x26, $x27 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $x8 = COPY $xzr - ; CHECK-NEXT: renamable $w9 = LDRWui renamable $x8, 0 :: (load unordered (s32) from `i32 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: renamable $w9 = LDRWui renamable $x8, 0 :: (load unordered (s32) from `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: renamable $w9 = MADDWrrr killed renamable $w9, renamable $w10, $wzr ; CHECK-NEXT: renamable $w23 = nsw SUBWri killed renamable $w23, 2, 0 ; CHECK-NEXT: dead $xzr = SUBSXri killed renamable $x25, 107, 0, implicit-def $nzcv ; CHECK-NEXT: renamable $x25 = COPY killed renamable $fp ; CHECK-NEXT: renamable $w21 = MOVi32imm 2 - ; CHECK-NEXT: STRWui killed renamable $w9, killed renamable $x8, 0 :: (store unordered (s32) into `i32 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: STRWui killed renamable $w9, killed renamable $x8, 0 :: (store unordered (s32) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: Bcc 8, %bb.16, implicit killed $nzcv ; CHECK-NEXT: B %bb.3 ; CHECK-NEXT: {{ $}} @@ -532,7 +532,7 @@ body: | bb.1.bb27.preheader: successors: %bb.3(0x80000000) - 
%74:gpr64 = LDRXui undef %75:gpr64sp, 0 :: (load unordered (s64) from `i64 addrspace(1)* undef`, addrspace 1) + %74:gpr64 = LDRXui undef %75:gpr64sp, 0 :: (load unordered (s64) from `ptr addrspace(1) undef`, addrspace 1) %13:gpr32 = MOVi32imm -8280 %130:gpr32common = MOVi32imm -6 undef %129.sub_32:gpr64common = MOVi32imm 3 @@ -671,13 +671,13 @@ body: | successors: %bb.14(0x00000000), %bb.3(0x80000000) %115:gpr64sp = COPY $xzr - %116:gpr32 = LDRWui %115, 0 :: (load unordered (s32) from `i32 addrspace(1)* null`, addrspace 1) + %116:gpr32 = LDRWui %115, 0 :: (load unordered (s32) from `ptr addrspace(1) null`, addrspace 1) %117:gpr32 = MADDWrrr %116, %42.sub_32, $wzr %130:gpr32common = nsw SUBWri %130, 2, 0 dead $xzr = SUBSXri %129, 107, 0, implicit-def $nzcv %129:gpr64common = COPY %14 %13:gpr32 = MOVi32imm 2 - STRWui %117, %115, 0 :: (store unordered (s32) into `i32 addrspace(1)* null`, addrspace 1) + STRWui %117, %115, 0 :: (store unordered (s32) into `ptr addrspace(1) null`, addrspace 1) Bcc 8, %bb.14, implicit killed $nzcv B %bb.3 diff --git a/llvm/test/CodeGen/AArch64/scalable-vector-promotion.ll b/llvm/test/CodeGen/AArch64/scalable-vector-promotion.ll index 77f1747..e6ab52d 100644 --- a/llvm/test/CodeGen/AArch64/scalable-vector-promotion.ll +++ b/llvm/test/CodeGen/AArch64/scalable-vector-promotion.ll @@ -6,18 +6,18 @@ ; will assert once target lowering is ready, then we can bring in implementation for non-splat ; codepath for scalable vector. -define void @simpleOneInstructionPromotion(* %addr1, i32* %dest) { +define void @simpleOneInstructionPromotion(ptr %addr1, ptr %dest) { ; CHECK-LABEL: @simpleOneInstructionPromotion( -; CHECK-NEXT: [[IN1:%.*]] = load , * [[ADDR1:%.*]], align 8 +; CHECK-NEXT: [[IN1:%.*]] = load , ptr [[ADDR1:%.*]], align 8 ; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement [[IN1]], i32 1 ; CHECK-NEXT: [[OUT:%.*]] = or i32 [[EXTRACT]], 1 -; CHECK-NEXT: store i32 [[OUT]], i32* [[DEST:%.*]], align 4 +; CHECK-NEXT: store i32 [[OUT]], ptr [[DEST:%.*]], align 4 ; CHECK-NEXT: ret void ; - %in1 = load , * %addr1, align 8 + %in1 = load , ptr %addr1, align 8 %extract = extractelement %in1, i32 1 %out = or i32 %extract, 1 - store i32 %out, i32* %dest, align 4 + store i32 %out, ptr %dest, align 4 ret void } diff --git a/llvm/test/CodeGen/AArch64/stack-tagging-ex-2.ll b/llvm/test/CodeGen/AArch64/stack-tagging-ex-2.ll index 11389d5..719379b 100644 --- a/llvm/test/CodeGen/AArch64/stack-tagging-ex-2.ll +++ b/llvm/test/CodeGen/AArch64/stack-tagging-ex-2.ll @@ -29,139 +29,133 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64-unknown-unknown-eabi" -@_ZTIi = external dso_local constant i8* +@_ZTIi = external dso_local constant ptr ; Function Attrs: noreturn sanitize_memtag define dso_local void @_Z3barv() local_unnamed_addr #0 { entry: - %exception = tail call i8* @__cxa_allocate_exception(i64 4) #4 - %0 = bitcast i8* %exception to i32* - store i32 42, i32* %0, align 16, !tbaa !2 - tail call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #5 + %exception = tail call ptr @__cxa_allocate_exception(i64 4) #4 + store i32 42, ptr %exception, align 16, !tbaa !2 + tail call void @__cxa_throw(ptr %exception, ptr @_ZTIi, ptr null) #5 unreachable } -declare dso_local i8* @__cxa_allocate_exception(i64) local_unnamed_addr +declare dso_local ptr @__cxa_allocate_exception(i64) local_unnamed_addr -declare dso_local void @__cxa_throw(i8*, i8*, i8*) local_unnamed_addr +declare dso_local void @__cxa_throw(ptr, ptr, ptr) 
local_unnamed_addr ; Function Attrs: noreturn sanitize_memtag -define dso_local void @_Z3foov() local_unnamed_addr #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +define dso_local void @_Z3foov() local_unnamed_addr #0 personality ptr @__gxx_personality_v0 { entry: %A0 = alloca i32, align 4 - %0 = bitcast i32* %A0 to i8* - call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #4 - call void asm sideeffect "", "r"(i32* nonnull %A0) #4, !srcloc !6 + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %A0) #4 + call void asm sideeffect "", "r"(ptr nonnull %A0) #4, !srcloc !6 invoke void @_Z3barv() to label %try.cont unwind label %lpad lpad: ; preds = %entry - %1 = landingpad { i8*, i32 } + %0 = landingpad { ptr, i32 } cleanup - catch i8* bitcast (i8** @_ZTIi to i8*) - %2 = extractvalue { i8*, i32 } %1, 1 - %3 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) #4 - %matches = icmp eq i32 %2, %3 + catch ptr @_ZTIi + %1 = extractvalue { ptr, i32 } %0, 1 + %2 = call i32 @llvm.eh.typeid.for(ptr @_ZTIi) #4 + %matches = icmp eq i32 %1, %2 br i1 %matches, label %catch, label %ehcleanup catch: ; preds = %lpad - %4 = extractvalue { i8*, i32 } %1, 0 - %5 = call i8* @__cxa_begin_catch(i8* %4) #4 + %3 = extractvalue { ptr, i32 } %0, 0 + %4 = call ptr @__cxa_begin_catch(ptr %3) #4 call void @__cxa_end_catch() #4 br label %try.cont try.cont: ; preds = %entry, %catch - %exception = call i8* @__cxa_allocate_exception(i64 4) #4 - %6 = bitcast i8* %exception to i32* - store i32 15532, i32* %6, align 16, !tbaa !2 - call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #5 + %exception = call ptr @__cxa_allocate_exception(i64 4) #4 + store i32 15532, ptr %exception, align 16, !tbaa !2 + call void @__cxa_throw(ptr %exception, ptr @_ZTIi, ptr null) #5 unreachable ehcleanup: ; preds = %lpad - call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #4 - resume { i8*, i32 } %1 + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %A0) #4 + resume { ptr, i32 } %0 } ; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1 declare dso_local i32 @__gxx_personality_v0(...) 
; Function Attrs: nounwind readnone -declare i32 @llvm.eh.typeid.for(i8*) #2 +declare i32 @llvm.eh.typeid.for(ptr) #2 -declare dso_local i8* @__cxa_begin_catch(i8*) local_unnamed_addr +declare dso_local ptr @__cxa_begin_catch(ptr) local_unnamed_addr declare dso_local void @__cxa_end_catch() local_unnamed_addr ; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1 ; Function Attrs: norecurse sanitize_memtag -define dso_local i32 @main() local_unnamed_addr #3 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +define dso_local i32 @main() local_unnamed_addr #3 personality ptr @__gxx_personality_v0 { entry: ; CHECK-LABEL: entry: %A0.i = alloca i32, align 4 - %0 = bitcast i32* %A0.i to i8* - call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #4 - call void asm sideeffect "", "r"(i32* nonnull %A0.i) #4, !srcloc !6 -; CHECK: call void @llvm.aarch64.settag(i8* %1, i64 16) + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %A0.i) #4 + call void asm sideeffect "", "r"(ptr nonnull %A0.i) #4, !srcloc !6 +; CHECK: call void @llvm.aarch64.settag(ptr %A0.i.tag, i64 16) ; CHECK-NEXT: call void asm sideeffect - %exception.i6 = call i8* @__cxa_allocate_exception(i64 4) #4 - %1 = bitcast i8* %exception.i6 to i32* - store i32 42, i32* %1, align 16, !tbaa !2 - invoke void @__cxa_throw(i8* %exception.i6, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #5 + %exception.i6 = call ptr @__cxa_allocate_exception(i64 4) #4 + store i32 42, ptr %exception.i6, align 16, !tbaa !2 + invoke void @__cxa_throw(ptr %exception.i6, ptr @_ZTIi, ptr null) #5 to label %.noexc7 unwind label %lpad.i .noexc7: ; preds = %entry unreachable lpad.i: ; preds = %entry - %2 = landingpad { i8*, i32 } + %0 = landingpad { ptr, i32 } cleanup - catch i8* bitcast (i8** @_ZTIi to i8*) - %3 = extractvalue { i8*, i32 } %2, 1 - %4 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) #4 - %matches.i = icmp eq i32 %3, %4 + catch ptr @_ZTIi + %1 = extractvalue { ptr, i32 } %0, 1 + %2 = call i32 @llvm.eh.typeid.for(ptr @_ZTIi) #4 + %matches.i = icmp eq i32 %1, %2 br i1 %matches.i, label %catch.i, label %ehcleanup.i catch.i: ; preds = %lpad.i - %5 = extractvalue { i8*, i32 } %2, 0 - %6 = call i8* @__cxa_begin_catch(i8* %5) #4 + %3 = extractvalue { ptr, i32 } %0, 0 + %4 = call ptr @__cxa_begin_catch(ptr %3) #4 call void @__cxa_end_catch() #4 - %exception.i = call i8* @__cxa_allocate_exception(i64 4) #4 - %7 = bitcast i8* %exception.i to i32* - store i32 15532, i32* %7, align 16, !tbaa !2 - invoke void @__cxa_throw(i8* %exception.i, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #5 + %exception.i = call ptr @__cxa_allocate_exception(i64 4) #4 + store i32 15532, ptr %exception.i, align 16, !tbaa !2 + invoke void @__cxa_throw(ptr %exception.i, ptr @_ZTIi, ptr null) #5 to label %.noexc unwind label %lpad .noexc: ; preds = %catch.i unreachable ehcleanup.i: ; preds = %lpad.i - call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #4 + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %A0.i) #4 br label %lpad.body lpad: ; preds = %catch.i - %8 = landingpad { i8*, i32 } - catch i8* bitcast (i8** @_ZTIi to i8*) - %.pre = extractvalue { i8*, i32 } %8, 1 + %5 = landingpad { ptr, i32 } + catch ptr @_ZTIi + %.pre = extractvalue { ptr, i32 } %5, 1 br label %lpad.body lpad.body: ; preds = %ehcleanup.i, %lpad - %.pre-phi = phi i32 [ %3, %ehcleanup.i ], [ %.pre, %lpad ] - %eh.lpad-body = phi { i8*, i32 } [ %2, 
%ehcleanup.i ], [ %8, %lpad ] - %matches = icmp eq i32 %.pre-phi, %4 + %.pre-phi = phi i32 [ %1, %ehcleanup.i ], [ %.pre, %lpad ] + %eh.lpad-body = phi { ptr, i32 } [ %0, %ehcleanup.i ], [ %5, %lpad ] + %matches = icmp eq i32 %.pre-phi, %2 br i1 %matches, label %catch, label %eh.resume catch: ; preds = %lpad.body - %9 = extractvalue { i8*, i32 } %eh.lpad-body, 0 - %10 = call i8* @__cxa_begin_catch(i8* %9) #4 + %6 = extractvalue { ptr, i32 } %eh.lpad-body, 0 + %7 = call ptr @__cxa_begin_catch(ptr %6) #4 call void @__cxa_end_catch() #4 ret i32 0 eh.resume: ; preds = %lpad.body - resume { i8*, i32 } %eh.lpad-body + resume { ptr, i32 } %eh.lpad-body } attributes #0 = { noreturn sanitize_memtag "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+mte,+neon,+v8.5a" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/llvm/test/CodeGen/AArch64/stack-tagging-initializer-merge.ll b/llvm/test/CodeGen/AArch64/stack-tagging-initializer-merge.ll index f608192..e449fab 100644 --- a/llvm/test/CodeGen/AArch64/stack-tagging-initializer-merge.ll +++ b/llvm/test/CodeGen/AArch64/stack-tagging-initializer-merge.ll @@ -3,48 +3,42 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-android" -declare void @use(i8*) -declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) -declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) -declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) +declare void @use(ptr) +declare void @llvm.lifetime.start.p0(i64, ptr nocapture) +declare void @llvm.lifetime.end.p0(i64, ptr nocapture) +declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) define void @OneVarNoInit() sanitize_memtag { entry: %x = alloca i32, align 4 - %0 = bitcast i32* %x to i8* - call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) - call void @use(i8* nonnull %0) - call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %x) + call void @use(ptr nonnull %x) + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %x) ret void } ; CHECK-LABEL: define void @OneVarNoInit( ; CHECK-DAG: [[X:%.*]] = alloca { i32, [12 x i8] }, align 16 -; CHECK-DAG: [[TX:%.*]] = call { i32, [12 x i8] }* @llvm.aarch64.tagp.{{.*}}({ i32, [12 x i8] }* [[X]], {{.*}}, i64 0) -; CHECK-DAG: [[TX32:%.*]] = bitcast { i32, [12 x i8] }* [[TX]] to i32* -; CHECK-DAG: [[TX8:%.*]] = bitcast i32* [[TX32]] to i8* -; CHECK-DAG: call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull [[TX8]]) -; CHECK-DAG: call void @llvm.aarch64.settag(i8* [[TX8]], i64 16) -; CHECK-DAG: call void @use(i8* nonnull [[TX8]]) -; CHECK-DAG: call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull [[TX8]]) +; CHECK-DAG: [[TX:%.*]] = call ptr @llvm.aarch64.tagp.{{.*}}(ptr [[X]], {{.*}}, i64 0) +; CHECK-DAG: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[TX]]) +; CHECK-DAG: call void @llvm.aarch64.settag(ptr [[TX]], i64 16) +; CHECK-DAG: call void @use(ptr nonnull [[TX]]) +; CHECK-DAG: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[TX]]) define void @OneVarInitConst() sanitize_memtag { entry: %x = alloca i32, align 4 - %0 = bitcast i32* %x to i8* - call void 
@llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) - store i32 42, i32* %x, align 4 - call void @use(i8* nonnull %0) - call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %x) + store i32 42, ptr %x, align 4 + call void @use(ptr nonnull %x) + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %x) ret void } ; CHECK-LABEL: define void @OneVarInitConst( -; CHECK: [[TX:%.*]] = call { i32, [12 x i8] }* @llvm.aarch64.tagp -; CHECK: [[TX32:%.*]] = bitcast { i32, [12 x i8] }* [[TX]] to i32* -; CHECK: [[TX8:%.*]] = bitcast i32* [[TX32]] to i8* +; CHECK: [[TX:%.*]] = call ptr @llvm.aarch64.tagp ; CHECK-NOT: aarch64.settag -; CHECK: call void @llvm.aarch64.stgp(i8* [[TX8]], i64 42, i64 0) +; CHECK: call void @llvm.aarch64.stgp(ptr [[TX]], i64 42, i64 0) ; Untagging before lifetime.end: ; CHECK: call void @llvm.aarch64.settag( ; CHECK-NOT: aarch64.settag @@ -53,168 +47,149 @@ entry: define void @ArrayInitConst() sanitize_memtag { entry: %x = alloca i32, i32 16, align 4 - %0 = bitcast i32* %x to i8* - call void @llvm.lifetime.start.p0i8(i64 64, i8* nonnull %0) - store i32 42, i32* %x, align 4 - call void @use(i8* nonnull %0) - call void @llvm.lifetime.end.p0i8(i64 64, i8* nonnull %0) + call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %x) + store i32 42, ptr %x, align 4 + call void @use(ptr nonnull %x) + call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %x) ret void } ; CHECK-LABEL: define void @ArrayInitConst( -; CHECK: [[TX:%.*]] = call i32* @llvm.aarch64.tagp. -; CHECK: [[TX8:%.*]] = bitcast i32* [[TX]] to i8* -; CHECK: call void @llvm.aarch64.stgp(i8* [[TX8]], i64 42, i64 0) -; CHECK: [[TX8_16:%.*]] = getelementptr i8, i8* [[TX8]], i32 16 -; CHECK: call void @llvm.aarch64.settag.zero(i8* [[TX8_16]], i64 48) +; CHECK: [[TX:%.*]] = call ptr @llvm.aarch64.tagp. +; CHECK: call void @llvm.aarch64.stgp(ptr [[TX]], i64 42, i64 0) +; CHECK: [[TX8_16:%.*]] = getelementptr i8, ptr [[TX]], i32 16 +; CHECK: call void @llvm.aarch64.settag.zero(ptr [[TX8_16]], i64 48) ; CHECK: ret void define void @ArrayInitConst2() sanitize_memtag { entry: %x = alloca i32, i32 16, align 4 - %0 = bitcast i32* %x to i8* - call void @llvm.lifetime.start.p0i8(i64 64, i8* nonnull %0) - store i32 42, i32* %x, align 4 - %1 = getelementptr i32, i32* %x, i32 1 - store i32 43, i32* %1, align 4 - %2 = getelementptr i32, i32* %x, i32 2 - %3 = bitcast i32* %2 to i64* - store i64 -1, i64* %3, align 4 - call void @use(i8* nonnull %0) - call void @llvm.lifetime.end.p0i8(i64 64, i8* nonnull %0) + call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %x) + store i32 42, ptr %x, align 4 + %0 = getelementptr i32, ptr %x, i32 1 + store i32 43, ptr %0, align 4 + %1 = getelementptr i32, ptr %x, i32 2 + store i64 -1, ptr %1, align 4 + call void @use(ptr nonnull %x) + call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %x) ret void } ; CHECK-LABEL: define void @ArrayInitConst2( -; CHECK: [[TX:%.*]] = call i32* @llvm.aarch64.tagp. -; CHECK: [[TX8:%.*]] = bitcast i32* [[TX]] to i8* -; CHECK: call void @llvm.aarch64.stgp(i8* [[TX8]], i64 184683593770, i64 -1) -; CHECK: [[TX8_16:%.*]] = getelementptr i8, i8* [[TX8]], i32 16 -; CHECK: call void @llvm.aarch64.settag.zero(i8* [[TX8_16]], i64 48) +; CHECK: [[TX:%.*]] = call ptr @llvm.aarch64.tagp. 
+; CHECK: call void @llvm.aarch64.stgp(ptr [[TX]], i64 184683593770, i64 -1) +; CHECK: [[TX8_16:%.*]] = getelementptr i8, ptr [[TX]], i32 16 +; CHECK: call void @llvm.aarch64.settag.zero(ptr [[TX8_16]], i64 48) ; CHECK: ret void define void @ArrayInitConstSplit() sanitize_memtag { entry: %x = alloca i32, i32 16, align 4 - %0 = bitcast i32* %x to i8* - call void @llvm.lifetime.start.p0i8(i64 64, i8* nonnull %0) - %1 = getelementptr i32, i32* %x, i32 1 - %2 = bitcast i32* %1 to i64* - store i64 -1, i64* %2, align 4 - call void @use(i8* nonnull %0) - call void @llvm.lifetime.end.p0i8(i64 64, i8* nonnull %0) + call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %x) + %0 = getelementptr i32, ptr %x, i32 1 + store i64 -1, ptr %0, align 4 + call void @use(ptr nonnull %x) + call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %x) ret void } ; CHECK-LABEL: define void @ArrayInitConstSplit( -; CHECK: [[TX:%.*]] = call i32* @llvm.aarch64.tagp. -; CHECK: [[TX8:%.*]] = bitcast i32* [[TX]] to i8* -; CHECK: call void @llvm.aarch64.stgp(i8* [[TX8]], i64 -4294967296, i64 4294967295) +; CHECK: [[TX:%.*]] = call ptr @llvm.aarch64.tagp. +; CHECK: call void @llvm.aarch64.stgp(ptr [[TX]], i64 -4294967296, i64 4294967295) ; CHECK: ret void define void @ArrayInitConstWithHoles() sanitize_memtag { entry: %x = alloca i32, i32 32, align 4 - %0 = bitcast i32* %x to i8* - call void @llvm.lifetime.start.p0i8(i64 128, i8* nonnull %0) - %1 = getelementptr i32, i32* %x, i32 5 - store i32 42, i32* %1, align 4 - %2 = getelementptr i32, i32* %x, i32 14 - store i32 43, i32* %2, align 4 - call void @use(i8* nonnull %0) - call void @llvm.lifetime.end.p0i8(i64 128, i8* nonnull %0) + call void @llvm.lifetime.start.p0(i64 128, ptr nonnull %x) + %0 = getelementptr i32, ptr %x, i32 5 + store i32 42, ptr %0, align 4 + %1 = getelementptr i32, ptr %x, i32 14 + store i32 43, ptr %1, align 4 + call void @use(ptr nonnull %x) + call void @llvm.lifetime.end.p0(i64 128, ptr nonnull %x) ret void } ; CHECK-LABEL: define void @ArrayInitConstWithHoles( -; CHECK: [[TX:%.*]] = call i32* @llvm.aarch64.tagp. -; CHECK: [[TX8:%.*]] = bitcast i32* [[TX]] to i8* -; CHECK: call void @llvm.aarch64.settag.zero(i8* [[TX8]], i64 16) -; CHECK: [[TX8_16:%.*]] = getelementptr i8, i8* %0, i32 16 -; CHECK: call void @llvm.aarch64.stgp(i8* [[TX8_16]], i64 180388626432, i64 0) -; CHECK: [[TX8_32:%.*]] = getelementptr i8, i8* %0, i32 32 -; CHECK: call void @llvm.aarch64.settag.zero(i8* [[TX8_32]], i64 16) -; CHECK: [[TX8_48:%.*]] = getelementptr i8, i8* %0, i32 48 -; CHECK: call void @llvm.aarch64.stgp(i8* [[TX8_48]], i64 0, i64 43) -; CHECK: [[TX8_64:%.*]] = getelementptr i8, i8* %0, i32 64 -; CHECK: call void @llvm.aarch64.settag.zero(i8* [[TX8_64]], i64 64) +; CHECK: [[TX:%.*]] = call ptr @llvm.aarch64.tagp. 
+; CHECK: call void @llvm.aarch64.settag.zero(ptr [[TX]], i64 16) +; CHECK: [[TX8_16:%.*]] = getelementptr i8, ptr %x.tag, i32 16 +; CHECK: call void @llvm.aarch64.stgp(ptr [[TX8_16]], i64 180388626432, i64 0) +; CHECK: [[TX8_32:%.*]] = getelementptr i8, ptr %x.tag, i32 32 +; CHECK: call void @llvm.aarch64.settag.zero(ptr [[TX8_32]], i64 16) +; CHECK: [[TX8_48:%.*]] = getelementptr i8, ptr %x.tag, i32 48 +; CHECK: call void @llvm.aarch64.stgp(ptr [[TX8_48]], i64 0, i64 43) +; CHECK: [[TX8_64:%.*]] = getelementptr i8, ptr %x.tag, i32 64 +; CHECK: call void @llvm.aarch64.settag.zero(ptr [[TX8_64]], i64 64) ; CHECK: ret void define void @InitNonConst(i32 %v) sanitize_memtag { entry: %x = alloca i32, align 4 - %0 = bitcast i32* %x to i8* - call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) - store i32 %v, i32* %x, align 4 - call void @use(i8* nonnull %0) - call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %x) + store i32 %v, ptr %x, align 4 + call void @use(ptr nonnull %x) + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %x) ret void } ; CHECK-LABEL: define void @InitNonConst( -; CHECK: [[TX:%.*]] = call { i32, [12 x i8] }* @llvm.aarch64.tagp -; CHECK: [[TX32:%.*]] = bitcast { i32, [12 x i8] }* [[TX]] to i32* -; CHECK: [[TX8:%.*]] = bitcast i32* [[TX32]] to i8* +; CHECK: [[TX:%.*]] = call ptr @llvm.aarch64.tagp ; CHECK: [[V:%.*]] = zext i32 %v to i64 -; CHECK: call void @llvm.aarch64.stgp(i8* [[TX8]], i64 [[V]], i64 0) +; CHECK: call void @llvm.aarch64.stgp(ptr [[TX]], i64 [[V]], i64 0) ; CHECK: ret void define void @InitNonConst2(i32 %v, i32 %w) sanitize_memtag { entry: %x = alloca i32, i32 4, align 4 - %0 = bitcast i32* %x to i8* - call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %0) - store i32 %v, i32* %x, align 4 - %1 = getelementptr i32, i32* %x, i32 1 - store i32 %w, i32* %1, align 4 - call void @use(i8* nonnull %0) - call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %0) + call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %x) + store i32 %v, ptr %x, align 4 + %0 = getelementptr i32, ptr %x, i32 1 + store i32 %w, ptr %0, align 4 + call void @use(ptr nonnull %x) + call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %x) ret void } ; CHECK-LABEL: define void @InitNonConst2( -; CHECK: [[TX:%.*]] = call i32* @llvm.aarch64.tagp -; CHECK: [[TX8:%.*]] = bitcast i32* [[TX]] to i8* +; CHECK: [[TX:%.*]] = call ptr @llvm.aarch64.tagp ; CHECK: [[V:%.*]] = zext i32 %v to i64 ; CHECK: [[W:%.*]] = zext i32 %w to i64 ; CHECK: [[WS:%.*]] = shl i64 [[W]], 32 ; CHECK: [[VW:%.*]] = or i64 [[V]], [[WS]] -; CHECK: call void @llvm.aarch64.stgp(i8* [[TX8]], i64 [[VW]], i64 0) +; CHECK: call void @llvm.aarch64.stgp(ptr [[TX]], i64 [[VW]], i64 0) ; CHECK: ret void define void @InitVector() sanitize_memtag { entry: %x = alloca i32, i32 4, align 4 - %0 = bitcast i32* %x to i8* - call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %0) - %1 = bitcast i32* %x to <2 x i32>* - store <2 x i32> , <2 x i32>* %1, align 4 - call void @use(i8* nonnull %0) - call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %0) + call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %x) + store <2 x i32> , ptr %x, align 4 + call void @use(ptr nonnull %x) + call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %x) ret void } ; CHECK-LABEL: define void @InitVector( -; CHECK: [[TX:%.*]] = call i32* @llvm.aarch64.tagp -; CHECK: [[TX8:%.*]] = bitcast i32* [[TX]] to i8* -; CHECK: call void @llvm.aarch64.stgp(i8* [[TX8]], i64 bitcast (<2 x i32> to i64), i64 
0) +; CHECK: [[TX:%.*]] = call ptr @llvm.aarch64.tagp +; CHECK: call void @llvm.aarch64.stgp(ptr [[TX]], i64 bitcast (<2 x i32> to i64), i64 0) ; CHECK: ret void -define void @InitVectorPtr(i32* %p) sanitize_memtag { +define void @InitVectorPtr(ptr %p) sanitize_memtag { entry: - %s = alloca <4 x i32*>, align 8 - %v0 = insertelement <4 x i32*> undef, i32* %p, i32 0 - %v1 = shufflevector <4 x i32*> %v0, <4 x i32*> undef, <4 x i32> zeroinitializer - store <4 x i32*> %v1, <4 x i32*>* %s - %0 = bitcast <4 x i32*>* %s to i8* - call void @use(i8* nonnull %0) + %s = alloca <4 x ptr>, align 8 + %v0 = insertelement <4 x ptr> undef, ptr %p, i32 0 + %v1 = shufflevector <4 x ptr> %v0, <4 x ptr> undef, <4 x i32> zeroinitializer + store <4 x ptr> %v1, ptr %s + call void @use(ptr nonnull %s) ret void } ; CHECK-LABEL: define void @InitVectorPtr( -; CHECK: call <4 x i32*>* @llvm.aarch64.tagp +; CHECK: call ptr @llvm.aarch64.tagp ; CHECK: [[V1:%.*]] = shufflevector -; CHECK: [[V2:%.*]] = ptrtoint <4 x i32*> [[V1]] to <4 x i64> +; CHECK: [[V2:%.*]] = ptrtoint <4 x ptr> [[V1]] to <4 x i64> ; CHECK: [[V3:%.*]] = bitcast <4 x i64> [[V2]] to i256 ; CHECK: [[A1:%.*]] = trunc i256 [[V3]] to i64 ; CHECK: [[A2_:%.*]] = lshr i256 [[V3]], 64 @@ -230,93 +205,86 @@ entry: define void @InitVectorSplit() sanitize_memtag { entry: %x = alloca i32, i32 4, align 4 - %0 = bitcast i32* %x to i8* - call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %0) - %1 = getelementptr i32, i32* %x, i32 1 - %2 = bitcast i32* %1 to <2 x i32>* - store <2 x i32> , <2 x i32>* %2, align 4 - call void @use(i8* nonnull %0) - call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %0) + call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %x) + %0 = getelementptr i32, ptr %x, i32 1 + store <2 x i32> , ptr %0, align 4 + call void @use(ptr nonnull %x) + call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %x) ret void } ; CHECK-LABEL: define void @InitVectorSplit( -; CHECK: [[TX:%.*]] = call i32* @llvm.aarch64.tagp -; CHECK: [[TX8:%.*]] = bitcast i32* [[TX]] to i8* -; CHECK: call void @llvm.aarch64.stgp(i8* [[TX8]], i64 shl (i64 bitcast (<2 x i32> to i64), i64 32), i64 lshr (i64 bitcast (<2 x i32> to i64), i64 32)) +; CHECK: [[TX:%.*]] = call ptr @llvm.aarch64.tagp +; CHECK: call void @llvm.aarch64.stgp(ptr [[TX]], i64 shl (i64 bitcast (<2 x i32> to i64), i64 32), i64 lshr (i64 bitcast (<2 x i32> to i64), i64 32)) ; CHECK: ret void define void @MemSetZero() sanitize_memtag { entry: %x = alloca i32, i32 8, align 16 - %0 = bitcast i32* %x to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 %0, i8 0, i64 32, i1 false) - call void @use(i8* nonnull %0) + call void @llvm.memset.p0.i64(ptr nonnull align 16 %x, i8 0, i64 32, i1 false) + call void @use(ptr nonnull %x) ret void } ; CHECK-LABEL: define void @MemSetZero( -; CHECK: [[TX:%.*]] = call i32* @llvm.aarch64.tagp -; CHECK: [[TX8:%.*]] = bitcast i32* [[TX]] to i8* -; CHECK: call void @llvm.aarch64.settag.zero(i8* [[TX8]], i64 32) +; CHECK: [[TX:%.*]] = call ptr @llvm.aarch64.tagp +; CHECK: call void @llvm.aarch64.settag.zero(ptr [[TX]], i64 32) ; CHECK: ret void define void @MemSetNonZero() sanitize_memtag { entry: %x = alloca i32, i32 8, align 16 - %0 = bitcast i32* %x to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 %0, i8 42, i64 32, i1 false) - call void @use(i8* nonnull %0) + call void @llvm.memset.p0.i64(ptr nonnull align 16 %x, i8 42, i64 32, i1 false) + call void @use(ptr nonnull %x) ret void } ; CHECK-LABEL: define void @MemSetNonZero( -; CHECK: call void 
@llvm.aarch64.stgp(i8* {{.*}}, i64 3038287259199220266, i64 3038287259199220266) -; CHECK: call void @llvm.aarch64.stgp(i8* {{.*}}, i64 3038287259199220266, i64 3038287259199220266) +; CHECK: call void @llvm.aarch64.stgp(ptr {{.*}}, i64 3038287259199220266, i64 3038287259199220266) +; CHECK: call void @llvm.aarch64.stgp(ptr {{.*}}, i64 3038287259199220266, i64 3038287259199220266) ; CHECK: ret void define void @MemSetNonZero2() sanitize_memtag { entry: %x = alloca [32 x i8], align 16 - %0 = getelementptr inbounds [32 x i8], [32 x i8]* %x, i64 0, i64 2 - call void @llvm.memset.p0i8.i64(i8* nonnull %0, i8 42, i64 28, i1 false) - call void @use(i8* nonnull %0) + %0 = getelementptr inbounds [32 x i8], ptr %x, i64 0, i64 2 + call void @llvm.memset.p0.i64(ptr nonnull %0, i8 42, i64 28, i1 false) + call void @use(ptr nonnull %0) ret void } ; CHECK-LABEL: define void @MemSetNonZero2( -; CHECK: call void @llvm.aarch64.stgp(i8* {{.*}}, i64 3038287259199209472, i64 3038287259199220266) -; CHECK: call void @llvm.aarch64.stgp(i8* {{.*}}, i64 3038287259199220266, i64 46360584399402) +; CHECK: call void @llvm.aarch64.stgp(ptr {{.*}}, i64 3038287259199209472, i64 3038287259199220266) +; CHECK: call void @llvm.aarch64.stgp(ptr {{.*}}, i64 3038287259199220266, i64 46360584399402) ; CHECK: ret void define void @MemSetNonZero3() sanitize_memtag { entry: %x = alloca [32 x i8], align 16 - %0 = getelementptr inbounds [32 x i8], [32 x i8]* %x, i64 0, i64 2 - call void @llvm.memset.p0i8.i64(i8* nonnull %0, i8 42, i64 4, i1 false) - %1 = getelementptr inbounds [32 x i8], [32 x i8]* %x, i64 0, i64 24 - call void @llvm.memset.p0i8.i64(i8* nonnull %1, i8 42, i64 8, i1 false) - call void @use(i8* nonnull %0) + %0 = getelementptr inbounds [32 x i8], ptr %x, i64 0, i64 2 + call void @llvm.memset.p0.i64(ptr nonnull %0, i8 42, i64 4, i1 false) + %1 = getelementptr inbounds [32 x i8], ptr %x, i64 0, i64 24 + call void @llvm.memset.p0.i64(ptr nonnull %1, i8 42, i64 8, i1 false) + call void @use(ptr nonnull %0) ret void } ; CHECK-LABEL: define void @MemSetNonZero3( -; CHECK: call void @llvm.aarch64.stgp(i8* {{.*}}, i64 46360584388608, i64 0) -; CHECK: call void @llvm.aarch64.stgp(i8* {{.*}}, i64 0, i64 3038287259199220266) +; CHECK: call void @llvm.aarch64.stgp(ptr {{.*}}, i64 46360584388608, i64 0) +; CHECK: call void @llvm.aarch64.stgp(ptr {{.*}}, i64 0, i64 3038287259199220266) ; CHECK: ret void define void @LargeAlloca() sanitize_memtag { entry: %x = alloca i32, i32 256, align 16 - %0 = bitcast i32* %x to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 16 %0, i8 42, i64 256, i1 false) - call void @use(i8* nonnull %0) + call void @llvm.memset.p0.i64(ptr nonnull align 16 %x, i8 42, i64 256, i1 false) + call void @use(ptr nonnull %x) ret void } ; CHECK-LABEL: define void @LargeAlloca( -; CHECK: call void @llvm.aarch64.settag(i8* {{.*}}, i64 1024) -; CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 42, i64 256, +; CHECK: call void @llvm.aarch64.settag(ptr {{.*}}, i64 1024) +; CHECK: call void @llvm.memset.p0.i64(ptr {{.*}}, i8 42, i64 256, ; CHECK: ret void diff --git a/llvm/test/CodeGen/AArch64/sve-extract-vector-to-predicate-store.ll b/llvm/test/CodeGen/AArch64/sve-extract-vector-to-predicate-store.ll index 75e1287..e3d368b 100644 --- a/llvm/test/CodeGen/AArch64/sve-extract-vector-to-predicate-store.ll +++ b/llvm/test/CodeGen/AArch64/sve-extract-vector-to-predicate-store.ll @@ -2,77 +2,74 @@ target triple = "aarch64-unknown-linux-gnu" -define void @pred_store_v2i8( %pred, <2 x i8>* %addr) #0 { +define void 
@pred_store_v2i8( %pred, ptr %addr) #0 { ; CHECK-LABEL: @pred_store_v2i8( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i8>* %addr to * -; CHECK-NEXT: store %pred, * [[TMP1]] +; CHECK-NEXT: store %pred, ptr %addr ; CHECK-NEXT: ret void %bitcast = bitcast %pred to %extract = tail call <2 x i8> @llvm.vector.extract.v2i8.nxv2i8( %bitcast, i64 0) - store <2 x i8> %extract, <2 x i8>* %addr, align 4 + store <2 x i8> %extract, ptr %addr, align 4 ret void } -define void @pred_store_v4i8( %pred, <4 x i8>* %addr) #1 { +define void @pred_store_v4i8( %pred, ptr %addr) #1 { ; CHECK-LABEL: @pred_store_v4i8( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i8>* %addr to * -; CHECK-NEXT: store %pred, * [[TMP1]] +; CHECK-NEXT: store %pred, ptr %addr ; CHECK-NEXT: ret void %bitcast = bitcast %pred to %extract = tail call <4 x i8> @llvm.vector.extract.v4i8.nxv2i8( %bitcast, i64 0) - store <4 x i8> %extract, <4 x i8>* %addr, align 4 + store <4 x i8> %extract, ptr %addr, align 4 ret void } -define void @pred_store_v8i8( %pred, <8 x i8>* %addr) #2 { +define void @pred_store_v8i8( %pred, ptr %addr) #2 { ; CHECK-LABEL: @pred_store_v8i8( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8>* %addr to * -; CHECK-NEXT: store %pred, * [[TMP1]] +; CHECK-NEXT: store %pred, ptr %addr ; CHECK-NEXT: ret void %bitcast = bitcast %pred to %extract = tail call <8 x i8> @llvm.vector.extract.v8i8.nxv2i8( %bitcast, i64 0) - store <8 x i8> %extract, <8 x i8>* %addr, align 4 + store <8 x i8> %extract, ptr %addr, align 4 ret void } ; Check that too small of a vscale prevents optimization -define void @pred_store_neg1( %pred, <4 x i8>* %addr) #0 { +define void @pred_store_neg1( %pred, ptr %addr) #0 { ; CHECK-LABEL: @pred_store_neg1( ; CHECK: call <4 x i8> @llvm.vector.extract %bitcast = bitcast %pred to %extract = tail call <4 x i8> @llvm.vector.extract.v4i8.nxv2i8( %bitcast, i64 0) - store <4 x i8> %extract, <4 x i8>* %addr, align 4 + store <4 x i8> %extract, ptr %addr, align 4 ret void } ; Check that too large of a vscale prevents optimization -define void @pred_store_neg2( %pred, <4 x i8>* %addr) #2 { +define void @pred_store_neg2( %pred, ptr %addr) #2 { ; CHECK-LABEL: @pred_store_neg2( ; CHECK: call <4 x i8> @llvm.vector.extract %bitcast = bitcast %pred to %extract = tail call <4 x i8> @llvm.vector.extract.v4i8.nxv2i8( %bitcast, i64 0) - store <4 x i8> %extract, <4 x i8>* %addr, align 4 + store <4 x i8> %extract, ptr %addr, align 4 ret void } ; Check that a non-zero index prevents optimization -define void @pred_store_neg3( %pred, <4 x i8>* %addr) #1 { +define void @pred_store_neg3( %pred, ptr %addr) #1 { ; CHECK-LABEL: @pred_store_neg3( ; CHECK: call <4 x i8> @llvm.vector.extract %bitcast = bitcast %pred to %extract = tail call <4 x i8> @llvm.vector.extract.v4i8.nxv2i8( %bitcast, i64 4) - store <4 x i8> %extract, <4 x i8>* %addr, align 4 + store <4 x i8> %extract, ptr %addr, align 4 ret void } ; Check that differing vscale min/max prevents optimization -define void @pred_store_neg4( %pred, <4 x i8>* %addr) #3 { +define void @pred_store_neg4( %pred, ptr %addr) #3 { ; CHECK-LABEL: @pred_store_neg4( ; CHECK: call <4 x i8> @llvm.vector.extract %bitcast = bitcast %pred to %extract = tail call <4 x i8> @llvm.vector.extract.v4i8.nxv2i8( %bitcast, i64 0) - store <4 x i8> %extract, <4 x i8>* %addr, align 4 + store <4 x i8> %extract, ptr %addr, align 4 ret void } diff --git a/llvm/test/CodeGen/AArch64/sve-insert-vector-to-predicate-load.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector-to-predicate-load.ll index e676708..b906de7 100644 --- 
a/llvm/test/CodeGen/AArch64/sve-insert-vector-to-predicate-load.ll +++ b/llvm/test/CodeGen/AArch64/sve-insert-vector-to-predicate-load.ll @@ -2,50 +2,46 @@ target triple = "aarch64-unknown-linux-gnu" -define @pred_load_v2i8(<2 x i8>* %addr) #0 { +define @pred_load_v2i8(ptr %addr) #0 { ; CHECK-LABEL: @pred_load_v2i8( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i8>* %addr to * -; CHECK-NEXT: [[TMP2:%.*]] = load , * [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = load , ptr %addr ; CHECK-NEXT: ret [[TMP2]] - %load = load <2 x i8>, <2 x i8>* %addr, align 4 + %load = load <2 x i8>, ptr %addr, align 4 %insert = tail call @llvm.vector.insert.nxv2i8.v2i8( undef, <2 x i8> %load, i64 0) %ret = bitcast %insert to ret %ret } -define @pred_load_v4i8(<4 x i8>* %addr) #1 { +define @pred_load_v4i8(ptr %addr) #1 { ; CHECK-LABEL: @pred_load_v4i8( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i8>* %addr to * -; CHECK-NEXT: [[TMP2:%.*]] = load , * [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = load , ptr %addr ; CHECK-NEXT: ret [[TMP2]] - %load = load <4 x i8>, <4 x i8>* %addr, align 4 + %load = load <4 x i8>, ptr %addr, align 4 %insert = tail call @llvm.vector.insert.nxv2i8.v4i8( undef, <4 x i8> %load, i64 0) %ret = bitcast %insert to ret %ret } -define @pred_load_v8i8(<8 x i8>* %addr) #2 { +define @pred_load_v8i8(ptr %addr) #2 { ; CHECK-LABEL: @pred_load_v8i8( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8>* %addr to * -; CHECK-NEXT: [[TMP2:%.*]] = load , * [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = load , ptr %addr ; CHECK-NEXT: ret [[TMP2]] - %load = load <8 x i8>, <8 x i8>* %addr, align 4 + %load = load <8 x i8>, ptr %addr, align 4 %insert = tail call @llvm.vector.insert.nxv2i8.v8i8( undef, <8 x i8> %load, i64 0) %ret = bitcast %insert to ret %ret } ; Ensure the insertion point is at the load -define @pred_load_insertion_point(<2 x i8>* %addr) #0 { +define @pred_load_insertion_point(ptr %addr) #0 { ; CHECK-LABEL: @pred_load_insertion_point( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i8>* %addr to * -; CHECK-NEXT: [[TMP2:%.*]] = load , * [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = load , ptr %addr ; CHECK-NEXT: br label %bb1 ; CHECK: bb1: ; CHECK-NEXT: ret [[TMP2]] entry: - %load = load <2 x i8>, <2 x i8>* %addr, align 4 + %load = load <2 x i8>, ptr %addr, align 4 br label %bb1 bb1: @@ -55,50 +51,50 @@ bb1: } ; Check that too small of a vscale prevents optimization -define @pred_load_neg1(<4 x i8>* %addr) #0 { +define @pred_load_neg1(ptr %addr) #0 { ; CHECK-LABEL: @pred_load_neg1( ; CHECK: call @llvm.vector.insert - %load = load <4 x i8>, <4 x i8>* %addr, align 4 + %load = load <4 x i8>, ptr %addr, align 4 %insert = tail call @llvm.vector.insert.nxv2i8.v4i8( undef, <4 x i8> %load, i64 0) %ret = bitcast %insert to ret %ret } ; Check that too large of a vscale prevents optimization -define @pred_load_neg2(<4 x i8>* %addr) #2 { +define @pred_load_neg2(ptr %addr) #2 { ; CHECK-LABEL: @pred_load_neg2( ; CHECK: call @llvm.vector.insert - %load = load <4 x i8>, <4 x i8>* %addr, align 4 + %load = load <4 x i8>, ptr %addr, align 4 %insert = tail call @llvm.vector.insert.nxv2i8.v4i8( undef, <4 x i8> %load, i64 0) %ret = bitcast %insert to ret %ret } ; Check that a non-zero index prevents optimization -define @pred_load_neg3(<4 x i8>* %addr) #1 { +define @pred_load_neg3(ptr %addr) #1 { ; CHECK-LABEL: @pred_load_neg3( ; CHECK: call @llvm.vector.insert - %load = load <4 x i8>, <4 x i8>* %addr, align 4 + %load = load <4 x i8>, ptr %addr, align 4 %insert = tail call @llvm.vector.insert.nxv2i8.v4i8( undef, <4 x i8> %load, i64 4) 
 %ret = bitcast %insert to
 ret %ret
 }
 ; Check that differing vscale min/max prevents optimization
-define @pred_load_neg4(<4 x i8>* %addr) #3 {
+define @pred_load_neg4(ptr %addr) #3 {
 ; CHECK-LABEL: @pred_load_neg4(
 ; CHECK: call @llvm.vector.insert
-  %load = load <4 x i8>, <4 x i8>* %addr, align 4
+  %load = load <4 x i8>, ptr %addr, align 4
 %insert = tail call @llvm.vector.insert.nxv2i8.v4i8( undef, <4 x i8> %load, i64 0)
 %ret = bitcast %insert to
 ret %ret
 }
 ; Check that insertion into a non-undef vector prevents optimization
-define @pred_load_neg5(<4 x i8>* %addr, %passthru) #1 {
+define @pred_load_neg5(ptr %addr, %passthru) #1 {
 ; CHECK-LABEL: @pred_load_neg5(
 ; CHECK: call @llvm.vector.insert
-  %load = load <4 x i8>, <4 x i8>* %addr, align 4
+  %load = load <4 x i8>, ptr %addr, align 4
 %insert = tail call @llvm.vector.insert.nxv2i8.v4i8( %passthru, <4 x i8> %load, i64 0)
 %ret = bitcast %insert to
 ret %ret
diff --git a/llvm/test/CodeGen/AArch64/sve-lsr-scaled-index-addressing-mode.ll b/llvm/test/CodeGen/AArch64/sve-lsr-scaled-index-addressing-mode.ll
index b55e23c..585c5d7 100644
--- a/llvm/test/CodeGen/AArch64/sve-lsr-scaled-index-addressing-mode.ll
+++ b/llvm/test/CodeGen/AArch64/sve-lsr-scaled-index-addressing-mode.ll
@@ -6,10 +6,10 @@
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64-linux-gnu"
-; These tests check that the IR coming out of LSR does not cast input/output pointer from i16* to i8* type.
+; These tests check that the IR coming out of LSR does not introduce unnecessary casts of the input/output pointers.
 ; And scaled-index addressing mode is leveraged in the generated assembly, i.e. ld1h { z1.h }, p0/z, [x0, x8, lsl #1].
-define void @ld_st_nxv8i16(i16* %in, i16* %out) {
+define void @ld_st_nxv8i16(ptr %in, ptr %out) {
 ; IR-LABEL: @ld_st_nxv8i16(
 ; IR-NEXT: entry:
 ; IR-NEXT: br label [[LOOP_PH:%.*]]
@@ -21,13 +21,13 @@ define void @ld_st_nxv8i16(i16* %in, i16* %out) {
 ; IR-NEXT: br label [[LOOP:%.*]]
 ; IR: loop:
 ; IR-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[LOOP_PH]] ], [ [[INDVAR_NEXT:%.*]], [[LOOP]] ]
-; IR-NEXT: [[SCEVGEP2:%.*]] = getelementptr i16, i16* [[IN:%.*]], i64 [[INDVAR]]
-; IR-NEXT: [[SCEVGEP23:%.*]] = bitcast i16* [[SCEVGEP2]] to *
-; IR-NEXT: [[SCEVGEP:%.*]] = getelementptr i16, i16* [[OUT:%.*]], i64 [[INDVAR]]
-; IR-NEXT: [[SCEVGEP1:%.*]] = bitcast i16* [[SCEVGEP]] to *
-; IR-NEXT: [[VAL:%.*]] = load , * [[SCEVGEP23]], align 16
+; IR-NEXT: [[TMP0:%.*]] = shl i64 [[INDVAR]], 1
+; IR-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 [[TMP0]]
+; IR-NEXT: [[TMP1:%.*]] = shl i64 [[INDVAR]], 1
+; IR-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[OUT:%.*]], i64 [[TMP1]]
+; IR-NEXT: [[VAL:%.*]] = load , ptr [[UGLYGEP1]], align 16
 ; IR-NEXT: [[ADDP_VEC:%.*]] = add [[VAL]], [[P_VEC_SPLAT]]
-; IR-NEXT: store [[ADDP_VEC]], * [[SCEVGEP1]], align 16
+; IR-NEXT: store [[ADDP_VEC]], ptr [[UGLYGEP]], align 16
 ; IR-NEXT: [[INDVAR_NEXT]] = add nsw i64 [[INDVAR]], [[SCALED_VF]]
 ; IR-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], 1024
 ; IR-NEXT: br i1 [[EXIT_COND]], label [[LOOP_EXIT:%.*]], label [[LOOP]]
@@ -64,13 +64,11 @@ loop.ph:
 loop: ; preds = %loop, %loop.ph
 %indvar = phi i64 [ 0, %loop.ph ], [ %indvar.next, %loop ]
-  %ptr.in = getelementptr inbounds i16, i16* %in, i64 %indvar
-  %ptr.out = getelementptr inbounds i16, i16* %out, i64 %indvar
-  %in.ptrcast = bitcast i16* %ptr.in to *
-  %out.ptrcast = bitcast i16* %ptr.out to *
-  %val = load , * %in.ptrcast, align 16
+  %ptr.in = getelementptr inbounds i16, ptr %in,
i64 %indvar + %ptr.out = getelementptr inbounds i16, ptr %out, i64 %indvar + %val = load , ptr %ptr.in, align 16 %addp_vec = add %val, %p_vec.splat - store %addp_vec, * %out.ptrcast, align 16 + store %addp_vec, ptr %ptr.out, align 16 %indvar.next = add nsw i64 %indvar, %scaled_vf %exit.cond = icmp eq i64 %indvar.next, 1024 br i1 %exit.cond, label %loop.exit, label %loop @@ -82,7 +80,7 @@ exit: ret void } -define void @masked_ld_st_nxv8i16(i16* %in, i16* %out, i64 %n) { +define void @masked_ld_st_nxv8i16(ptr %in, ptr %out, i64 %n) { ; IR-LABEL: @masked_ld_st_nxv8i16( ; IR-NEXT: entry: ; IR-NEXT: br label [[LOOP_PH:%.*]] @@ -96,13 +94,13 @@ define void @masked_ld_st_nxv8i16(i16* %in, i16* %out, i64 %n) { ; IR-NEXT: br label [[LOOP:%.*]] ; IR: loop: ; IR-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[LOOP_PH]] ], [ [[INDVAR_NEXT:%.*]], [[LOOP]] ] -; IR-NEXT: [[SCEVGEP2:%.*]] = getelementptr i16, i16* [[IN:%.*]], i64 [[INDVAR]] -; IR-NEXT: [[SCEVGEP23:%.*]] = bitcast i16* [[SCEVGEP2]] to * -; IR-NEXT: [[SCEVGEP:%.*]] = getelementptr i16, i16* [[OUT:%.*]], i64 [[INDVAR]] -; IR-NEXT: [[SCEVGEP1:%.*]] = bitcast i16* [[SCEVGEP]] to * -; IR-NEXT: [[VAL:%.*]] = call @llvm.masked.load.nxv8i16.p0nxv8i16(* [[SCEVGEP23]], i32 4, [[PTRUE_VEC_SPLAT]], undef) +; IR-NEXT: [[TMP0:%.*]] = shl i64 [[INDVAR]], 1 +; IR-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 [[TMP0]] +; IR-NEXT: [[TMP1:%.*]] = shl i64 [[INDVAR]], 1 +; IR-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[OUT:%.*]], i64 [[TMP1]] +; IR-NEXT: [[VAL:%.*]] = call @llvm.masked.load.nxv8i16.p0(ptr [[UGLYGEP1]], i32 4, [[PTRUE_VEC_SPLAT]], undef) ; IR-NEXT: [[ADDP_VEC:%.*]] = add [[VAL]], [[P_VEC_SPLAT]] -; IR-NEXT: call void @llvm.masked.store.nxv8i16.p0nxv8i16( [[ADDP_VEC]], * [[SCEVGEP1]], i32 4, [[PTRUE_VEC_SPLAT]]) +; IR-NEXT: call void @llvm.masked.store.nxv8i16.p0( [[ADDP_VEC]], ptr [[UGLYGEP]], i32 4, [[PTRUE_VEC_SPLAT]]) ; IR-NEXT: [[INDVAR_NEXT]] = add nsw i64 [[INDVAR]], [[SCALED_VF]] ; IR-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[N:%.*]], [[INDVAR_NEXT]] ; IR-NEXT: br i1 [[EXIT_COND]], label [[LOOP_EXIT:%.*]], label [[LOOP]] @@ -141,13 +139,11 @@ loop.ph: loop: ; preds = %loop, %loop.ph %indvar = phi i64 [ 0, %loop.ph ], [ %indvar.next, %loop ] - %ptr.in = getelementptr inbounds i16, i16* %in, i64 %indvar - %ptr.out = getelementptr inbounds i16, i16* %out, i64 %indvar - %in.ptrcast = bitcast i16* %ptr.in to * - %out.ptrcast = bitcast i16* %ptr.out to * - %val = call @llvm.masked.load.nxv8i16.p0nxv8i16(* %in.ptrcast, i32 4, %ptrue_vec.splat, undef) + %ptr.in = getelementptr inbounds i16, ptr %in, i64 %indvar + %ptr.out = getelementptr inbounds i16, ptr %out, i64 %indvar + %val = call @llvm.masked.load.nxv8i16.p0(ptr %ptr.in, i32 4, %ptrue_vec.splat, undef) %addp_vec = add %val, %p_vec.splat - call void @llvm.masked.store.nxv8i16.p0nxv8i16( %addp_vec, * %out.ptrcast, i32 4, %ptrue_vec.splat) + call void @llvm.masked.store.nxv8i16.p0( %addp_vec, ptr %ptr.out, i32 4, %ptrue_vec.splat) %indvar.next = add nsw i64 %indvar, %scaled_vf %exit.cond = icmp eq i64 %indvar.next, %n br i1 %exit.cond, label %loop.exit, label %loop @@ -161,6 +157,6 @@ exit: declare i64 @llvm.vscale.i64() -declare @llvm.masked.load.nxv8i16.p0nxv8i16(*, i32 immarg, , ) +declare @llvm.masked.load.nxv8i16.p0(ptr, i32 immarg, , ) -declare void @llvm.masked.store.nxv8i16.p0nxv8i16(, *, i32 immarg, ) +declare void @llvm.masked.store.nxv8i16.p0(, ptr, i32 immarg, ) -- 2.7.4