From cc3ef26f60b12f1999f85bd31b51585312c8c3a9 Mon Sep 17 00:00:00 2001
From: Matt Devereau
Date: Fri, 24 Jun 2022 07:33:50 +0000
Subject: [PATCH] [AArch64][SVE] Add sve.dupq.lane(insert(constant vector, 0), 0) ld1rq tests

---
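For reviewers: every test added below follows the same shape, sketched here
with hypothetical names %ins/%dup and an illustrative <2 x i64> constant (the
per-type constants and the auto-generated CHECK lines are in the diff itself):

  ; insert a 128-bit constant vector at index 0 of an undef scalable vector
  %ins = tail call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> <i64 1, i64 2>, i64 0)
  ; broadcast quadword lane 0 to all 128-bit lanes of the scalable result
  %dup = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %ins, i64 0)

That is, a 128-bit constant is inserted at index 0 of an undef scalable vector
and splatted across every quadword lane, a form that a later change could fold
into a single ld1rq load of the constant; the codegen checked here is still the
adrp/ldr/mov sequence.
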
 .../CodeGen/AArch64/sve-intrinsics-perm-select.ll | 105 +++++++++++++++++++++
 1 file changed, 105 insertions(+)

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
index c3befd9..680e3ab 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
@@ -531,6 +531,102 @@ define <vscale x 2 x i64> @dupq_i64_range(<vscale x 2 x i64> %a) {
   ret <vscale x 2 x i64> %out
 }
 
+define dso_local <vscale x 2 x double> @dupq_ld1rqd_f64() {
+; CHECK-LABEL: dupq_ld1rqd_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI49_0
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI49_0]
+; CHECK-NEXT:    mov z0.q, q0
+; CHECK-NEXT:    ret
+  %1 = tail call fast <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> undef, <2 x double> , i64 0)
+  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double> %1, i64 0)
+  ret <vscale x 2 x double> %2
+}
+
+define dso_local <vscale x 4 x float> @dupq_ld1rqw_f32() {
+; CHECK-LABEL: dupq_ld1rqw_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI50_0
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI50_0]
+; CHECK-NEXT:    mov z0.q, q0
+; CHECK-NEXT:    ret
+  %1 = tail call fast <vscale x 4 x float> @llvm.experimental.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> , i64 0)
+  %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float> %1, i64 0)
+  ret <vscale x 4 x float> %2
+}
+
+define dso_local <vscale x 8 x half> @dupq_ld1rqh_f16() {
+; CHECK-LABEL: dupq_ld1rqh_f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI51_0
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI51_0]
+; CHECK-NEXT:    mov z0.q, q0
+; CHECK-NEXT:    ret
+  %1 = tail call fast <vscale x 8 x half> @llvm.experimental.vector.insert.nxv8f16.v8f16(<vscale x 8 x half> undef, <8 x half> , i64 0)
+  %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half> %1, i64 0)
+  ret <vscale x 8 x half> %2
+}
+
+define dso_local <vscale x 8 x bfloat> @dupq_ld1rqh_bf16() #0 {
+; CHECK-LABEL: dupq_ld1rqh_bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI52_0
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI52_0]
+; CHECK-NEXT:    mov z0.q, q0
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 8 x bfloat> @llvm.experimental.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> undef, <8 x bfloat> , i64 0)
+  %2 = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> %1, i64 0)
+  ret <vscale x 8 x bfloat> %2
+}
+
+define dso_local <vscale x 2 x i64> @dupq_ld1rqd_i64() {
+; CHECK-LABEL: dupq_ld1rqd_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI53_0
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI53_0]
+; CHECK-NEXT:    mov z0.q, q0
+; CHECK-NEXT:    ret
+  %1 = tail call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> , i64 0)
+  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %1, i64 0)
+  ret <vscale x 2 x i64> %2
+}
+
+define dso_local <vscale x 4 x i32> @dupq_ld1rqd_i32() {
+; CHECK-LABEL: dupq_ld1rqd_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI54_0
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI54_0]
+; CHECK-NEXT:    mov z0.q, q0
+; CHECK-NEXT:    ret
+  %1 = tail call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> , i64 0)
+  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> %1, i64 0)
+  ret <vscale x 4 x i32> %2
+}
+
+define dso_local <vscale x 8 x i16> @dupq_ld1rqd_i16() {
+; CHECK-LABEL: dupq_ld1rqd_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI55_0
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI55_0]
+; CHECK-NEXT:    mov z0.q, q0
+; CHECK-NEXT:    ret
+  %1 = tail call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> , i64 0)
+  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> %1, i64 0)
+  ret <vscale x 8 x i16> %2
+}
+
+define dso_local <vscale x 16 x i8> @dupq_ld1rqd_i8() {
+; CHECK-LABEL: dupq_ld1rqd_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI56_0
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI56_0]
+; CHECK-NEXT:    mov z0.q, q0
+; CHECK-NEXT:    ret
+  %1 = tail call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> , i64 0)
+  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> %1, i64 0)
+  ret <vscale x 16 x i8> %2
+}
+
 ;
 ; EXT
 ;
@@ -2253,5 +2349,14 @@ declare <vscale x 2 x float> @llvm.aarch64.sve.zip2.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
 declare <vscale x 4 x float> @llvm.aarch64.sve.zip2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 2 x double> @llvm.aarch64.sve.zip2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
 
+declare <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v2f64(<vscale x 2 x double>, <2 x double>, i64)
+declare <vscale x 4 x float> @llvm.experimental.vector.insert.nxv4f32.v4f32(<vscale x 4 x float>, <4 x float>, i64)
+declare <vscale x 8 x half> @llvm.experimental.vector.insert.nxv8f16.v8f16(<vscale x 8 x half>, <8 x half>, i64)
+declare <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64>, <2 x i64>, i64)
+declare <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32>, <4 x i32>, i64)
+declare <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16>, <8 x i16>, i64)
+declare <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8>, <16 x i8>, i64)
+declare <vscale x 8 x bfloat> @llvm.experimental.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat>, <8 x bfloat>, i64)
+
 ; +bf16 is required for the bfloat version.
 attributes #0 = { "target-features"="+sve,+bf16" }
-- 
2.7.4