From 981f7d563aa8ab3ed693d55c055b193adff4bba5 Mon Sep 17 00:00:00 2001
From: Sander de Smalen
Date: Tue, 7 Sep 2021 17:07:37 +0100
Subject: [PATCH] [AArch64] Implement extract_subvector for predicates.

This patch implements extract_subvector for predicate types
when the input type is more than twice the size of the subvector
that is being extracted.

Reviewed By: CarolineConcatto

Differential Revision: https://reviews.llvm.org/D109314
---
Reviewer note (not part of the commit message; ignored by git-am):
  * In the nested patterns the innermost PUNPK{LO,HI}_PP is the first
    instruction emitted, so the subvector is selected half-by-half starting
    from the widest predicate. The CHECK lines in the tests list the
    instructions in that innermost-first order.
  * The scalable vector types in the test IR follow the intrinsic name
    mangling: `.nxv2i1.nxv16i1` returns <vscale x 2 x i1> and takes
    <vscale x 16 x i1>.
  * NOTE(review): leading indentation was reconstructed from LLVM conventions;
    confirm against the tree if context lines fail to match.

 llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td  |  35 +++++
 llvm/test/CodeGen/AArch64/sve-extract-vector.ll | 177 ++++++++++++++++++++++++
 2 files changed, 212 insertions(+)

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 3a6f084..4c342d1 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1234,6 +1234,41 @@ let Predicates = [HasSVEorStreamingSVE] in {
   def : Pat<(nxv8i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 8))),
             (PUNPKHI_PP PPR:$Ps)>;
 
+  def : Pat<(nxv2i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 0))),
+            (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps))>;
+  def : Pat<(nxv2i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 2))),
+            (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps))>;
+  def : Pat<(nxv2i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 4))),
+            (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps))>;
+  def : Pat<(nxv2i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 6))),
+            (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps))>;
+
+  def : Pat<(nxv4i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 0))),
+            (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps))>;
+  def : Pat<(nxv4i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 4))),
+            (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps))>;
+  def : Pat<(nxv4i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 8))),
+            (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps))>;
+  def : Pat<(nxv4i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 12))),
+            (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps))>;
+
+  def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 0))),
+            (PUNPKLO_PP (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps)))>;
+  def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 2))),
+            (PUNPKHI_PP (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps)))>;
+  def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 4))),
+            (PUNPKLO_PP (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps)))>;
+  def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 6))),
+            (PUNPKHI_PP (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps)))>;
+  def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 8))),
+            (PUNPKLO_PP (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps)))>;
+  def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 10))),
+            (PUNPKHI_PP (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps)))>;
+  def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 12))),
+            (PUNPKLO_PP (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps)))>;
+  def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 14))),
+            (PUNPKHI_PP (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps)))>;
+
   // Extract subvectors from FP SVE vectors
   def : Pat<(nxv2f16 (extract_subvector (nxv4f16 ZPR:$Zs), (i64 0))),
             (UUNPKLO_ZZ_D ZPR:$Zs)>;
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-vector.ll
index cb6908d..c9e1486 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-vector.ll
@@ -198,6 +198,179 @@ define <4 x i64> @extract_fixed_v4i64_nxv2i64(<vscale x 2 x i64> %vec) nounwind
   ret <4 x i64> %retval
 }
 
+;
+; Extracting a predicate from a wider predicate, that is more than twice the size.
+;
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv16i1_0(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv16i1_0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %vec, i64 0)
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv16i1_2(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv16i1_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %vec, i64 2)
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv16i1_4(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv16i1_4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %vec, i64 4)
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv16i1_6(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv16i1_6:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %vec, i64 6)
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv16i1_8(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv16i1_8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %vec, i64 8)
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv16i1_10(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv16i1_10:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %vec, i64 10)
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv16i1_12(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv16i1_12:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %vec, i64 12)
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv16i1_14(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv16i1_14:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %vec, i64 14)
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv8i1_0(<vscale x 8 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv8i1_0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> %vec, i64 0)
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv8i1_2(<vscale x 8 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv8i1_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> %vec, i64 2)
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv8i1_4(<vscale x 8 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv8i1_4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> %vec, i64 4)
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv8i1_6(<vscale x 8 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv8i1_6:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> %vec, i64 6)
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 4 x i1> @extract_nxv4i1_nxv16i1_0(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv4i1_nxv16i1_0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x i1> @llvm.experimental.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %vec, i64 0)
+  ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 4 x i1> @extract_nxv4i1_nxv16i1_4(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv4i1_nxv16i1_4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x i1> @llvm.experimental.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %vec, i64 4)
+  ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 4 x i1> @extract_nxv4i1_nxv16i1_8(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv4i1_nxv16i1_8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x i1> @llvm.experimental.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %vec, i64 8)
+  ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 4 x i1> @extract_nxv4i1_nxv16i1_12(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv4i1_nxv16i1_12:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x i1> @llvm.experimental.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %vec, i64 12)
+  ret <vscale x 4 x i1> %res
+}
+
+
 attributes #0 = { vscale_range(2,2) }
 
 declare <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64>, i64)
@@ -209,3 +382,7 @@ declare <4 x i64> @llvm.experimental.vector.extract.v4i64.nxv2i64(<vscale x 2 x
 
 declare <vscale x 1 x i32> @llvm.experimental.vector.extract.nxv1i32.nxv4i32(<vscale x 4 x i32>, i64)
 declare <vscale x 1 x i16> @llvm.experimental.vector.extract.nxv1i16.nxv6i16(<vscale x 6 x i16>, i64)
+
+declare <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1>, i64)
+declare <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1>, i64)
+declare <vscale x 4 x i1> @llvm.experimental.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1>, i64)
-- 
2.7.4