From 094005eb6602d71811fbf92414e98cf6ed7d1ff8 Mon Sep 17 00:00:00 2001 From: Sander de Smalen Date: Tue, 16 May 2023 12:53:23 +0000 Subject: [PATCH] [AArch64][SME2/SVE2p1] Add predicate-as-counter intrinsics for pext (single) These intrinsics are used to implement the pext intrinsics that extract a predicate (mask) from a predicate-as-counter value, e.g. __attribute__((arm_streaming)) svbool_t svpext_lane_c8(svcount_t pnn, uint64_t imm); As described in https://github.com/ARM-software/acle/pull/217 Reviewed By: david-arm Differential Revision: https://reviews.llvm.org/D150441 --- llvm/include/llvm/IR/IntrinsicsAArch64.td | 5 ++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 2 +- llvm/lib/Target/AArch64/SVEInstrFormats.td | 9 ++- .../sve2p1-intrinsics-predicate-as-counter.ll | 69 ++++++++++++++++++++++ 4 files changed, 82 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index beb47aa..428ef89 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -2768,6 +2768,11 @@ let TargetPrefix = "aarch64" in { // Predicate-as-counter intrinsics // + def int_aarch64_sve_pext + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [llvm_aarch64_svcount_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; + def int_aarch64_sve_ptrue_c8 : DefaultAttrsIntrinsic<[llvm_aarch64_svcount_ty], [], [IntrNoMem]>; diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 80a5db6..bf9c7e3 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -3752,7 +3752,7 @@ defm SDOT_ZZZI_HtoS : sve2p1_two_way_dot_vvi<"sdot", 0b0, int_aarch64_sve_sdot_l defm UDOT_ZZZI_HtoS : sve2p1_two_way_dot_vvi<"udot", 0b1, int_aarch64_sve_udot_lane_x2>; defm CNTP_XCI : sve2p1_pcount_pn<"cntp", 0b000>; -defm PEXT_PCI : sve2p1_pred_as_ctr_to_mask<"pext">; +defm PEXT_PCI : sve2p1_pred_as_ctr_to_mask<"pext", int_aarch64_sve_pext>; defm PEXT_2PCI : sve2p1_pred_as_ctr_to_mask_pair<"pext">; defm PTRUE_C : sve2p1_ptrue_pn<"ptrue">; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 2358161..f9c3767 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -9212,16 +9212,21 @@ class sve2p1_pred_as_ctr_to_mask_base sz, bits<3> opc, } class sve2p1_pred_as_ctr_to_mask sz, PPRRegOp pprty> - : sve2p1_pred_as_ctr_to_mask_base { + : sve2p1_pred_as_ctr_to_mask_base { bits<2> index; let Inst{9-8} = index; } -multiclass sve2p1_pred_as_ctr_to_mask { +multiclass sve2p1_pred_as_ctr_to_mask { def _B : sve2p1_pred_as_ctr_to_mask; def _H : sve2p1_pred_as_ctr_to_mask; def _S : sve2p1_pred_as_ctr_to_mask; def _D : sve2p1_pred_as_ctr_to_mask; + + def : SVE_2_Op_Imm_Pat(NAME # _B)>; + def : SVE_2_Op_Imm_Pat(NAME # _H)>; + def : SVE_2_Op_Imm_Pat(NAME # _S)>; + def : SVE_2_Op_Imm_Pat(NAME # _D)>; } diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-predicate-as-counter.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-predicate-as-counter.ll index 49d323c..b36d8a6 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-predicate-as-counter.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-predicate-as-counter.ll @@ -2,6 +2,75 @@ ; RUN: llc -mtriple=aarch64 -mattr=+sve2p1 < %s | FileCheck %s ; RUN: llc -mtriple=aarch64 -mattr=+sme2 < %s | FileCheck %s +define @pext_b(target("aarch64.svcount") %x) nounwind { +; CHECK-LABEL: pext_b: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: pext p0.b, pn8[2] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.pext.nxv16i1(target("aarch64.svcount") %x, i32 2) + ret %res +} + +define @pext_h(target("aarch64.svcount") %x) nounwind { +; CHECK-LABEL: pext_h: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: pext p0.h, pn8[2] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.pext.nxv8i1(target("aarch64.svcount") %x, i32 2) + ret %res +} + +define @pext_s(target("aarch64.svcount") %x) nounwind { +; CHECK-LABEL: pext_s: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: pext p0.s, pn8[2] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.pext.nxv4i1(target("aarch64.svcount") %x, i32 2) + ret %res +} + +define @pext_d(target("aarch64.svcount") %x) nounwind { +; CHECK-LABEL: pext_d: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: pext p0.d, pn8[2] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.pext.nxv2i1(target("aarch64.svcount") %x, i32 2) + ret %res +} + +declare @llvm.aarch64.sve.pext.nxv16i1(target("aarch64.svcount"), i32) +declare @llvm.aarch64.sve.pext.nxv8i1(target("aarch64.svcount"), i32) +declare @llvm.aarch64.sve.pext.nxv4i1(target("aarch64.svcount"), i32) +declare @llvm.aarch64.sve.pext.nxv2i1(target("aarch64.svcount"), i32) + define target("aarch64.svcount") @ptrue_b() nounwind { ; CHECK-LABEL: ptrue_b: ; CHECK: // %bb.0: -- 2.7.4