From 06191d132771814ff04df7898141e0db81d39215 Mon Sep 17 00:00:00 2001
From: Caroline Concatto <caroline.concatto@arm.com>
Date: Mon, 23 Jan 2023 18:01:48 +0000
Subject: [PATCH] [AArch64][SME2] Add Multi-vector saturating extract narrow
 and interleave intrinsics

Add the following intrinsics:

  SQCVTN
  SQCVTUN
  UQCVTN

NOTE: These intrinsics are still in development and are subject to future
changes.

Reviewed By: kmclaughlin

Differential Revision: https://reviews.llvm.org/D142089
---
 llvm/include/llvm/IR/IntrinsicsAArch64.td          |  10 ++
 llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td     |   6 +-
 llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td     |   6 +-
 llvm/lib/Target/AArch64/SMEInstrFormats.td         |   6 +-
 llvm/lib/Target/AArch64/SVEInstrFormats.td         |   9 +-
 llvm/test/CodeGen/AArch64/sme2-intrinsics-qcvtn.ll | 140 +++++++++++++++++++++
 6 files changed, 164 insertions(+), 13 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-qcvtn.ll

diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index ca14f62..e365f2a 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -2889,4 +2889,14 @@ let TargetPrefix = "aarch64" in {
   def int_aarch64_sve_sqcvt_x4  : SME2_CVT_VG4_SINGLE_Intrinsic;
   def int_aarch64_sve_uqcvt_x4  : SME2_CVT_VG4_SINGLE_Intrinsic;
   def int_aarch64_sve_sqcvtu_x4 : SME2_CVT_VG4_SINGLE_Intrinsic;
+
+  //
+  // Multi-vector saturating extract narrow and interleave
+  //
+  def int_aarch64_sve_sqcvtn_x2  : SME2_CVT_VG2_SINGLE_Intrinsic;
+  def int_aarch64_sve_uqcvtn_x2  : SME2_CVT_VG2_SINGLE_Intrinsic;
+  def int_aarch64_sve_sqcvtun_x2 : SME2_CVT_VG2_SINGLE_Intrinsic;
+  def int_aarch64_sve_sqcvtn_x4  : SME2_CVT_VG4_SINGLE_Intrinsic;
+  def int_aarch64_sve_uqcvtn_x4  : SME2_CVT_VG4_SINGLE_Intrinsic;
+  def int_aarch64_sve_sqcvtun_x4 : SME2_CVT_VG4_SINGLE_Intrinsic;
 }
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index ba9b4b9..a213d65 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -374,9 +374,9 @@ defm SQCVTU_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvtu", 0b1110, nxv8i16, nxv4i32, i
 defm SQCVT_Z4Z   : sme2_int_cvt_vg4_single<"sqcvt",  0b000, int_aarch64_sve_sqcvt_x4>;
 defm UQCVT_Z4Z   : sme2_int_cvt_vg4_single<"uqcvt",  0b001, int_aarch64_sve_uqcvt_x4>;
 defm SQCVTU_Z4Z  : sme2_int_cvt_vg4_single<"sqcvtu", 0b100, int_aarch64_sve_sqcvtu_x4>;
-defm SQCVTN_Z4Z  : sme2_int_cvt_vg4_single<"sqcvtn", 0b010, null_frag>;
-defm SQCVTUN_Z4Z : sme2_int_cvt_vg4_single<"sqcvtun", 0b110, null_frag>;
-defm UQCVTN_Z4Z  : sme2_int_cvt_vg4_single<"uqcvtn", 0b011, null_frag>;
+defm SQCVTN_Z4Z  : sme2_int_cvt_vg4_single<"sqcvtn", 0b010, int_aarch64_sve_sqcvtn_x4>;
+defm SQCVTUN_Z4Z : sme2_int_cvt_vg4_single<"sqcvtun", 0b110, int_aarch64_sve_sqcvtun_x4>;
+defm UQCVTN_Z4Z  : sme2_int_cvt_vg4_single<"uqcvtn", 0b011, int_aarch64_sve_uqcvtn_x4>;
 
 defm FCVTZS_2Z2Z_StoS : sme2_fp_cvt_vg2_multi<"fcvtzs", 0b00010>;
 defm FCVTZS_4Z4Z_StoS : sme2_fp_cvt_vg4_multi<"fcvtzs", 0b0001000>;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 8a37916..6a42c4f 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -3691,9 +3691,9 @@ defm PEXT_PCI : sve2p1_pred_as_ctr_to_mask<"pext">;
 defm PEXT_2PCI : sve2p1_pred_as_ctr_to_mask_pair<"pext">;
 defm PTRUE_C : sve2p1_ptrue_pn<"ptrue">;
 
-defm SQCVTN_Z2Z_StoH  : sve2p1_multi_vec_extract_narrow<"sqcvtn", 0b00>;
-defm UQCVTN_Z2Z_StoH  : sve2p1_multi_vec_extract_narrow<"uqcvtn", 0b01>;
-defm SQCVTUN_Z2Z_StoH : sve2p1_multi_vec_extract_narrow<"sqcvtun", 0b10>;
+defm SQCVTN_Z2Z_StoH  : sve2p1_multi_vec_extract_narrow<"sqcvtn", 0b00, int_aarch64_sve_sqcvtn_x2>;
+defm UQCVTN_Z2Z_StoH  : sve2p1_multi_vec_extract_narrow<"uqcvtn", 0b01, int_aarch64_sve_uqcvtn_x2>;
+defm SQCVTUN_Z2Z_StoH : sve2p1_multi_vec_extract_narrow<"sqcvtun", 0b10, int_aarch64_sve_sqcvtun_x2>;
 defm SQRSHRN_Z2ZI_StoH  : sve2p1_multi_vec_shift_narrow<"sqrshrn", 0b101>;
 defm UQRSHRN_Z2ZI_StoH  : sve2p1_multi_vec_shift_narrow<"uqrshrn", 0b111>;
 defm SQRSHRUN_Z2ZI_StoH : sve2p1_multi_vec_shift_narrow<"sqrshrun", 0b001>;
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 33ff5f0..22635a3 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -133,10 +133,6 @@ class SME2_ZA_TwoOp_VG4_Multi_Index_Pat<string name, SDPatternOperator intrinsi
           (!cast<Instruction>(name # _PSEUDO) $base, $offset,
           (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3), ZPR4b16:$Zm, imm_ty:$i)>;
 
-class SME2_Cvt_VG2_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt>
-   : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2)),
-         (!cast<Instruction>(name) (REG_SEQUENCE ZPR2Mul2, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1))>;
-
 class SME2_Cvt_VG4_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt>
    : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, in_vt:$Zn3, in_vt:$Zn4)),
          (!cast<Instruction>(name) (REG_SEQUENCE ZPR4Mul4, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1, in_vt:$Zn3, zsub2, in_vt:$Zn4, zsub3))>;
@@ -2087,7 +2083,7 @@ class sme2_cvt_vg2_single<string mnemonic, bits<4> op>
 multiclass sme2_cvt_vg2_single<string mnemonic, bits<4> op, ValueType out_vt,
                                ValueType in_vt, SDPatternOperator intrinsic> {
   def NAME : sme2_cvt_vg2_single<mnemonic, op>;
-  def : SME2_Cvt_VG2_Pat<NAME, intrinsic, out_vt, in_vt>;
+  def : SVE2p1_Cvt_VG2_Pat<NAME, intrinsic, out_vt, in_vt>;
 }
 
 class sme2_cvt_unpk_vector_vg2<bits<2> sz, bits<3> op, bit u, RegisterOperand first_ty,
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index dcaada7..cef8d41 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -549,6 +549,10 @@ class SVE_Shift_Add_All_Active_Pat<ValueType vtd, SDPatternOperator op, ValueTy
 : Pat<(vtd (add vt1:$Op1, (op (pt (SVEAllActive)), vt2:$Op2, vt3:$Op3))),
       (inst $Op1, $Op2, $Op3)>;
 
+class SVE2p1_Cvt_VG2_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt>
+   : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2)),
+         (!cast<Instruction>(name) (REG_SEQUENCE ZPR2Mul2, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1))>;
+
 //===----------------------------------------------------------------------===//
 // SVE pattern match helpers.
 //===----------------------------------------------------------------------===//
@@ -8861,8 +8865,9 @@ class sve2p1_multi_vec_extract_narrow<string mnemonic, bits<2> opc, bits<3> tsz
   let Inst{4-0} = Zd;
 }
 
-multiclass sve2p1_multi_vec_extract_narrow<string mnemonic, bits<2> opc> {
-  def : sve2p1_multi_vec_extract_narrow<mnemonic, opc, 0b001>;
+multiclass sve2p1_multi_vec_extract_narrow<string mnemonic, bits<2> opc, SDPatternOperator intrinsic> {
+  def NAME : sve2p1_multi_vec_extract_narrow<mnemonic, opc, 0b001>;
+  def : SVE2p1_Cvt_VG2_Pat<NAME, intrinsic, nxv8i16, nxv4i32>;
 }
 
 // SVE2 multi-vec shift narrow
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-qcvtn.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-qcvtn.ll
new file mode 100644
index 0000000..072282c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-qcvtn.ll
@@ -0,0 +1,140 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sme2,+bf16 -verify-machineinstrs < %s | FileCheck %s
+
+;
+; SQCVTN
+;
+
+; x2
+define <vscale x 8 x i16> @multi_vector_qcvtn_x2_s16_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2) {
+; CHECK-LABEL: multi_vector_qcvtn_x2_s16_s32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
+; CHECK-NEXT:    sqcvtn z0.h, { z2.s, z3.s }
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqcvtn.x2.nxv4i32(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2)
+  ret <vscale x 8 x i16> %res
+}
+
+; x4
+define <vscale x 16 x i8> @multi_vector_qcvtn_x4_s8_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4) {
+; CHECK-LABEL: multi_vector_qcvtn_x4_s8_s32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    sqcvtn z0.b, { z4.s - z7.s }
+; CHECK-NEXT:    ret
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.sqcvtn.x4.nxv4i32(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @multi_vector_qcvtn_x4_s16_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4) {
+; CHECK-LABEL: multi_vector_qcvtn_x4_s16_s64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    sqcvtn z0.h, { z4.d - z7.d }
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqcvtn.x4.nxv2i64(<vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4)
+  ret <vscale x 8 x i16> %res
+}
+
+;
+; UQCVTN
+;
+
+; x2
+define <vscale x 8 x i16> @multi_vector_qcvtn_x2_u16_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1) {
+; CHECK-LABEL: multi_vector_qcvtn_x2_u16_u32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
+; CHECK-NEXT:    uqcvtn z0.h, { z2.s, z3.s }
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.uqcvtn.x2.nxv4i32(<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1)
+  ret <vscale x 8 x i16> %res
+}
+
+; x4
+define <vscale x 16 x i8> @multi_vector_qcvtn_x4_u8_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4) {
+; CHECK-LABEL: multi_vector_qcvtn_x4_u8_u32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    uqcvtn z0.b, { z4.s - z7.s }
+; CHECK-NEXT:    ret
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.uqcvtn.x4.nxv4i32(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @multi_vector_qcvtn_x4_u16_u64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4) {
+; CHECK-LABEL: multi_vector_qcvtn_x4_u16_u64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    uqcvtn z0.h, { z4.d - z7.d }
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.uqcvtn.x4.nxv2i64(<vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4)
+  ret <vscale x 8 x i16> %res
+}
+
+;
+; SQCVTUN
+;
+
+; x2
+define <vscale x 8 x i16> @multi_vector_qcvtn_x2_s16_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2) {
+; CHECK-LABEL: multi_vector_qcvtn_x2_s16_u32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
+; CHECK-NEXT:    sqcvtun z0.h, { z2.s, z3.s }
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqcvtun.x2.nxv4i32(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2)
+  ret <vscale x 8 x i16> %res
+}
+; x4
+define <vscale x 16 x i8> @multi_vector_qcvtn_x4_u8_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4) {
+; CHECK-LABEL: multi_vector_qcvtn_x4_u8_s32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    sqcvtun z0.b, { z4.s - z7.s }
+; CHECK-NEXT:    ret
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.sqcvtun.x4.nxv4i32(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @multi_vector_qcvtn_x4_u16_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4) {
+; CHECK-LABEL: multi_vector_qcvtn_x4_u16_s64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    sqcvtun z0.h, { z4.d - z7.d }
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqcvtun.x4.nxv2i64(<vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4)
+  ret <vscale x 8 x i16> %res
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uqcvtn.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqcvtn.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqcvtun.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqcvtn.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqcvtn.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uqcvtn.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uqcvtn.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqcvtun.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqcvtun.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-- 
2.7.4
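
For reference, the new x2 form can be exercised straight from IR. The sketch
below is illustrative rather than part of the patch: the function name
@sqcvtn_x2_example is invented, and it assumes the same +sme2 feature set as
the RUN line in the test above. A single call saturating-narrows each 32-bit
element of the two inputs to 16 bits and interleaves the results into one
vector, which the SVE2p1_Cvt_VG2_Pat pattern added here selects to a single
SQCVTN instruction operating on a consecutive Z-register pair.

; Illustrative only: narrow two <vscale x 4 x i32> vectors into one
; interleaved <vscale x 8 x i16> result with signed saturation.
define <vscale x 8 x i16> @sqcvtn_x2_example(<vscale x 4 x i32> %lo, <vscale x 4 x i32> %hi) {
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqcvtn.x2.nxv4i32(<vscale x 4 x i32> %lo, <vscale x 4 x i32> %hi)
  ret <vscale x 8 x i16> %res
}
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqcvtn.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)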