From e128c208648998e11a4f44edf7ef3bff9a604807 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin@arm.com>
Date: Wed, 30 Oct 2019 11:13:49 +0000
Subject: [PATCH] [AArch64][SVE] Implement additional integer arithmetic
 intrinsics

Summary:
Add intrinsics for the following:
  - sxt[b|h|w] & uxt[b|h|w]
  - cls & clz
  - not & cnot
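
All of these use the merging unary form already used by abs & neg
(AdvSIMD_Merged1VectorArg_Intrinsic): the first operand supplies the
values of the inactive lanes, the second is the governing predicate,
and the third is the source vector. For illustration, a sketch
mirroring the tests added below (the function name is hypothetical;
the intrinsic and its signature are as defined in this patch):

  ; Sign-extend the low byte of each active i16 lane of %b;
  ; inactive lanes take their value from %a.
  define <vscale x 8 x i16> @example_sxtb(<vscale x 8 x i16> %a,
                                          <vscale x 8 x i1> %pg,
                                          <vscale x 8 x i16> %b) {
    %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sxtb.nxv8i16(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %b)
    ret <vscale x 8 x i16> %out
  }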
Reviewers: huntergr, sdesmalen, dancgr

Reviewed By: sdesmalen

Subscribers: cameron.mcinally, tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, cfe-commits, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D69567
---
 llvm/include/llvm/IR/IntrinsicsAArch64.td          |  20 +++
 llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td     |  26 ++--
 llvm/lib/Target/AArch64/SVEInstrFormats.td         |  18 ++-
 .../CodeGen/AArch64/sve-intrinsics-conversion.ll   | 159 +++++++++++++++++++++
 .../AArch64/sve-intrinsics-counting-bits.ll        |  98 +++++++++++++
 .../test/CodeGen/AArch64/sve-intrinsics-logical.ll |  99 +++++++++++++
 6 files changed, 404 insertions(+), 16 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-conversion.ll
 create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-logical.ll
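
Reviewer note (illustrative, not part of the diff): each
sve_int_un_pred_arit_0_* multiclass below now takes an
SDPatternOperator and instantiates SVE_3_Op_Pat for each element
width it supports, so the intrinsic selects directly to the merging
instruction. Assuming SVE_3_Op_Pat keeps its existing definition in
SVEInstrFormats.td, the _H pattern emitted for SXTB_ZPmZ expands to
roughly:

  def : Pat<(nxv8i16 (int_aarch64_sve_sxtb nxv8i16:$Op1, nxv8i1:$Op2, nxv8i16:$Op3)),
            (SXTB_ZPmZ_H $Op1, $Op2, $Op3)>;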
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 66e0370..08c3186 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -832,6 +832,8 @@ def int_aarch64_sve_udot_lane : AdvSIMD_SVE_DOT_Indexed_Intrinsic;
 // Counting bits
 //
 
+def int_aarch64_sve_cls : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_clz : AdvSIMD_Merged1VectorArg_Intrinsic;
 def int_aarch64_sve_cnt : AdvSIMD_SVE_CNT_Intrinsic;
 
 //
@@ -845,6 +847,24 @@ def int_aarch64_sve_uunpkhi : AdvSIMD_SVE_Unpack_Intrinsic;
 def int_aarch64_sve_uunpklo : AdvSIMD_SVE_Unpack_Intrinsic;
 
 //
+// Logical operations
+//
+
+def int_aarch64_sve_cnot : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_not  : AdvSIMD_Merged1VectorArg_Intrinsic;
+
+//
+// Conversion
+//
+
+def int_aarch64_sve_sxtb : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_sxth : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_sxtw : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_uxtb : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_uxth : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_uxtw : AdvSIMD_Merged1VectorArg_Intrinsic;
+
+//
 // Floating-point comparisons
 //
 
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index af663f3..290bb32 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -88,20 +88,20 @@ let Predicates = [HasSVE] in {
   defm SDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b0, "sdot", int_aarch64_sve_sdot_lane>;
   defm UDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b1, "udot", int_aarch64_sve_udot_lane>;
 
-  defm SXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b000, "sxtb">;
-  defm UXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b001, "uxtb">;
-  defm SXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b010, "sxth">;
-  defm UXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b011, "uxth">;
-  defm SXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b100, "sxtw">;
-  defm UXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b101, "uxtw">;
-  defm ABS_ZPmZ  : sve_int_un_pred_arit_0<  0b110, "abs", int_aarch64_sve_abs>;
-  defm NEG_ZPmZ  : sve_int_un_pred_arit_0<  0b111, "neg", int_aarch64_sve_neg>;
-
-  defm CLS_ZPmZ  : sve_int_un_pred_arit_1<   0b000, "cls", null_frag>;
-  defm CLZ_ZPmZ  : sve_int_un_pred_arit_1<   0b001, "clz", null_frag>;
+  defm SXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b000, "sxtb", int_aarch64_sve_sxtb>;
+  defm UXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b001, "uxtb", int_aarch64_sve_uxtb>;
+  defm SXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b010, "sxth", int_aarch64_sve_sxth>;
+  defm UXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b011, "uxth", int_aarch64_sve_uxth>;
+  defm SXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b100, "sxtw", int_aarch64_sve_sxtw>;
+  defm UXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b101, "uxtw", int_aarch64_sve_uxtw>;
+  defm ABS_ZPmZ  : sve_int_un_pred_arit_0<  0b110, "abs", int_aarch64_sve_abs>;
+  defm NEG_ZPmZ  : sve_int_un_pred_arit_0<  0b111, "neg", int_aarch64_sve_neg>;
+
+  defm CLS_ZPmZ  : sve_int_un_pred_arit_1<   0b000, "cls", int_aarch64_sve_cls>;
+  defm CLZ_ZPmZ  : sve_int_un_pred_arit_1<   0b001, "clz", int_aarch64_sve_clz>;
   defm CNT_ZPmZ  : sve_int_un_pred_arit_1<   0b010, "cnt", int_aarch64_sve_cnt>;
-  defm CNOT_ZPmZ : sve_int_un_pred_arit_1<   0b011, "cnot", null_frag>;
-  defm NOT_ZPmZ  : sve_int_un_pred_arit_1<   0b110, "not", null_frag>;
+  defm CNOT_ZPmZ : sve_int_un_pred_arit_1<   0b011, "cnot", int_aarch64_sve_cnot>;
+  defm NOT_ZPmZ  : sve_int_un_pred_arit_1<   0b110, "not", int_aarch64_sve_not>;
   defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs">;
   defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg">;
 
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 4d5b3ee..12fdb5c 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2877,19 +2877,31 @@ multiclass sve_int_un_pred_arit_0<bits<3> opc, string asm,
   def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
 }
 
-multiclass sve_int_un_pred_arit_0_h<bits<3> opc, string asm> {
+multiclass sve_int_un_pred_arit_0_h<bits<3> opc, string asm,
+                                    SDPatternOperator op> {
   def _H : sve_int_un_pred_arit<0b01, { opc, 0b0 }, asm, ZPR16>;
   def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>;
   def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>;
+
+  def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
 }
 
-multiclass sve_int_un_pred_arit_0_w<bits<3> opc, string asm> {
+multiclass sve_int_un_pred_arit_0_w<bits<3> opc, string asm,
+                                    SDPatternOperator op> {
   def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>;
   def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>;
+
+  def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
 }
 
-multiclass sve_int_un_pred_arit_0_d<bits<3> opc, string asm> {
+multiclass sve_int_un_pred_arit_0_d<bits<3> opc, string asm,
+                                    SDPatternOperator op> {
   def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>;
+
+  def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
 }
 
 multiclass sve_int_un_pred_arit_1<bits<3> opc, string asm,
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-conversion.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-conversion.ll
new file mode 100644
index 0000000..ac2b63d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-conversion.ll
@@ -0,0 +1,159 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; SXTB
+;
+
+define <vscale x 8 x i16> @sxtb_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sxtb_i16:
+; CHECK: sxtb z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sxtb.nxv8i16(<vscale x 8 x i16> %a,
+                                                                <vscale x 8 x i1> %pg,
+                                                                <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sxtb_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sxtb_i32:
+; CHECK: sxtb z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sxtb.nxv4i32(<vscale x 4 x i32> %a,
+                                                                <vscale x 4 x i1> %pg,
+                                                                <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sxtb_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sxtb_i64:
+; CHECK: sxtb z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtb.nxv2i64(<vscale x 2 x i64> %a,
+                                                                <vscale x 2 x i1> %pg,
+                                                                <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; SXTH
+;
+
+define <vscale x 4 x i32> @sxth_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sxth_i32:
+; CHECK: sxth z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sxth.nxv4i32(<vscale x 4 x i32> %a,
+                                                                <vscale x 4 x i1> %pg,
+                                                                <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sxth_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sxth_i64:
+; CHECK: sxth z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sxth.nxv2i64(<vscale x 2 x i64> %a,
+                                                                <vscale x 2 x i1> %pg,
+                                                                <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; SXTW
+;
+
+define <vscale x 2 x i64> @sxtw_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sxtw_i64:
+; CHECK: sxtw z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> %a,
+                                                                <vscale x 2 x i1> %pg,
+                                                                <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; UXTB
+;
+
+define <vscale x 8 x i16> @uxtb_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: uxtb_i16:
+; CHECK: uxtb z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16> %a,
+                                                                <vscale x 8 x i1> %pg,
+                                                                <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uxtb_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: uxtb_i32:
+; CHECK: uxtb z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> %a,
+                                                                <vscale x 4 x i1> %pg,
+                                                                <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @uxtb_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: uxtb_i64:
+; CHECK: uxtb z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64> %a,
+                                                                <vscale x 2 x i1> %pg,
+                                                                <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; UXTH
+;
+
+define <vscale x 4 x i32> @uxth_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: uxth_i32:
+; CHECK: uxth z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uxth.nxv4i32(<vscale x 4 x i32> %a,
+                                                                <vscale x 4 x i1> %pg,
+                                                                <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @uxth_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: uxth_i64:
+; CHECK: uxth z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uxth.nxv2i64(<vscale x 2 x i64> %a,
+                                                                <vscale x 2 x i1> %pg,
+                                                                <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; UXTW
+;
+
+define <vscale x 2 x i64> @uxtw_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: uxtw_i64:
+; CHECK: uxtw z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> %a,
+                                                                <vscale x 2 x i1> %pg,
+                                                                <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sxtb.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sxtb.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sxtb.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sxth.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sxth.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uxth.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uxth.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll
index 2350353..197e723 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll
@@ -1,6 +1,94 @@
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
 
 ;
+; CLS
+;
+
+define <vscale x 16 x i8> @cls_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cls_i8:
+; CHECK: cls z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.cls.nxv16i8(<vscale x 16 x i8> %a,
+                                                               <vscale x 16 x i1> %pg,
+                                                               <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @cls_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: cls_i16:
+; CHECK: cls z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.cls.nxv8i16(<vscale x 8 x i16> %a,
+                                                               <vscale x 8 x i1> %pg,
+                                                               <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @cls_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: cls_i32:
+; CHECK: cls z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.cls.nxv4i32(<vscale x 4 x i32> %a,
+                                                               <vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @cls_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cls_i64:
+; CHECK: cls z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.cls.nxv2i64(<vscale x 2 x i64> %a,
+                                                               <vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; CLZ
+;
+
+define <vscale x 16 x i8> @clz_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: clz_i8:
+; CHECK: clz z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.clz.nxv16i8(<vscale x 16 x i8> %a,
+                                                               <vscale x 16 x i1> %pg,
+                                                               <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @clz_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: clz_i16:
+; CHECK: clz z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.clz.nxv8i16(<vscale x 8 x i16> %a,
+                                                               <vscale x 8 x i1> %pg,
+                                                               <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @clz_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: clz_i32:
+; CHECK: clz z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.clz.nxv4i32(<vscale x 4 x i32> %a,
+                                                               <vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @clz_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: clz_i64:
+; CHECK: clz z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.clz.nxv2i64(<vscale x 2 x i64> %a,
+                                                               <vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+;
 ; CNT
 ;
 
@@ -74,6 +162,16 @@ define <vscale x 2 x i64> @cnt_f64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg,
   ret <vscale x 2 x i64> %out
 }
 
+declare <vscale x 16 x i8> @llvm.aarch64.sve.cls.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.cls.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.cls.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.cls.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.clz.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.clz.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.clz.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.clz.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+
 declare <vscale x 16 x i8> @llvm.aarch64.sve.cnt.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-logical.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-logical.ll
new file mode 100644
index 0000000..05c98fb
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-logical.ll
@@ -0,0 +1,99 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; CNOT
+;
+
+define <vscale x 16 x i8> @cnot_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: cnot_i8:
+; CHECK: cnot z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.cnot.nxv16i8(<vscale x 16 x i8> %a,
+                                                                <vscale x 16 x i1> %pg,
+                                                                <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @cnot_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: cnot_i16:
+; CHECK: cnot z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.cnot.nxv8i16(<vscale x 8 x i16> %a,
+                                                                <vscale x 8 x i1> %pg,
+                                                                <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @cnot_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: cnot_i32:
+; CHECK: cnot z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.cnot.nxv4i32(<vscale x 4 x i32> %a,
+                                                                <vscale x 4 x i1> %pg,
+                                                                <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @cnot_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cnot_i64:
+; CHECK: cnot z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.cnot.nxv2i64(<vscale x 2 x i64> %a,
+                                                                <vscale x 2 x i1> %pg,
+                                                                <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; NOT
+;
+
+define <vscale x 16 x i8> @not_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: not_i8:
+; CHECK: not z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.not.nxv16i8(<vscale x 16 x i8> %a,
+                                                               <vscale x 16 x i1> %pg,
+                                                               <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @not_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: not_i16:
+; CHECK: not z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.not.nxv8i16(<vscale x 8 x i16> %a,
+                                                               <vscale x 8 x i1> %pg,
+                                                               <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @not_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: not_i32:
+; CHECK: not z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.not.nxv4i32(<vscale x 4 x i32> %a,
+                                                               <vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @not_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: not_i64:
+; CHECK: not z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.not.nxv2i64(<vscale x 2 x i64> %a,
+                                                               <vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.cnot.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.cnot.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.cnot.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.cnot.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.not.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.not.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.not.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.not.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
-- 
2.7.4