From ce70e2899879e092b153a4078b993833b6696713 Mon Sep 17 00:00:00 2001
From: Danilo Carvalho Grael
Date: Thu, 20 Feb 2020 11:24:50 -0500
Subject: [PATCH] [AArch64][SVE] Add intrinsics for SVE2 bitwise ternary operations

Summary:
Add intrinsics for the following operations:
- eor3, bcax
- bsl, bsl1n, bsl2n, nbsl
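
For reference, a minimal IR-level sketch of how one of the new intrinsics is
used (the function name @eor3_example is illustrative only; the real coverage
is in the new sve2-bitwise-ternary.ll test below). With -mattr=+sve2 this is
expected to select to a single "eor3 z0.d, z0.d, z1.d, z2.d":

  ; eor3: three-way bitwise exclusive OR of %a, %b and %c.
  define <vscale x 16 x i8> @eor3_example(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
    %res = call <vscale x 16 x i8> @llvm.aarch64.sve.eor3.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
    ret <vscale x 16 x i8> %res
  }
  declare <vscale x 16 x i8> @llvm.aarch64.sve.eor3.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)

The remaining intrinsics (bcax, bsl, bsl1n, bsl2n, nbsl) follow the same
three-vector-operand form.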

Reviewers: kmclaughlin, c-rhodes, sdesmalen, efriedma, rengolin

Reviewed By: efriedma

Subscribers: tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D74785
---
 llvm/include/llvm/IR/IntrinsicsAArch64.td     |   9 +-
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |  12 +-
 llvm/lib/Target/AArch64/SVEInstrFormats.td    |   7 +-
 .../CodeGen/AArch64/sve2-bitwise-ternary.ll   | 284 ++++++++++++++++++
 4 files changed, 304 insertions(+), 8 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/sve2-bitwise-ternary.ll

diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index a3fb5fe840a4..a3234b3bdd5a 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -2001,10 +2001,17 @@ def int_aarch64_sve_sbclt : AdvSIMD_3VectorArg_Intrinsic;
 //
 // SVE2 - Polynomial arithmetic
 //
-
 def int_aarch64_sve_eorbt : AdvSIMD_3VectorArg_Intrinsic;
 def int_aarch64_sve_eortb : AdvSIMD_3VectorArg_Intrinsic;
 def int_aarch64_sve_pmullb_pair : AdvSIMD_2VectorArg_Intrinsic;
 def int_aarch64_sve_pmullt_pair : AdvSIMD_2VectorArg_Intrinsic;
 
+// SVE2 bitwise ternary operations.
+def int_aarch64_sve_eor3  : AdvSIMD_3VectorArg_Intrinsic;
+def int_aarch64_sve_bcax  : AdvSIMD_3VectorArg_Intrinsic;
+def int_aarch64_sve_bsl   : AdvSIMD_3VectorArg_Intrinsic;
+def int_aarch64_sve_bsl1n : AdvSIMD_3VectorArg_Intrinsic;
+def int_aarch64_sve_bsl2n : AdvSIMD_3VectorArg_Intrinsic;
+def int_aarch64_sve_nbsl  : AdvSIMD_3VectorArg_Intrinsic;
+
 }
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index f8819c6b9b14..59c478a3a386 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1771,12 +1771,12 @@ let Predicates = [HasSVE2] in {
   defm FMLSLT_ZZZ_SHH : sve2_fp_mla_long<0b11, "fmlslt", int_aarch64_sve_fmlslt>;
 
   // SVE2 bitwise ternary operations
-  defm EOR3_ZZZZ_D  : sve2_int_bitwise_ternary_op<0b000, "eor3">;
-  defm BCAX_ZZZZ_D  : sve2_int_bitwise_ternary_op<0b010, "bcax">;
-  def BSL_ZZZZ_D    : sve2_int_bitwise_ternary_op_d<0b001, "bsl">;
-  def BSL1N_ZZZZ_D  : sve2_int_bitwise_ternary_op_d<0b011, "bsl1n">;
-  def BSL2N_ZZZZ_D  : sve2_int_bitwise_ternary_op_d<0b101, "bsl2n">;
-  def NBSL_ZZZZ_D   : sve2_int_bitwise_ternary_op_d<0b111, "nbsl">;
+  defm EOR3_ZZZZ  : sve2_int_bitwise_ternary_op<0b000, "eor3", int_aarch64_sve_eor3>;
+  defm BCAX_ZZZZ  : sve2_int_bitwise_ternary_op<0b010, "bcax", int_aarch64_sve_bcax>;
+  defm BSL_ZZZZ   : sve2_int_bitwise_ternary_op<0b001, "bsl", int_aarch64_sve_bsl>;
+  defm BSL1N_ZZZZ : sve2_int_bitwise_ternary_op<0b011, "bsl1n", int_aarch64_sve_bsl1n>;
+  defm BSL2N_ZZZZ : sve2_int_bitwise_ternary_op<0b101, "bsl2n", int_aarch64_sve_bsl2n>;
+  defm NBSL_ZZZZ  : sve2_int_bitwise_ternary_op<0b111, "nbsl", int_aarch64_sve_nbsl>;
 
   // SVE2 bitwise xor and rotate right by immediate
   defm XAR_ZZZI : sve2_int_rotate_right_imm<"xar">;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 757743084721..8c02b3a95dfe 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -3766,7 +3766,7 @@ class sve2_int_bitwise_ternary_op_d<bits<3> opc, string asm>
   let ElementSize = ElementSizeNone;
 }
 
-multiclass sve2_int_bitwise_ternary_op<bits<3> opc, string asm> {
+multiclass sve2_int_bitwise_ternary_op<bits<3> opc, string asm, SDPatternOperator op> {
   def NAME : sve2_int_bitwise_ternary_op_d<opc, asm>;
 
   def : InstAlias<asm # "\t$Zdn, $Zm, $Zk",
@@ -3775,6 +3775,11 @@ multiclass sve2_int_bitwise_ternary_op<bits<3> opc, string asm> {
                   (!cast<Instruction>(NAME) ZPR16:$Zdn, ZPR16:$Zm, ZPR16:$Zk), 1>;
   def : InstAlias<asm # "\t$Zdn, $Zm, $Zk",
                   (!cast<Instruction>(NAME) ZPR32:$Zdn, ZPR32:$Zm, ZPR32:$Zk), 1>;
+
+  def : SVE_3_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, nxv16i8, !cast<Instruction>(NAME)>;
+  def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, nxv8i16, !cast<Instruction>(NAME)>;
+  def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, nxv4i32, !cast<Instruction>(NAME)>;
+  def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, nxv2i64, !cast<Instruction>(NAME)>;
 }
 
 class sve2_int_rotate_right_imm<bits<4> tsz8_64, string asm,
diff --git a/llvm/test/CodeGen/AArch64/sve2-bitwise-ternary.ll b/llvm/test/CodeGen/AArch64/sve2-bitwise-ternary.ll
new file mode 100644
index 000000000000..8745f7f96e64
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2-bitwise-ternary.ll
@@ -0,0 +1,284 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+
+;
+; EOR3 (vector, bitwise, unpredicated)
+;
+define <vscale x 16 x i8> @eor3_i8(<vscale x 16 x i8> %a,
+                                   <vscale x 16 x i8> %b,
+                                   <vscale x 16 x i8> %c) {
+; CHECK-LABEL: eor3_i8
+; CHECK: eor3 z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.eor3.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @eor3_i16(<vscale x 8 x i16> %a,
+                                    <vscale x 8 x i16> %b,
+                                    <vscale x 8 x i16> %c) {
+; CHECK-LABEL: eor3_i16
+; CHECK: eor3 z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.eor3.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @eor3_i32(<vscale x 4 x i32> %a,
+                                    <vscale x 4 x i32> %b,
+                                    <vscale x 4 x i32> %c) {
+; CHECK-LABEL: eor3_i32
+; CHECK: eor3 z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.eor3.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @eor3_i64(<vscale x 2 x i64> %a,
+                                    <vscale x 2 x i64> %b,
+                                    <vscale x 2 x i64> %c) {
+; CHECK-LABEL: eor3_i64
+; CHECK: eor3 z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.eor3.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
+  ret <vscale x 2 x i64> %res
+}
+
+;
+; BCAX (vector, bitwise, unpredicated)
+;
+define <vscale x 16 x i8> @bcax_i8(<vscale x 16 x i8> %a,
+                                   <vscale x 16 x i8> %b,
+                                   <vscale x 16 x i8> %c) {
+; CHECK-LABEL: bcax_i8
+; CHECK: bcax z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.bcax.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @bcax_i16(<vscale x 8 x i16> %a,
+                                    <vscale x 8 x i16> %b,
+                                    <vscale x 8 x i16> %c) {
+; CHECK-LABEL: bcax_i16
+; CHECK: bcax z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.bcax.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @bcax_i32(<vscale x 4 x i32> %a,
+                                    <vscale x 4 x i32> %b,
+                                    <vscale x 4 x i32> %c) {
+; CHECK-LABEL: bcax_i32
+; CHECK: bcax z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.bcax.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @bcax_i64(<vscale x 2 x i64> %a,
+                                    <vscale x 2 x i64> %b,
+                                    <vscale x 2 x i64> %c) {
+; CHECK-LABEL: bcax_i64
+; CHECK: bcax z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.bcax.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
+  ret <vscale x 2 x i64> %res
+}
+
+;
+; BSL (vector, bitwise, unpredicated)
+;
+define <vscale x 16 x i8> @bsl_i8(<vscale x 16 x i8> %a,
+                                  <vscale x 16 x i8> %b,
+                                  <vscale x 16 x i8> %c) {
+; CHECK-LABEL: bsl_i8
+; CHECK: bsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.bsl.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @bsl_i16(<vscale x 8 x i16> %a,
+                                   <vscale x 8 x i16> %b,
+                                   <vscale x 8 x i16> %c) {
+; CHECK-LABEL: bsl_i16
+; CHECK: bsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.bsl.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @bsl_i32(<vscale x 4 x i32> %a,
+                                   <vscale x 4 x i32> %b,
+                                   <vscale x 4 x i32> %c) {
+; CHECK-LABEL: bsl_i32
+; CHECK: bsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.bsl.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @bsl_i64(<vscale x 2 x i64> %a,
+                                   <vscale x 2 x i64> %b,
+                                   <vscale x 2 x i64> %c) {
+; CHECK-LABEL: bsl_i64
+; CHECK: bsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.bsl.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
+  ret <vscale x 2 x i64> %res
+}
+
+;
+; BSL1N (vector, bitwise, unpredicated)
+;
+define <vscale x 16 x i8> @bsl1n_i8(<vscale x 16 x i8> %a,
+                                    <vscale x 16 x i8> %b,
+                                    <vscale x 16 x i8> %c) {
+; CHECK-LABEL: bsl1n_i8
+; CHECK: bsl1n z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.bsl1n.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @bsl1n_i16(<vscale x 8 x i16> %a,
+                                     <vscale x 8 x i16> %b,
+                                     <vscale x 8 x i16> %c) {
+; CHECK-LABEL: bsl1n_i16
+; CHECK: bsl1n z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.bsl1n.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @bsl1n_i32(<vscale x 4 x i32> %a,
+                                     <vscale x 4 x i32> %b,
+                                     <vscale x 4 x i32> %c) {
+; CHECK-LABEL: bsl1n_i32
+; CHECK: bsl1n z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.bsl1n.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @bsl1n_i64(<vscale x 2 x i64> %a,
+                                     <vscale x 2 x i64> %b,
+                                     <vscale x 2 x i64> %c) {
+; CHECK-LABEL: bsl1n_i64
+; CHECK: bsl1n z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.bsl1n.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
+  ret <vscale x 2 x i64> %res
+}
+
+;
+; BSL2N (vector, bitwise, unpredicated)
+;
+define <vscale x 16 x i8> @bsl2n_i8(<vscale x 16 x i8> %a,
+                                    <vscale x 16 x i8> %b,
+                                    <vscale x 16 x i8> %c) {
+; CHECK-LABEL: bsl2n_i8
+; CHECK: bsl2n z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.bsl2n.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @bsl2n_i16(<vscale x 8 x i16> %a,
+                                     <vscale x 8 x i16> %b,
+                                     <vscale x 8 x i16> %c) {
+; CHECK-LABEL: bsl2n_i16
+; CHECK: bsl2n z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.bsl2n.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @bsl2n_i32(<vscale x 4 x i32> %a,
+                                     <vscale x 4 x i32> %b,
+                                     <vscale x 4 x i32> %c) {
+; CHECK-LABEL: bsl2n_i32
+; CHECK: bsl2n z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.bsl2n.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @bsl2n_i64(<vscale x 2 x i64> %a,
+                                     <vscale x 2 x i64> %b,
+                                     <vscale x 2 x i64> %c) {
+; CHECK-LABEL: bsl2n_i64
+; CHECK: bsl2n z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.bsl2n.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
+  ret <vscale x 2 x i64> %res
+}
+
+;
+; NBSL (vector, bitwise, unpredicated)
+;
+define <vscale x 16 x i8> @nbsl_i8(<vscale x 16 x i8> %a,
+                                   <vscale x 16 x i8> %b,
+                                   <vscale x 16 x i8> %c) {
+; CHECK-LABEL: nbsl_i8
+; CHECK: nbsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.nbsl.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @nbsl_i16(<vscale x 8 x i16> %a,
+                                    <vscale x 8 x i16> %b,
+                                    <vscale x 8 x i16> %c) {
+; CHECK-LABEL: nbsl_i16
+; CHECK: nbsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.nbsl.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @nbsl_i32(<vscale x 4 x i32> %a,
+                                    <vscale x 4 x i32> %b,
+                                    <vscale x 4 x i32> %c) {
+; CHECK-LABEL: nbsl_i32
+; CHECK: nbsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.nbsl.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @nbsl_i64(<vscale x 2 x i64> %a,
+                                    <vscale x 2 x i64> %b,
+                                    <vscale x 2 x i64> %c) {
+; CHECK-LABEL: nbsl_i64
+; CHECK: nbsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.nbsl.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
+  ret <vscale x 2 x i64> %res
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.eor3.nxv16i8(<vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.eor3.nxv8i16(<vscale x 8 x i16>,<vscale x 8 x i16>,<vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.eor3.nxv4i32(<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.eor3.nxv2i64(<vscale x 2 x i64>,<vscale x 2 x i64>,<vscale x 2 x i64>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.bcax.nxv16i8(<vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.bcax.nxv8i16(<vscale x 8 x i16>,<vscale x 8 x i16>,<vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.bcax.nxv4i32(<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.bcax.nxv2i64(<vscale x 2 x i64>,<vscale x 2 x i64>,<vscale x 2 x i64>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.bsl.nxv16i8(<vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.bsl.nxv8i16(<vscale x 8 x i16>,<vscale x 8 x i16>,<vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.bsl.nxv4i32(<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.bsl.nxv2i64(<vscale x 2 x i64>,<vscale x 2 x i64>,<vscale x 2 x i64>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.bsl1n.nxv16i8(<vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.bsl1n.nxv8i16(<vscale x 8 x i16>,<vscale x 8 x i16>,<vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.bsl1n.nxv4i32(<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.bsl1n.nxv2i64(<vscale x 2 x i64>,<vscale x 2 x i64>,<vscale x 2 x i64>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.bsl2n.nxv16i8(<vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.bsl2n.nxv8i16(<vscale x 8 x i16>,<vscale x 8 x i16>,<vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.bsl2n.nxv4i32(<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.bsl2n.nxv2i64(<vscale x 2 x i64>,<vscale x 2 x i64>,<vscale x 2 x i64>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.nbsl.nxv16i8(<vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.nbsl.nxv8i16(<vscale x 8 x i16>,<vscale x 8 x i16>,<vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.nbsl.nxv4i32(<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.nbsl.nxv2i64(<vscale x 2 x i64>,<vscale x 2 x i64>,<vscale x 2 x i64>)
-- 
2.34.1