From 6f6fa5aa10d3c0a71a897e17bd228aa9b22f9e01 Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Wed, 15 Jun 2022 11:28:08 +0100 Subject: [PATCH] [AArch64][SME] Add SME cntsb/h/w/d intrinsics These intrinsics return the number of elements in a streaming vector, for example aarch64.sme.cntsw returns the number of 32-bit elements. When in streaming mode these are equivalent to aarch64.sve.cntb/h/w/d with an input value of 1. I have implemented these intrinsics using the rdsvl instruction and added tests here: CodeGen/AArch64/sme-intrinsics-rdsvl.ll Differential Revision: https://reviews.llvm.org/D127853 --- llvm/include/llvm/IR/IntrinsicsAArch64.td | 13 +++++++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 21 +++++++++++ llvm/lib/Target/AArch64/AArch64ISelLowering.h | 3 ++ llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td | 5 +++ llvm/test/CodeGen/AArch64/sme-intrinsics-rdsvl.ll | 46 +++++++++++++++++++++++ 5 files changed, 88 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/sme-intrinsics-rdsvl.ll diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 0e1885c..b67b47c 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -2649,4 +2649,17 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sme_readq_vert : SME_TileToVector_Intrinsic; def int_aarch64_sme_writeq_horiz : SME_VectorToTile_Intrinsic; def int_aarch64_sme_writeq_vert : SME_VectorToTile_Intrinsic; + + + // + // Counting elements + // + + class AdvSIMD_SME_CNTSB_Intrinsic + : DefaultAttrsIntrinsic<[llvm_i64_ty], [], [IntrNoMem]>; + + def int_aarch64_sme_cntsb : AdvSIMD_SME_CNTSB_Intrinsic; + def int_aarch64_sme_cntsh : AdvSIMD_SME_CNTSB_Intrinsic; + def int_aarch64_sme_cntsw : AdvSIMD_SME_CNTSB_Intrinsic; + def int_aarch64_sme_cntsd : AdvSIMD_SME_CNTSB_Intrinsic; } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 
4efcec1..32ad1ec 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2119,6 +2119,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::FMINNMV_PRED) MAKE_CASE(AArch64ISD::FMUL_PRED) MAKE_CASE(AArch64ISD::FSUB_PRED) + MAKE_CASE(AArch64ISD::RDSVL) MAKE_CASE(AArch64ISD::BIC) MAKE_CASE(AArch64ISD::BIT) MAKE_CASE(AArch64ISD::CBZ) @@ -4402,6 +4403,26 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::aarch64_sve_clz: return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, dl, Op.getValueType(), Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + case Intrinsic::aarch64_sme_cntsb: + return DAG.getNode(AArch64ISD::RDSVL, dl, Op.getValueType(), + DAG.getConstant(1, dl, MVT::i32)); + case Intrinsic::aarch64_sme_cntsh: { + SDValue One = DAG.getConstant(1, dl, MVT::i32); + SDValue Bytes = DAG.getNode(AArch64ISD::RDSVL, dl, Op.getValueType(), One); + return DAG.getNode(ISD::SRL, dl, Op.getValueType(), Bytes, One); + } + case Intrinsic::aarch64_sme_cntsw: { + SDValue Bytes = DAG.getNode(AArch64ISD::RDSVL, dl, Op.getValueType(), + DAG.getConstant(1, dl, MVT::i32)); + return DAG.getNode(ISD::SRL, dl, Op.getValueType(), Bytes, + DAG.getConstant(2, dl, MVT::i32)); + } + case Intrinsic::aarch64_sme_cntsd: { + SDValue Bytes = DAG.getNode(AArch64ISD::RDSVL, dl, Op.getValueType(), + DAG.getConstant(1, dl, MVT::i32)); + return DAG.getNode(ISD::SRL, dl, Op.getValueType(), Bytes, + DAG.getConstant(3, dl, MVT::i32)); + } case Intrinsic::aarch64_sve_cnt: { SDValue Data = Op.getOperand(3); // CTPOP only supports integer operands. 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index c5205ad..b26871c 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -406,6 +406,9 @@ enum NodeType : unsigned { SSTNT1_PRED, SSTNT1_INDEX_PRED, + // SME + RDSVL, + // Asserts that a function argument (i32) is zero-extended to i8 by // the caller ASSERT_ZEXT_BOOL, diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index 47a5156..07a5c1b 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -14,6 +14,9 @@ // Add vector elements horizontally or vertically to ZA tile. //===----------------------------------------------------------------------===// +def SDT_AArch64RDSVL : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]>; +def AArch64rdsvl : SDNode<"AArch64ISD::RDSVL", SDT_AArch64RDSVL>; + let Predicates = [HasSME] in { def RDSVLI_XI : sve_int_read_vl_a<0b0, 0b11111, "rdsvl", /*streaming_sve=*/0b1>; def ADDSPL_XXI : sve_int_arith_vl<0b1, "addspl", /*streaming_sve=*/0b1>; @@ -21,6 +24,8 @@ def ADDSVL_XXI : sve_int_arith_vl<0b0, "addsvl", /*streaming_sve=*/0b1>; def ADDHA_MPPZ_S : sme_add_vector_to_tile_u32<0b0, "addha">; def ADDVA_MPPZ_S : sme_add_vector_to_tile_u32<0b1, "addva">; + +def : Pat<(AArch64rdsvl (i32 simm6_32b:$imm)), (RDSVLI_XI simm6_32b:$imm)>; } let Predicates = [HasSMEI64] in { diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-rdsvl.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-rdsvl.ll new file mode 100644 index 0000000..5d10d7e --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-rdsvl.ll @@ -0,0 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s + +define i64 @sme_cntsb() { +; CHECK-LABEL: sme_cntsb: +; CHECK: // %bb.0: +; 
CHECK-NEXT: rdsvl x0, #1 +; CHECK-NEXT: ret + %v = call i64 @llvm.aarch64.sme.cntsb() + ret i64 %v +} + +define i64 @sme_cntsh() { +; CHECK-LABEL: sme_cntsh: +; CHECK: // %bb.0: +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: lsr x0, x8, #1 +; CHECK-NEXT: ret + %v = call i64 @llvm.aarch64.sme.cntsh() + ret i64 %v +} + +define i64 @sme_cntsw() { +; CHECK-LABEL: sme_cntsw: +; CHECK: // %bb.0: +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: lsr x0, x8, #2 +; CHECK-NEXT: ret + %v = call i64 @llvm.aarch64.sme.cntsw() + ret i64 %v +} + +define i64 @sme_cntsd() { +; CHECK-LABEL: sme_cntsd: +; CHECK: // %bb.0: +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: lsr x0, x8, #3 +; CHECK-NEXT: ret + %v = call i64 @llvm.aarch64.sme.cntsd() + ret i64 %v +} + +declare i64 @llvm.aarch64.sme.cntsb() +declare i64 @llvm.aarch64.sme.cntsh() +declare i64 @llvm.aarch64.sme.cntsw() +declare i64 @llvm.aarch64.sme.cntsd() -- 2.7.4