From fcf70e1e3b1d57d5fde6b99d0188d1b1774429af Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin Date: Thu, 1 Oct 2020 11:06:55 +0100 Subject: [PATCH] [SVE][CodeGen] Lower scalable fp_extend & fp_round operations This patch adds FP_EXTEND_MERGE_PASSTHRU & FP_ROUND_MERGE_PASSTHRU ISD nodes, used to lower scalable vector fp_extend/fp_round operations. fp_round has an additional argument, the 'trunc' flag, which is an integer of zero or one. This also fixes a warning introduced by the new tests added to sve-split-fcvt.ll, resulting from an implicit TypeSize -> uint64_t cast in SplitVecOp_FP_ROUND. Reviewed By: sdesmalen, paulwalker-arm Differential Revision: https://reviews.llvm.org/D88321 --- .../CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 2 +- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 +- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 15 ++- llvm/lib/Target/AArch64/AArch64ISelLowering.h | 2 + llvm/lib/Target/AArch64/AArch64InstrFormats.td | 7 + llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 88 ++++++++----- llvm/lib/Target/AArch64/SVEInstrFormats.td | 26 ++++ llvm/test/CodeGen/AArch64/sve-fcvt.ll | 88 +++++++++++++ llvm/test/CodeGen/AArch64/sve-split-fcvt.ll | 146 +++++++++++++++++++++ 9 files changed, 340 insertions(+), 38 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 356eb1c..0b3edc3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2715,7 +2715,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) { EVT InVT = Lo.getValueType(); EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(), - InVT.getVectorNumElements()); + InVT.getVectorElementCount()); if (N->isStrictFPOpcode()) { Lo = DAG.getNode(N->getOpcode(), DL, { OutVT, MVT::Other }, diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index b9362f1..eef467d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4613,8 +4613,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, Operand.getValueType().isFloatingPoint() && "Invalid FP cast!"); if (Operand.getValueType() == VT) return Operand; // noop conversion. assert((!VT.isVector() || - VT.getVectorNumElements() == - Operand.getValueType().getVectorNumElements()) && + VT.getVectorElementCount() == + Operand.getValueType().getVectorElementCount()) && "Vector element count mismatch!"); assert(Operand.getValueType().bitsLT(VT) && "Invalid fpext node, dst < src!"); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index d8072db..fb70b2d 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -183,6 +183,8 @@ static bool isMergePassthruOpcode(unsigned Opc) { case AArch64ISD::FROUND_MERGE_PASSTHRU: case AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU: case AArch64ISD::FTRUNC_MERGE_PASSTHRU: + case AArch64ISD::FP_ROUND_MERGE_PASSTHRU: + case AArch64ISD::FP_EXTEND_MERGE_PASSTHRU: case AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU: case AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU: case AArch64ISD::FCVTZU_MERGE_PASSTHRU: @@ -1052,6 +1054,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::FROUNDEVEN, VT, Custom); setOperationAction(ISD::FTRUNC, VT, Custom); setOperationAction(ISD::FSQRT, VT, Custom); + setOperationAction(ISD::FP_EXTEND, VT, Custom); + setOperationAction(ISD::FP_ROUND, VT, Custom); } setOperationAction(ISD::SPLAT_VECTOR, MVT::nxv8bf16, Custom); @@ -1580,6 +1584,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::FROUND_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::FTRUNC_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::FP_ROUND_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::FCVTZU_MERGE_PASSTHRU) @@ -2908,6 +2914,9 @@ static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) { SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { + if (Op.getValueType().isScalableVector()) + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_EXTEND_MERGE_PASSTHRU); + assert(Op.getValueType() == MVT::f128 && "Unexpected lowering"); RTLIB::Libcall LC; @@ -2918,6 +2927,9 @@ SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op, SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { + if (Op.getValueType().isScalableVector()) + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_ROUND_MERGE_PASSTHRU); + bool IsStrict = Op->isStrictFPOpcode(); SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0); EVT SrcVT = SrcVal.getValueType(); @@ -16003,7 +16015,8 @@ SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op, SmallVector Operands = {Pg}; for (const SDValue &V : Op->op_values()) { - assert((isa(V) || V.getValueType().isScalableVector()) && + assert((!V.getValueType().isVector() || + V.getValueType().isScalableVector()) && "Only scalable vectors are supported!"); Operands.push_back(V); } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 51391d3..1b8f62e 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -105,6 +105,8 @@ enum NodeType : unsigned { FROUNDEVEN_MERGE_PASSTHRU, FSQRT_MERGE_PASSTHRU, FTRUNC_MERGE_PASSTHRU, + FP_ROUND_MERGE_PASSTHRU, + FP_EXTEND_MERGE_PASSTHRU, UINT_TO_FP_MERGE_PASSTHRU, SINT_TO_FP_MERGE_PASSTHRU, FCVTZU_MERGE_PASSTHRU, diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 6115508..68dc477 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -914,6 +914,13 @@ def imm0_1 : Operand, ImmLeaf, TImmLeaf { + let ParserMatchClass = Imm0_1Operand; +} + // imm0_15 predicate - True if the immediate is in the range [0,15] def imm0_15 : Operand, ImmLeaf ]>; +def SDT_AArch64FCVTR : SDTypeProfile<1, 4, [ + SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>, SDTCisVec<4>, + SDTCVecEltisVT<1,i1> +]>; + +def AArch64fcvtr_mt : SDNode<"AArch64ISD::FP_ROUND_MERGE_PASSTHRU", SDT_AArch64FCVTR>; +def AArch64fcvte_mt : SDNode<"AArch64ISD::FP_EXTEND_MERGE_PASSTHRU", SDT_AArch64FCVT>; def AArch64ucvtf_mt : SDNode<"AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU", SDT_AArch64FCVT>; def AArch64scvtf_mt : SDNode<"AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU", SDT_AArch64FCVT>; def AArch64fcvtzu_mt : SDNode<"AArch64ISD::FCVTZU_MERGE_PASSTHRU", SDT_AArch64FCVT>; @@ -1178,6 +1185,11 @@ multiclass sve_prefetch; // Extract subvectors from FP SVE vectors + def : Pat<(nxv2f16 (extract_subvector (nxv4f16 ZPR:$Zs), (i64 0))), + (UUNPKLO_ZZ_D ZPR:$Zs)>; + def : Pat<(nxv2f16 (extract_subvector (nxv4f16 ZPR:$Zs), (i64 2))), + (UUNPKHI_ZZ_D ZPR:$Zs)>; + def : Pat<(nxv4f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 0))), (UUNPKLO_ZZ_S ZPR:$Zs)>; def : Pat<(nxv4f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 4))), @@ -1400,40 +1412,48 @@ multiclass sve_prefetch; defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl", int_aarch64_sve_lsl_wide>; - defm FCVT_ZPmZ_StoH : sve_fp_2op_p_zd<0b1001000, "fcvt", ZPR32, ZPR16, int_aarch64_sve_fcvt_f16f32, null_frag, nxv8f16, nxv4i1, nxv4f32, ElementSizeS>; - defm FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd<0b1001001, "fcvt", ZPR16, ZPR32, int_aarch64_sve_fcvt_f32f16, null_frag, nxv4f32, nxv4i1, nxv8f16, ElementSizeS>; - defm SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110010, "scvtf", ZPR16, ZPR16, null_frag, AArch64scvtf_mt, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>; - defm SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010100, "scvtf", ZPR32, ZPR32, null_frag, AArch64scvtf_mt, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>; - defm UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010101, "ucvtf", ZPR32, ZPR32, null_frag, AArch64ucvtf_mt, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>; - defm UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110011, "ucvtf", ZPR16, ZPR16, null_frag, AArch64ucvtf_mt, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>; - defm FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111010, "fcvtzs", ZPR16, ZPR16, null_frag, AArch64fcvtzs_mt, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>; - defm FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011100, "fcvtzs", ZPR32, ZPR32, null_frag, AArch64fcvtzs_mt, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>; - defm FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111011, "fcvtzu", ZPR16, ZPR16, null_frag, AArch64fcvtzu_mt, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>; - defm FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011101, "fcvtzu", ZPR32, ZPR32, null_frag, AArch64fcvtzu_mt, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>; - defm FCVT_ZPmZ_DtoH : sve_fp_2op_p_zd<0b1101000, "fcvt", ZPR64, ZPR16, int_aarch64_sve_fcvt_f16f64, null_frag, nxv8f16, nxv2i1, nxv2f64, ElementSizeD>; - defm FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd<0b1101001, "fcvt", ZPR16, ZPR64, int_aarch64_sve_fcvt_f64f16, null_frag, nxv2f64, nxv2i1, nxv8f16, ElementSizeD>; - defm FCVT_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1101010, "fcvt", ZPR64, ZPR32, int_aarch64_sve_fcvt_f32f64, null_frag, nxv4f32, nxv2i1, nxv2f64, ElementSizeD>; - defm FCVT_ZPmZ_StoD : sve_fp_2op_p_zd<0b1101011, "fcvt", ZPR32, ZPR64, int_aarch64_sve_fcvt_f64f32, null_frag, nxv2f64, nxv2i1, nxv4f32, ElementSizeD>; - defm SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110000, "scvtf", ZPR32, ZPR64, int_aarch64_sve_scvtf_f64i32, AArch64scvtf_mt, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>; - defm UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110001, "ucvtf", ZPR32, ZPR64, int_aarch64_sve_ucvtf_f64i32, AArch64ucvtf_mt, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>; - defm UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110101, "ucvtf", ZPR32, ZPR16, int_aarch64_sve_ucvtf_f16i32, AArch64ucvtf_mt, nxv4f16, nxv4i1, nxv4i32, ElementSizeS>; - defm SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110100, "scvtf", ZPR64, ZPR32, int_aarch64_sve_scvtf_f32i64, AArch64scvtf_mt, nxv2f32, nxv2i1, nxv2i64, ElementSizeD>; - defm SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110100, "scvtf", ZPR32, ZPR16, int_aarch64_sve_scvtf_f16i32, AArch64scvtf_mt, nxv4f16, nxv4i1, nxv4i32, ElementSizeS>; - defm SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110110, "scvtf", ZPR64, ZPR16, int_aarch64_sve_scvtf_f16i64, AArch64scvtf_mt, nxv2f16, nxv2i1, nxv2i64, ElementSizeD>; - defm UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110101, "ucvtf", ZPR64, ZPR32, int_aarch64_sve_ucvtf_f32i64, AArch64ucvtf_mt, nxv2f32, nxv2i1, nxv2i64, ElementSizeD>; - defm UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110111, "ucvtf", ZPR64, ZPR16, int_aarch64_sve_ucvtf_f16i64, AArch64ucvtf_mt, nxv2f16, nxv2i1, nxv2i64, ElementSizeD>; - defm SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110110, "scvtf", ZPR64, ZPR64, null_frag, AArch64scvtf_mt, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>; - defm UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110111, "ucvtf", ZPR64, ZPR64, null_frag, AArch64ucvtf_mt, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>; - defm FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111000, "fcvtzs", ZPR64, ZPR32, int_aarch64_sve_fcvtzs_i32f64, null_frag, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>; - defm FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111001, "fcvtzu", ZPR64, ZPR32, int_aarch64_sve_fcvtzu_i32f64, null_frag, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>; - defm FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111100, "fcvtzs", ZPR32, ZPR64, int_aarch64_sve_fcvtzs_i64f32, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f32, ElementSizeD>; - defm FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111100, "fcvtzs", ZPR16, ZPR32, int_aarch64_sve_fcvtzs_i32f16, AArch64fcvtzs_mt, nxv4i32, nxv4i1, nxv4f16, ElementSizeS>; - defm FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64, int_aarch64_sve_fcvtzs_i64f16, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f16, ElementSizeD>; - defm FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32, int_aarch64_sve_fcvtzu_i32f16, AArch64fcvtzu_mt, nxv4i32, nxv4i1, nxv4f16, ElementSizeS>; - defm FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64, int_aarch64_sve_fcvtzu_i64f16, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f16, ElementSizeD>; - defm FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64, int_aarch64_sve_fcvtzu_i64f32, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f32, ElementSizeD>; - defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64, null_frag, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>; - defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, null_frag, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>; + defm FCVT_ZPmZ_StoH : sve_fp_2op_p_zdr<0b1001000, "fcvt", ZPR32, ZPR16, int_aarch64_sve_fcvt_f16f32, AArch64fcvtr_mt, nxv4f16, nxv4i1, nxv4f32, ElementSizeS>; + defm FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd< 0b1001001, "fcvt", ZPR16, ZPR32, int_aarch64_sve_fcvt_f32f16, AArch64fcvte_mt, nxv4f32, nxv4i1, nxv4f16, ElementSizeS>; + defm SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0110010, "scvtf", ZPR16, ZPR16, null_frag, AArch64scvtf_mt, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>; + defm SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1010100, "scvtf", ZPR32, ZPR32, null_frag, AArch64scvtf_mt, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>; + defm UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1010101, "ucvtf", ZPR32, ZPR32, null_frag, AArch64ucvtf_mt, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>; + defm UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0110011, "ucvtf", ZPR16, ZPR16, null_frag, AArch64ucvtf_mt, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>; + defm FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0111010, "fcvtzs", ZPR16, ZPR16, null_frag, AArch64fcvtzs_mt, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>; + defm FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1011100, "fcvtzs", ZPR32, ZPR32, null_frag, AArch64fcvtzs_mt, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>; + defm FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0111011, "fcvtzu", ZPR16, ZPR16, null_frag, AArch64fcvtzu_mt, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>; + defm FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1011101, "fcvtzu", ZPR32, ZPR32, null_frag, AArch64fcvtzu_mt, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>; + defm FCVT_ZPmZ_DtoH : sve_fp_2op_p_zdr<0b1101000, "fcvt", ZPR64, ZPR16, int_aarch64_sve_fcvt_f16f64, AArch64fcvtr_mt, nxv2f16, nxv2i1, nxv2f64, ElementSizeD>; + defm FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b1101001, "fcvt", ZPR16, ZPR64, int_aarch64_sve_fcvt_f64f16, AArch64fcvte_mt, nxv2f64, nxv2i1, nxv2f16, ElementSizeD>; + defm FCVT_ZPmZ_DtoS : sve_fp_2op_p_zdr<0b1101010, "fcvt", ZPR64, ZPR32, int_aarch64_sve_fcvt_f32f64, AArch64fcvtr_mt, nxv2f32, nxv2i1, nxv2f64, ElementSizeD>; + defm FCVT_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1101011, "fcvt", ZPR32, ZPR64, int_aarch64_sve_fcvt_f64f32, AArch64fcvte_mt, nxv2f64, nxv2i1, nxv2f32, ElementSizeD>; + defm SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1110000, "scvtf", ZPR32, ZPR64, int_aarch64_sve_scvtf_f64i32, AArch64scvtf_mt, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>; + defm UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1110001, "ucvtf", ZPR32, ZPR64, int_aarch64_sve_ucvtf_f64i32, AArch64ucvtf_mt, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>; + defm UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd< 0b0110101, "ucvtf", ZPR32, ZPR16, int_aarch64_sve_ucvtf_f16i32, AArch64ucvtf_mt, nxv4f16, nxv4i1, nxv4i32, ElementSizeS>; + defm SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1110100, "scvtf", ZPR64, ZPR32, int_aarch64_sve_scvtf_f32i64, AArch64scvtf_mt, nxv2f32, nxv2i1, nxv2i64, ElementSizeD>; + defm SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd< 0b0110100, "scvtf", ZPR32, ZPR16, int_aarch64_sve_scvtf_f16i32, AArch64scvtf_mt, nxv4f16, nxv4i1, nxv4i32, ElementSizeS>; + defm SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd< 0b0110110, "scvtf", ZPR64, ZPR16, int_aarch64_sve_scvtf_f16i64, AArch64scvtf_mt, nxv2f16, nxv2i1, nxv2i64, ElementSizeD>; + defm UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1110101, "ucvtf", ZPR64, ZPR32, int_aarch64_sve_ucvtf_f32i64, AArch64ucvtf_mt, nxv2f32, nxv2i1, nxv2i64, ElementSizeD>; + defm UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd< 0b0110111, "ucvtf", ZPR64, ZPR16, int_aarch64_sve_ucvtf_f16i64, AArch64ucvtf_mt, nxv2f16, nxv2i1, nxv2i64, ElementSizeD>; + defm SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1110110, "scvtf", ZPR64, ZPR64, null_frag, AArch64scvtf_mt, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>; + defm UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1110111, "ucvtf", ZPR64, ZPR64, null_frag, AArch64ucvtf_mt, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>; + defm FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1111000, "fcvtzs", ZPR64, ZPR32, int_aarch64_sve_fcvtzs_i32f64, null_frag, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>; + defm FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1111001, "fcvtzu", ZPR64, ZPR32, int_aarch64_sve_fcvtzu_i32f64, null_frag, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>; + defm FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1111100, "fcvtzs", ZPR32, ZPR64, int_aarch64_sve_fcvtzs_i64f32, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f32, ElementSizeD>; + defm FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd< 0b0111100, "fcvtzs", ZPR16, ZPR32, int_aarch64_sve_fcvtzs_i32f16, AArch64fcvtzs_mt, nxv4i32, nxv4i1, nxv4f16, ElementSizeS>; + defm FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b0111110, "fcvtzs", ZPR16, ZPR64, int_aarch64_sve_fcvtzs_i64f16, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f16, ElementSizeD>; + defm FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd< 0b0111101, "fcvtzu", ZPR16, ZPR32, int_aarch64_sve_fcvtzu_i32f16, AArch64fcvtzu_mt, nxv4i32, nxv4i1, nxv4f16, ElementSizeS>; + defm FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b0111111, "fcvtzu", ZPR16, ZPR64, int_aarch64_sve_fcvtzu_i64f16, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f16, ElementSizeD>; + defm FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1111101, "fcvtzu", ZPR32, ZPR64, int_aarch64_sve_fcvtzu_i64f32, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f32, ElementSizeD>; + defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1111110, "fcvtzs", ZPR64, ZPR64, null_frag, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>; + defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1111111, "fcvtzu", ZPR64, ZPR64, null_frag, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>; + + def : Pat<(nxv2f32 (AArch64fcvte_mt (nxv2i1 PPR:$Pg), (nxv2f16 ZPR:$Zs), (nxv2f32 ZPR:$Zd))), + (FCVT_ZPmZ_HtoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + + // FP_ROUND has an additional 'precise' flag which indicates the type of rounding. + // This is ignored by the pattern below where it is matched by (i64 timm0_1) + def : Pat<(nxv2f16 (AArch64fcvtr_mt (nxv2i1 PPR:$Pg), (nxv2f32 ZPR:$Zs), (i64 timm0_1), (nxv2f16 ZPR:$Zd))), + (FCVT_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; // Floating-point -> signed integer def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 PPR:$Pg), diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index d0226a7..45a712c 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -318,6 +318,13 @@ class SVE_1_Op_Passthru_Pat; +// Used to match FP_ROUND_MERGE_PASSTHRU, which has an additional flag for the +// type of rounding. This is matched by timm0_1 in pattern below and ignored. +class SVE_1_Op_Passthru_Round_Pat +: Pat<(vtd (op pg:$Op1, vts:$Op2, (i64 timm0_1), vtd:$Op3)), + (inst $Op3, $Op1, $Op2)>; + class SVE_1_Op_Imm_OptLsl_Reverse_Pat : Pat<(vt (op (vt (AArch64dup (it (cpx i32:$imm, i32:$shift)))), (vt zprty:$Op1))), @@ -2299,6 +2306,25 @@ multiclass sve_fp_2op_p_zd opc, string asm, def : SVE_1_Op_Passthru_Pat(NAME)>; } +multiclass sve_fp_2op_p_zdr opc, string asm, + RegisterOperand i_zprtype, + RegisterOperand o_zprtype, + SDPatternOperator int_op, + SDPatternOperator ir_op, ValueType vt1, + ValueType vt2, ValueType vt3, ElementSizeEnum Sz> { + def NAME : sve_fp_2op_p_zd; + + // convert vt1 to a packed type for the intrinsic patterns + defvar packedvt1 = !cond(!eq(!cast(vt1), "nxv2f16"): nxv8f16, + !eq(!cast(vt1), "nxv4f16"): nxv8f16, + !eq(!cast(vt1), "nxv2f32"): nxv4f32, + 1 : vt1); + + def : SVE_3_Op_Pat(NAME)>; + + def : SVE_1_Op_Passthru_Round_Pat(NAME)>; +} + multiclass sve_fp_2op_p_zd_HSD opc, string asm, SDPatternOperator op> { def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16, ElementSizeH>; def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32, ElementSizeS>; diff --git a/llvm/test/CodeGen/AArch64/sve-fcvt.ll b/llvm/test/CodeGen/AArch64/sve-fcvt.ll index 9b980ac..1b39580 100644 --- a/llvm/test/CodeGen/AArch64/sve-fcvt.ll +++ b/llvm/test/CodeGen/AArch64/sve-fcvt.ll @@ -6,6 +6,94 @@ ; WARN-NOT: warning ; +; FP_EXTEND +; + +define @fcvts_nxv2f16( %a) { +; CHECK-LABEL: fcvts_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z0.s, p0/m, z0.h +; CHECK-NEXT: ret + %res = fpext %a to + ret %res +} + +define @fcvts_nxv4f16( %a) { +; CHECK-LABEL: fcvts_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcvt z0.s, p0/m, z0.h +; CHECK-NEXT: ret + %res = fpext %a to + ret %res +} + +define @fcvtd_nxv2f16( %a) { +; CHECK-LABEL: fcvtd_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z0.d, p0/m, z0.h +; CHECK-NEXT: ret + %res = fpext %a to + ret %res +} + +define @fcvtd_nxv2f32( %a) { +; CHECK-LABEL: fcvtd_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z0.d, p0/m, z0.s +; CHECK-NEXT: ret + %res = fpext %a to + ret %res +} + +; +; FP_ROUND +; + +define @fcvth_nxv2f32( %a) { +; CHECK-LABEL: fcvth_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z0.h, p0/m, z0.s +; CHECK-NEXT: ret + %res = fptrunc %a to + ret %res +} + +define @fcvth_nxv4f32( %a) { +; CHECK-LABEL: fcvth_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcvt z0.h, p0/m, z0.s +; CHECK-NEXT: ret + %res = fptrunc %a to + ret %res +} + +define @fcvth_nxv2f64( %a) { +; CHECK-LABEL: fcvth_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z0.h, p0/m, z0.d +; CHECK-NEXT: ret + %res = fptrunc %a to + ret %res +} + +define @fcvts_nxv2f64( %a) { +; CHECK-LABEL: fcvts_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z0.s, p0/m, z0.d +; CHECK-NEXT: ret + %res = fptrunc %a to + ret %res +} + +; ; FP_TO_SINT ; diff --git a/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll b/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll index 41b3e0e..6f608c8 100644 --- a/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll +++ b/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll @@ -5,6 +5,152 @@ ; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. ; WARN-NOT: warning +; FP_EXTEND + +define @fcvts_nxv8f16( %a) { +; CHECK-LABEL: fcvts_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpklo z1.s, z0.h +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: uunpkhi z2.s, z0.h +; CHECK-NEXT: fcvt z0.s, p0/m, z1.h +; CHECK-NEXT: fcvt z1.s, p0/m, z2.h +; CHECK-NEXT: ret + %res = fpext %a to + ret %res +} + +define @fcvtd_nxv4f16( %a) { +; CHECK-LABEL: fcvtd_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpklo z1.d, z0.s +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpkhi z2.d, z0.s +; CHECK-NEXT: fcvt z0.d, p0/m, z1.h +; CHECK-NEXT: fcvt z1.d, p0/m, z2.h +; CHECK-NEXT: ret + %res = fpext %a to + ret %res +} + +define @fcvtd_nxv8f16( %a) { +; CHECK-LABEL: fcvtd_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpklo z1.s, z0.h +; CHECK-NEXT: uunpkhi z0.s, z0.h +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpklo z2.d, z1.s +; CHECK-NEXT: uunpkhi z1.d, z1.s +; CHECK-NEXT: uunpklo z3.d, z0.s +; CHECK-NEXT: uunpkhi z4.d, z0.s +; CHECK-NEXT: fcvt z0.d, p0/m, z2.h +; CHECK-NEXT: fcvt z1.d, p0/m, z1.h +; CHECK-NEXT: fcvt z2.d, p0/m, z3.h +; CHECK-NEXT: fcvt z3.d, p0/m, z4.h +; CHECK-NEXT: ret + %res = fpext %a to + ret %res +} + +define @fcvtd_nxv4f32( %a) { +; CHECK-LABEL: fcvtd_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpklo z1.d, z0.s +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpkhi z2.d, z0.s +; CHECK-NEXT: fcvt z0.d, p0/m, z1.s +; CHECK-NEXT: fcvt z1.d, p0/m, z2.s +; CHECK-NEXT: ret + %res = fpext %a to + ret %res +} + +define @fcvtd_nxv8f32( %a) { +; CHECK-LABEL: fcvtd_nxv8f32: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpklo z2.d, z0.s +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpkhi z3.d, z0.s +; CHECK-NEXT: uunpklo z4.d, z1.s +; CHECK-NEXT: uunpkhi z5.d, z1.s +; CHECK-NEXT: fcvt z0.d, p0/m, z2.s +; CHECK-NEXT: fcvt z1.d, p0/m, z3.s +; CHECK-NEXT: fcvt z2.d, p0/m, z4.s +; CHECK-NEXT: fcvt z3.d, p0/m, z5.s +; CHECK-NEXT: ret + %res = fpext %a to + ret %res +} + +; FP_ROUND + +define @fcvth_nxv8f32( %a) { +; CHECK-LABEL: fcvth_nxv8f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcvt z1.h, p0/m, z1.s +; CHECK-NEXT: fcvt z0.h, p0/m, z0.s +; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h +; CHECK-NEXT: ret + %res = fptrunc %a to + ret %res +} + +define @fcvth_nxv8f64( %a) { +; CHECK-LABEL: fcvth_nxv8f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z3.h, p0/m, z3.d +; CHECK-NEXT: fcvt z2.h, p0/m, z2.d +; CHECK-NEXT: fcvt z1.h, p0/m, z1.d +; CHECK-NEXT: fcvt z0.h, p0/m, z0.d +; CHECK-NEXT: uzp1 z2.s, z2.s, z3.s +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: uzp1 z0.h, z0.h, z2.h +; CHECK-NEXT: ret + %res = fptrunc %a to + ret %res +} + +define @fcvth_nxv4f64( %a) { +; CHECK-LABEL: fcvth_nxv4f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z1.h, p0/m, z1.d +; CHECK-NEXT: fcvt z0.h, p0/m, z0.d +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: ret + %res = fptrunc %a to + ret %res +} + +define @fcvts_nxv4f64( %a) { +; CHECK-LABEL: fcvts_nxv4f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z1.s, p0/m, z1.d +; CHECK-NEXT: fcvt z0.s, p0/m, z0.d +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: ret + %res = fptrunc %a to + ret %res +} + +define @fcvts_nxv8f64( %a) { +; CHECK-LABEL: fcvts_nxv8f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z1.s, p0/m, z1.d +; CHECK-NEXT: fcvt z0.s, p0/m, z0.d +; CHECK-NEXT: fcvt z3.s, p0/m, z3.d +; CHECK-NEXT: fcvt z2.s, p0/m, z2.d +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: uzp1 z1.s, z2.s, z3.s +; CHECK-NEXT: ret + %res = fptrunc %a to + ret %res +} + ; FP_TO_SINT ; Split operand -- 2.7.4