From 73ac3c0ede4ca08b72618620f92f1efcb76f2c89 Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Fri, 21 Aug 2020 19:00:36 +0100 Subject: [PATCH] [SVE] Lower scalable vector ISD::FNEG operations. Also updates isConstOrConstSplatFP to allow the mul(A,-1) -> neg(A) transformation when -1 is expressed as an ISD::SPLAT_VECTOR. Differential Revision: https://reviews.llvm.org/D86415 --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 ++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 8 +++ llvm/lib/Target/AArch64/AArch64ISelLowering.h | 1 + llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 9 ++-- llvm/lib/Target/AArch64/SVEInstrFormats.td | 22 ++++++-- llvm/test/CodeGen/AArch64/sve-fp.ll | 72 +++++++++++++++++++++++++ 6 files changed, 108 insertions(+), 8 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 1aefe8c..0e2fe9b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -9076,6 +9076,10 @@ ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, bool AllowUndefs) { return CN; } + if (N.getOpcode() == ISD::SPLAT_VECTOR) + if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N.getOperand(0))) + return CN; + return nullptr; } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index da4ca30..425e309 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -135,6 +135,7 @@ static bool isMergePassthruOpcode(unsigned Opc) { default: return false; case AArch64ISD::DUP_MERGE_PASSTHRU: + case AArch64ISD::FNEG_MERGE_PASSTHRU: case AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU: case AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU: return true; @@ -969,6 +970,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::FDIV, VT, Custom); setOperationAction(ISD::FMA, VT, Custom); 
setOperationAction(ISD::FMUL, VT, Custom); + setOperationAction(ISD::FNEG, VT, Custom); setOperationAction(ISD::FSUB, VT, Custom); } } @@ -1471,6 +1473,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::UDIV_PRED) MAKE_CASE(AArch64ISD::UMAX_PRED) MAKE_CASE(AArch64ISD::UMIN_PRED) + MAKE_CASE(AArch64ISD::FNEG_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO) @@ -3331,6 +3334,9 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::aarch64_sve_convert_from_svbool: return DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, Op.getValueType(), Op.getOperand(1)); + case Intrinsic::aarch64_sve_fneg: + return DAG.getNode(AArch64ISD::FNEG_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); case Intrinsic::aarch64_sve_convert_to_svbool: { EVT OutVT = Op.getValueType(); EVT InVT = Op.getOperand(1).getValueType(); @@ -3625,6 +3631,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, if (Op.getValueType() == MVT::f128) return LowerF128Call(Op, DAG, RTLIB::DIV_F128); return LowerToPredicatedOp(Op, DAG, AArch64ISD::FDIV_PRED); + case ISD::FNEG: + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU); case ISD::FP_ROUND: case ISD::STRICT_FP_ROUND: return LowerFP_ROUND(Op, DAG); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index e550713..099d83a 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -95,6 +95,7 @@ enum NodeType : unsigned { // Predicated instructions with the result of inactive lanes provided by the // last operand. 
+ FNEG_MERGE_PASSTHRU, SIGN_EXTEND_INREG_MERGE_PASSTHRU, ZERO_EXTEND_INREG_MERGE_PASSTHRU, diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 027db63..d6c4900 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -199,8 +199,9 @@ def SDT_AArch64IntExtend : SDTypeProfile<1, 4, [ ]>; // Predicated operations with the result of inactive lanes provided by the last operand. -def AArch64sxt_mt : SDNode<"AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>; -def AArch64uxt_mt : SDNode<"AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>; +def AArch64fneg_mt : SDNode<"AArch64ISD::FNEG_MERGE_PASSTHRU", SDT_AArch64Arith>; +def AArch64sxt_mt : SDNode<"AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>; +def AArch64uxt_mt : SDNode<"AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>; def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<3>]>; def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithInit>; @@ -349,8 +350,8 @@ let Predicates = [HasSVE] in { defm CNOT_ZPmZ : sve_int_un_pred_arit_1< 0b011, "cnot", int_aarch64_sve_cnot>; defm NOT_ZPmZ : sve_int_un_pred_arit_1< 0b110, "not", int_aarch64_sve_not>; - defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs", int_aarch64_sve_fabs>; - defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg", int_aarch64_sve_fneg>; + defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs", int_aarch64_sve_fabs, null_frag>; + defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg", null_frag, AArch64fneg_mt>; defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", "SMAX_ZPZZ", int_aarch64_sve_smax, DestructiveBinaryComm>; defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", "UMAX_ZPZZ", int_aarch64_sve_umax, DestructiveBinaryComm>; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td 
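b/llvm/lib/Target/AArch64/SVEInstrFormats.td index cd3ec64..4fd9bcd 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -312,6 +312,11 @@ class SVE_1_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1, : Pat<(vtd (op vt1:$Op1)), (inst $Op1)>; +class SVE_1_Op_Passthru_Pat<ValueType vtd, SDPatternOperator op, ValueType pg, + ValueType vts, Instruction inst> +: Pat<(vtd (op pg:$Op1, vts:$Op2, vtd:$Op3)), + (inst $Op3, $Op1, $Op2)>; + class SVE_1_Op_Imm_OptLsl_Reverse_Pat<ValueType vt, SDPatternOperator op, ZPRRegOp zprty, ValueType it, ComplexPattern cpx, Instruction inst> : Pat<(vt (op (vt (AArch64dup (it (cpx i32:$imm, i32:$shift)))), (vt zprty:$Op1))), @@ -3755,15 +3760,24 @@ multiclass sve_int_un_pred_arit_1<bits<3> opc, string asm, def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>; } +// TODO: Remove int_op once its last use is converted to ir_op. multiclass sve_int_un_pred_arit_1_fp<bits<3> opc, string asm, - SDPatternOperator op> { + SDPatternOperator int_op, + SDPatternOperator ir_op> { def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>; def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>; def _D : sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>; - def : SVE_3_Op_Pat<nxv8f16, op, nxv8f16, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>; - def : SVE_3_Op_Pat<nxv4f32, op, nxv4f32, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>; - def : SVE_3_Op_Pat<nxv2f64, op, nxv2f64, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>; + def : SVE_3_Op_Pat<nxv8f16, int_op, nxv8f16, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>; + def : SVE_3_Op_Pat<nxv4f32, int_op, nxv4f32, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>; + def : SVE_3_Op_Pat<nxv2f64, int_op, nxv2f64, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>; + + def : SVE_1_Op_Passthru_Pat<nxv8f16, ir_op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>; + def : SVE_1_Op_Passthru_Pat<nxv4f16, ir_op, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>; + def : SVE_1_Op_Passthru_Pat<nxv2f16, ir_op, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>; + def : SVE_1_Op_Passthru_Pat<nxv4f32, ir_op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>; + def : SVE_1_Op_Passthru_Pat<nxv2f32, ir_op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>; + def : SVE_1_Op_Passthru_Pat<nxv2f64, ir_op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>; } //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AArch64/sve-fp.ll b/llvm/test/CodeGen/AArch64/sve-fp.ll index 891a5c1..e4aea28 100644 --- a/llvm/test/CodeGen/AArch64/sve-fp.ll +++ b/llvm/test/CodeGen/AArch64/sve-fp.ll @@ -323,6 +323,78 @@ define <vscale x 2 x double> @fma_nxv2f64_3(<vscale x 2 x double> %a, <vscale x ret <vscale x 2 x double> %r } +define <vscale x 8 x half> @fneg_nxv8f16(<vscale x 8 x half> %a) { +; CHECK-LABEL: fneg_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: fneg z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %minus.one = insertelement <vscale x 8 x half> undef, half -1.0, i64 0 + %minus.one.vec = shufflevector <vscale x 8 x half>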
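%minus.one, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer + %neg = fmul <vscale x 8 x half> %a, %minus.one.vec + ret <vscale x 8 x half> %neg +} + +define <vscale x 4 x half> @fneg_nxv4f16(<vscale x 4 x half> %a) { +; CHECK-LABEL: fneg_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fneg z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %minus.one = insertelement <vscale x 4 x half> undef, half -1.0, i64 0 + %minus.one.vec = shufflevector <vscale x 4 x half> %minus.one, <vscale x 4 x half> undef, <vscale x 4 x i32> zeroinitializer + %neg = fmul <vscale x 4 x half> %a, %minus.one.vec + ret <vscale x 4 x half> %neg +} + +define <vscale x 2 x half> @fneg_nxv2f16(<vscale x 2 x half> %a) { +; CHECK-LABEL: fneg_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fneg z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %minus.one = insertelement <vscale x 2 x half> undef, half -1.0, i64 0 + %minus.one.vec = shufflevector <vscale x 2 x half> %minus.one, <vscale x 2 x half> undef, <vscale x 2 x i32> zeroinitializer + %neg = fmul <vscale x 2 x half> %a, %minus.one.vec + ret <vscale x 2 x half> %neg +} + +define <vscale x 4 x float> @fneg_nxv4f32(<vscale x 4 x float> %a) { +; CHECK-LABEL: fneg_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fneg z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %minus.one = insertelement <vscale x 4 x float> undef, float -1.0, i64 0 + %minus.one.vec = shufflevector <vscale x 4 x float> %minus.one, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer + %neg = fmul <vscale x 4 x float> %a, %minus.one.vec + ret <vscale x 4 x float> %neg +} + +define <vscale x 2 x float> @fneg_nxv2f32(<vscale x 2 x float> %a) { +; CHECK-LABEL: fneg_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fneg z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %minus.one = insertelement <vscale x 2 x float> undef, float -1.0, i64 0 + %minus.one.vec = shufflevector <vscale x 2 x float> %minus.one, <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer + %neg = fmul <vscale x 2 x float> %a, %minus.one.vec + ret <vscale x 2 x float> %neg +} + +define <vscale x 2 x double> @fneg_nxv2f64(<vscale x 2 x double> %a) { +; CHECK-LABEL: fneg_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fneg z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %minus.one = insertelement <vscale x 2 x double> undef, double -1.0, i64 0 + %minus.one.vec = shufflevector <vscale x 2 x double> %minus.one, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer + %neg = fmul <vscale x 2 x double> %a, %minus.one.vec + ret <vscale x 2 x double> %neg +} + define <vscale x 8 x half> @frecps_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) { ; CHECK-LABEL: frecps_h: ; CHECK: // %bb.0: -- 2.7.4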