From e86f4e606b7c4f96df88de2816b2a4a025a2e04b Mon Sep 17 00:00:00 2001 From: David Green Date: Sun, 11 Jun 2023 09:42:34 +0100 Subject: [PATCH] [AArch64] Make VECREDUCE_FMIN/FMAX legal. NFCI VECREDUCE_FMIN/FMAX will be lowered to fminnmv/fmaxnmv. They needn't be custom lowered through an intrinsic to do it, and can be treated as Legal instead. --- llvm/include/llvm/Target/TargetSelectionDAG.td | 2 ++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 16 ++-------------- llvm/lib/Target/AArch64/AArch64InstrFormats.td | 2 +- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 19 +++++++++++++------ 4 files changed, 18 insertions(+), 21 deletions(-) diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index a172d01..74fa24b 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -472,6 +472,8 @@ def vecreduce_umax : SDNode<"ISD::VECREDUCE_UMAX", SDTVecReduce>; def vecreduce_smin : SDNode<"ISD::VECREDUCE_SMIN", SDTVecReduce>; def vecreduce_umin : SDNode<"ISD::VECREDUCE_UMIN", SDTVecReduce>; def vecreduce_fadd : SDNode<"ISD::VECREDUCE_FADD", SDTFPVecReduce>; +def vecreduce_fmin : SDNode<"ISD::VECREDUCE_FMIN", SDTFPVecReduce>; +def vecreduce_fmax : SDNode<"ISD::VECREDUCE_FMAX", SDTFPVecReduce>; def fadd : SDNode<"ISD::FADD" , SDTFPBinOp, [SDNPCommutative]>; def fsub : SDNode<"ISD::FSUB" , SDTFPBinOp>; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 0956dcf..50fccd5 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1158,8 +1158,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, for (MVT VT : { MVT::v4f16, MVT::v2f32, MVT::v8f16, MVT::v4f32, MVT::v2f64 }) { if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) { - setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom); - 
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); + setOperationAction(ISD::VECREDUCE_FMAX, VT, Legal); + setOperationAction(ISD::VECREDUCE_FMIN, VT, Legal); setOperationAction(ISD::VECREDUCE_FADD, VT, Legal); } @@ -13588,18 +13588,6 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op, return getReductionSDNode(AArch64ISD::UMAXV, dl, Op, DAG); case ISD::VECREDUCE_UMIN: return getReductionSDNode(AArch64ISD::UMINV, dl, Op, DAG); - case ISD::VECREDUCE_FMAX: { - return DAG.getNode( - ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(), - DAG.getConstant(Intrinsic::aarch64_neon_fmaxnmv, dl, MVT::i32), - Src); - } - case ISD::VECREDUCE_FMIN: { - return DAG.getNode( - ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(), - DAG.getConstant(Intrinsic::aarch64_neon_fminnmv, dl, MVT::i32), - Src); - } default: llvm_unreachable("Unhandled reduction"); } diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 34be726..5d4baf6 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -7463,7 +7463,7 @@ multiclass SIMDAcrossLanesHSD opcode, string asm> { let mayRaiseFPException = 1, Uses = [FPCR] in multiclass SIMDFPAcrossLanes opcode, bit sz1, string asm, - Intrinsic intOp> { + SDPatternOperator intOp> { let Predicates = [HasNEON, HasFullFP16] in { def v4i16v : BaseSIMDAcrossLanes<0, 0, {sz1, 0}, opcode, FPR16, V64, asm, ".4h", diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 45c8edc..fcbb6fe 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -781,6 +781,13 @@ def AArch64facgt : PatFrags<(ops node:$Rn, node:$Rm), [(AArch64fcmgt (fabs node:$Rn), (fabs node:$Rm)), (int_aarch64_neon_facgt node:$Rn, node:$Rm)]>; +def AArch64fmaxnmv : PatFrags<(ops node:$Rn), + [(vecreduce_fmax node:$Rn), + (int_aarch64_neon_fmaxnmv node:$Rn)]>; +def AArch64fminnmv : 
PatFrags<(ops node:$Rn), + [(vecreduce_fmin node:$Rn), + (int_aarch64_neon_fminnmv node:$Rn)]>; + def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>; def AArch64stg : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def AArch64stzg : SDNode<"AArch64ISD::STZG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; @@ -5812,17 +5819,17 @@ def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))), (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>; def : Pat<(f64 (int_aarch64_neon_faddv (v2f64 V128:$Rn))), (FADDPv2i64p V128:$Rn)>; -def : Pat<(f32 (int_aarch64_neon_fmaxnmv (v2f32 V64:$Rn))), +def : Pat<(f32 (AArch64fmaxnmv (v2f32 V64:$Rn))), (FMAXNMPv2i32p V64:$Rn)>; -def : Pat<(f64 (int_aarch64_neon_fmaxnmv (v2f64 V128:$Rn))), +def : Pat<(f64 (AArch64fmaxnmv (v2f64 V128:$Rn))), (FMAXNMPv2i64p V128:$Rn)>; def : Pat<(f32 (int_aarch64_neon_fmaxv (v2f32 V64:$Rn))), (FMAXPv2i32p V64:$Rn)>; def : Pat<(f64 (int_aarch64_neon_fmaxv (v2f64 V128:$Rn))), (FMAXPv2i64p V128:$Rn)>; -def : Pat<(f32 (int_aarch64_neon_fminnmv (v2f32 V64:$Rn))), +def : Pat<(f32 (AArch64fminnmv (v2f32 V64:$Rn))), (FMINNMPv2i32p V64:$Rn)>; -def : Pat<(f64 (int_aarch64_neon_fminnmv (v2f64 V128:$Rn))), +def : Pat<(f64 (AArch64fminnmv (v2f64 V128:$Rn))), (FMINNMPv2i64p V128:$Rn)>; def : Pat<(f32 (int_aarch64_neon_fminv (v2f32 V64:$Rn))), (FMINPv2i32p V64:$Rn)>; @@ -6290,9 +6297,9 @@ defm UMAXV : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">; defm UMINV : SIMDAcrossLanesBHS<1, 0b11010, "uminv">; defm SADDLV : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">; defm UADDLV : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">; -defm FMAXNMV : SIMDFPAcrossLanes<0b01100, 0, "fmaxnmv", int_aarch64_neon_fmaxnmv>; +defm FMAXNMV : SIMDFPAcrossLanes<0b01100, 0, "fmaxnmv", AArch64fmaxnmv>; defm FMAXV : SIMDFPAcrossLanes<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>; -defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", 
int_aarch64_neon_fminnmv>; +defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", AArch64fminnmv>; defm FMINV : SIMDFPAcrossLanes<0b01111, 1, "fminv", int_aarch64_neon_fminv>; multiclass SIMDAcrossLaneLongPairIntrinsic { -- 2.7.4