From 262e4200d1977335d62dbab93b5d8e1f9f5c5e86 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 16 May 2021 10:53:04 +0100 Subject: [PATCH] [X86][SSE] Pull out combineToHorizontalAddSub helper from inside (F)ADD/SUB combines (REAPPLIED). NFCI. The intention is to be able to run this from additional locations (such as shuffle combining) in the future. Reapplies rGb95a103808ac (after reversion at rGc012a388a15b), with SSE3/SSSE3 typo fix, test added at rG0afb10de1449. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 107 +++++++++++++++++--------------- 1 file changed, 56 insertions(+), 51 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1adab3b..33af959 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -46248,32 +46248,67 @@ static bool isHorizontalBinOp(unsigned HOpcode, SDValue &LHS, SDValue &RHS, return true; } -/// Do target-specific dag combines on floating-point adds/subs. -static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG, - const X86Subtarget &Subtarget) { +// Try to synthesize horizontal (f)hadd/hsub from (f)adds/subs of shuffles. +static SDValue combineToHorizontalAddSub(SDNode *N, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { EVT VT = N->getValueType(0); - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); - bool IsFadd = N->getOpcode() == ISD::FADD; - auto HorizOpcode = IsFadd ? X86ISD::FHADD : X86ISD::FHSUB; - assert((IsFadd || N->getOpcode() == ISD::FSUB) && "Wrong opcode"); - - // Try to synthesize horizontal add/sub from adds/subs of shuffles. 
+ unsigned Opcode = N->getOpcode(); + bool IsAdd = (Opcode == ISD::FADD) || (Opcode == ISD::ADD); SmallVector<int, 8> PostShuffleMask; - if (((Subtarget.hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) || - (Subtarget.hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) && - isHorizontalBinOp(HorizOpcode, LHS, RHS, DAG, Subtarget, IsFadd, - PostShuffleMask)) { - SDValue HorizBinOp = DAG.getNode(HorizOpcode, SDLoc(N), VT, LHS, RHS); - if (!PostShuffleMask.empty()) - HorizBinOp = DAG.getVectorShuffle(VT, SDLoc(HorizBinOp), HorizBinOp, - DAG.getUNDEF(VT), PostShuffleMask); - return HorizBinOp; + + switch (Opcode) { + case ISD::FADD: + case ISD::FSUB: + if ((Subtarget.hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) || + (Subtarget.hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + auto HorizOpcode = IsAdd ? X86ISD::FHADD : X86ISD::FHSUB; + if (isHorizontalBinOp(HorizOpcode, LHS, RHS, DAG, Subtarget, IsAdd, + PostShuffleMask)) { + SDValue HorizBinOp = DAG.getNode(HorizOpcode, SDLoc(N), VT, LHS, RHS); + if (!PostShuffleMask.empty()) + HorizBinOp = DAG.getVectorShuffle(VT, SDLoc(HorizBinOp), HorizBinOp, + DAG.getUNDEF(VT), PostShuffleMask); + return HorizBinOp; + } + } + break; + case ISD::ADD: + case ISD::SUB: + if (Subtarget.hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32 || + VT == MVT::v16i16 || VT == MVT::v8i32)) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + auto HorizOpcode = IsAdd ? 
X86ISD::HADD : X86ISD::HSUB; + if (isHorizontalBinOp(HorizOpcode, LHS, RHS, DAG, Subtarget, IsAdd, + PostShuffleMask)) { + auto HOpBuilder = [HorizOpcode](SelectionDAG &DAG, const SDLoc &DL, + ArrayRef<SDValue> Ops) { + return DAG.getNode(HorizOpcode, DL, Ops[0].getValueType(), Ops); + }; + SDValue HorizBinOp = SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, + {LHS, RHS}, HOpBuilder); + if (!PostShuffleMask.empty()) + HorizBinOp = DAG.getVectorShuffle(VT, SDLoc(HorizBinOp), HorizBinOp, + DAG.getUNDEF(VT), PostShuffleMask); + return HorizBinOp; + } + } + break; } return SDValue(); } +/// Do target-specific dag combines on floating-point adds/subs. +static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { + if (SDValue HOp = combineToHorizontalAddSub(N, DAG, Subtarget)) + return HOp; + return SDValue(); +} + /// Attempt to pre-truncate inputs to arithmetic ops if it will simplify /// the codegen. /// e.g. TRUNC( BINOP( X, Y ) ) --> BINOP( TRUNC( X ), TRUNC( Y ) ) @@ -49507,36 +49542,6 @@ static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDValue N0, SDValue N1, PMADDBuilder); } -static SDValue combineAddOrSubToHADDorHSUB(SDNode *N, SelectionDAG &DAG, - const X86Subtarget &Subtarget) { - EVT VT = N->getValueType(0); - SDValue Op0 = N->getOperand(0); - SDValue Op1 = N->getOperand(1); - bool IsAdd = N->getOpcode() == ISD::ADD; - auto HorizOpcode = IsAdd ? 
X86ISD::HADD : X86ISD::HSUB; - assert((IsAdd || N->getOpcode() == ISD::SUB) && "Wrong opcode"); - - SmallVector<int, 8> PostShuffleMask; - if ((VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v16i16 || - VT == MVT::v8i32) && - Subtarget.hasSSSE3() && - isHorizontalBinOp(HorizOpcode, Op0, Op1, DAG, Subtarget, IsAdd, - PostShuffleMask)) { - auto HOpBuilder = [HorizOpcode](SelectionDAG &DAG, const SDLoc &DL, - ArrayRef<SDValue> Ops) { - return DAG.getNode(HorizOpcode, DL, Ops[0].getValueType(), Ops); - }; - SDValue HorizBinOp = - SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, {Op0, Op1}, HOpBuilder); - if (!PostShuffleMask.empty()) - HorizBinOp = DAG.getVectorShuffle(VT, SDLoc(HorizBinOp), HorizBinOp, - DAG.getUNDEF(VT), PostShuffleMask); - return HorizBinOp; - } - - return SDValue(); -} - static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { @@ -49550,7 +49555,7 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, return MAdd; // Try to synthesize horizontal adds from adds of shuffles. - if (SDValue V = combineAddOrSubToHADDorHSUB(N, DAG, Subtarget)) + if (SDValue V = combineToHorizontalAddSub(N, DAG, Subtarget)) return V; // If vectors of i1 are legal, turn (add (zext (vXi1 X)), Y) into @@ -49612,7 +49617,7 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG, } // Try to synthesize horizontal subs from subs of shuffles. - if (SDValue V = combineAddOrSubToHADDorHSUB(N, DAG, Subtarget)) + if (SDValue V = combineToHorizontalAddSub(N, DAG, Subtarget)) return V; return combineAddOrSubToADCOrSBB(N, DAG); -- 2.7.4