From f0815e01d822d7a4913b5a429f4308e4740aab65 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Fri, 16 Mar 2018 18:25:59 +0000
Subject: [PATCH] [X86] Merge ADDSUB/SUBADD detection into single methods that
 can detect either and indicate what they found.

Previously, we called the same functions twice with a bool flag determining whether we should look for ADDSUB or SUBADD. It would be more efficient to run the code once and detect either pattern with a flag to tell which type it found.

Differential Revision: https://reviews.llvm.org/D44540

llvm-svn: 327730
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 175 ++++++++++++++------------------
 1 file changed, 74 insertions(+), 101 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e258e65..0df0918 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -7512,7 +7512,7 @@ static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV,
                              const X86Subtarget &Subtarget, SelectionDAG &DAG,
                              SDValue &Opnd0, SDValue &Opnd1,
                              unsigned &NumExtracts,
-                             bool matchSubAdd) {
+                             bool &IsSubAdd) {
 
   MVT VT = BV->getSimpleValueType(0);
   if (!Subtarget.hasSSE3() || !VT.isFloatingPoint())
@@ -7525,26 +7525,20 @@ static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV,
   NumExtracts = 0;
 
   // Odd-numbered elements in the input build vector are obtained from
-  // adding two integer/float elements.
+  // adding/subtracting two integer/float elements.
   // Even-numbered elements in the input build vector are obtained from
-  // subtracting two integer/float elements.
-  unsigned ExpectedOpcode = matchSubAdd ? ISD::FADD : ISD::FSUB;
-  unsigned NextExpectedOpcode = matchSubAdd ? ISD::FSUB : ISD::FADD;
-  bool AddFound = false;
-  bool SubFound = false;
-
+  // subtracting/adding two integer/float elements.
+  unsigned Opc[2] {0, 0};
   for (unsigned i = 0, e = NumElts; i != e; ++i) {
     SDValue Op = BV->getOperand(i);
 
     // Skip 'undef' values.
     unsigned Opcode = Op.getOpcode();
-    if (Opcode == ISD::UNDEF) {
-      std::swap(ExpectedOpcode, NextExpectedOpcode);
+    if (Opcode == ISD::UNDEF)
       continue;
-    }
 
     // Early exit if we found an unexpected opcode.
-    if (Opcode != ExpectedOpcode)
+    if (Opcode != ISD::FADD && Opcode != ISD::FSUB)
       return false;
 
     SDValue Op0 = Op.getOperand(0);
@@ -7564,11 +7558,11 @@ static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV,
     if (I0 != i)
       return false;
 
-    // We found a valid add/sub node. Update the information accordingly.
-    if (i & 1)
-      AddFound = true;
-    else
-      SubFound = true;
+    // We found a valid add/sub node, make sure it's the same opcode as previous
+    // elements for this parity.
+    if (Opc[i % 2] != 0 && Opc[i % 2] != Opcode)
+      return false;
+    Opc[i % 2] = Opcode;
 
     // Update InVec0 and InVec1.
     if (InVec0.isUndef()) {
@@ -7585,7 +7579,7 @@ static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV,
     // Make sure that operands in input to each add/sub node always
     // come from a same pair of vectors.
     if (InVec0 != Op0.getOperand(0)) {
-      if (ExpectedOpcode == ISD::FSUB)
+      if (Opcode == ISD::FSUB)
         return false;
 
       // FADD is commutable. Try to commute the operands
@@ -7598,17 +7592,19 @@ static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV,
     if (InVec1 != Op1.getOperand(0))
       return false;
 
-    // Update the pair of expected opcodes.
-    std::swap(ExpectedOpcode, NextExpectedOpcode);
-
     // Increment the number of extractions done.
     ++NumExtracts;
   }
 
-  // Don't try to fold this build_vector into an ADDSUB if the inputs are undef.
-  if (!AddFound || !SubFound || InVec0.isUndef() || InVec1.isUndef())
+  // Ensure we have found an opcode for both parities and that they are
+  // different. Don't try to fold this build_vector into an ADDSUB/SUBADD if the
+  // inputs are undef.
+  if (!Opc[0] || !Opc[1] || Opc[0] == Opc[1] ||
+      InVec0.isUndef() || InVec1.isUndef())
     return false;
 
+  IsSubAdd = Opc[0] == ISD::FADD;
+
   Opnd0 = InVec0;
   Opnd1 = InVec1;
   return true;
@@ -7665,15 +7661,17 @@ static bool isFMAddSubOrFMSubAdd(const X86Subtarget &Subtarget,
   return true;
 }
 
-/// Try to fold a build_vector that performs an 'addsub' or 'fmaddsub' operation
-/// accordingly to X86ISD::ADDSUB or X86ISD::FMADDSUB node.
+/// Try to fold a build_vector that performs an 'addsub' or 'fmaddsub' or
+/// 'fmsubadd' operation accordingly to X86ISD::ADDSUB or X86ISD::FMADDSUB or
+/// X86ISD::FMSUBADD node.
 static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV,
                                        const X86Subtarget &Subtarget,
                                        SelectionDAG &DAG) {
   SDValue Opnd0, Opnd1;
   unsigned NumExtracts;
+  bool IsSubAdd;
   if (!isAddSubOrSubAdd(BV, Subtarget, DAG, Opnd0, Opnd1, NumExtracts,
-                        /*matchSubAdd*/false))
+                        IsSubAdd))
     return SDValue();
 
   MVT VT = BV->getSimpleValueType(0);
@@ -7681,8 +7679,14 @@ static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV,
 
   // Try to generate X86ISD::FMADDSUB node here.
   SDValue Opnd2;
-  if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, NumExtracts))
-    return DAG.getNode(X86ISD::FMADDSUB, DL, VT, Opnd0, Opnd1, Opnd2);
+  if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, NumExtracts)) {
+    unsigned Opc = IsSubAdd ? X86ISD::FMSUBADD : X86ISD::FMADDSUB;
+    return DAG.getNode(Opc, DL, VT, Opnd0, Opnd1, Opnd2);
+  }
+
+  // We only support ADDSUB.
+  if (IsSubAdd)
+    return SDValue();
 
   // Do not generate X86ISD::ADDSUB node for 512-bit types even though
   // the ADDSUB idiom has been successfully recognized. There are no known
@@ -7695,28 +7699,6 @@ static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV,
   return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1);
 }
 
-/// Try to fold a build_vector that performs an 'fmsubadd' operation
-/// accordingly to X86ISD::FMSUBADD node.
-static SDValue lowerToFMSubAdd(const BuildVectorSDNode *BV,
-                               const X86Subtarget &Subtarget,
-                               SelectionDAG &DAG) {
-  SDValue Opnd0, Opnd1;
-  unsigned NumExtracts;
-  if (!isAddSubOrSubAdd(BV, Subtarget, DAG, Opnd0, Opnd1, NumExtracts,
-                        /*matchSubAdd*/true))
-    return SDValue();
-
-  MVT VT = BV->getSimpleValueType(0);
-  SDLoc DL(BV);
-
-  // Try to generate X86ISD::FMSUBADD node here.
-  SDValue Opnd2;
-  if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, NumExtracts))
-    return DAG.getNode(X86ISD::FMSUBADD, DL, VT, Opnd0, Opnd1, Opnd2);
-
-  return SDValue();
-}
-
 /// Lower BUILD_VECTOR to a horizontal add/sub operation if possible.
 static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
                                    const X86Subtarget &Subtarget,
@@ -8253,8 +8235,6 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
   BuildVectorSDNode *BV = cast<BuildVectorSDNode>(Op.getNode());
   if (SDValue AddSub = lowerToAddSubOrFMAddSub(BV, Subtarget, DAG))
     return AddSub;
-  if (SDValue SubAdd = lowerToFMSubAdd(BV, Subtarget, DAG))
-    return SubAdd;
   if (SDValue HorizontalOp = LowerToHorizontalOp(BV, Subtarget, DAG))
     return HorizontalOp;
   if (SDValue Broadcast = lowerBuildVectorAsBroadcast(BV, Subtarget, DAG))
@@ -30437,7 +30417,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
 /// the fact that they're unused.
 static bool isAddSubOrSubAdd(SDNode *N, const X86Subtarget &Subtarget,
                              SelectionDAG &DAG, SDValue &Opnd0, SDValue &Opnd1,
-                             bool matchSubAdd) {
+                             bool &IsSubAdd) {
 
   EVT VT = N->getValueType(0);
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -30451,23 +30431,13 @@ static bool isAddSubOrSubAdd(SDNode *N, const X86Subtarget &Subtarget,
   if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
     return false;
 
-  ArrayRef<int> OrigMask = cast<ShuffleVectorSDNode>(N)->getMask();
-  SmallVector<int, 16> Mask(OrigMask.begin(), OrigMask.end());
-
   SDValue V1 = N->getOperand(0);
   SDValue V2 = N->getOperand(1);
 
-  unsigned ExpectedOpcode = matchSubAdd ? ISD::FADD : ISD::FSUB;
-  unsigned NextExpectedOpcode = matchSubAdd ? ISD::FSUB : ISD::FADD;
-
-  // We require the first shuffle operand to be the ExpectedOpcode node,
-  // and the second to be the NextExpectedOpcode node.
-  if (V1.getOpcode() == NextExpectedOpcode &&
-      V2.getOpcode() == ExpectedOpcode) {
-    ShuffleVectorSDNode::commuteMask(Mask);
-    std::swap(V1, V2);
-  } else if (V1.getOpcode() != ExpectedOpcode ||
-             V2.getOpcode() != NextExpectedOpcode)
+  // Make sure we have an FADD and an FSUB.
+  if ((V1.getOpcode() != ISD::FADD && V1.getOpcode() != ISD::FSUB) ||
+      (V2.getOpcode() != ISD::FADD && V2.getOpcode() != ISD::FSUB) ||
+      V1.getOpcode() == V2.getOpcode())
     return false;
 
   // If there are other uses of these operations we can't fold them.
@@ -30477,27 +30447,47 @@ static bool isAddSubOrSubAdd(SDNode *N, const X86Subtarget &Subtarget,
   // Ensure that both operations have the same operands. Note that we can
   // commute the FADD operands.
   SDValue LHS, RHS;
-  if (ExpectedOpcode == ISD::FSUB) {
+  if (V1.getOpcode() == ISD::FSUB) {
     LHS = V1->getOperand(0); RHS = V1->getOperand(1);
     if ((V2->getOperand(0) != LHS || V2->getOperand(1) != RHS) &&
         (V2->getOperand(0) != RHS || V2->getOperand(1) != LHS))
       return false;
   } else {
+    assert(V2.getOpcode() == ISD::FSUB && "Unexpected opcode");
     LHS = V2->getOperand(0); RHS = V2->getOperand(1);
     if ((V1->getOperand(0) != LHS || V1->getOperand(1) != RHS) &&
         (V1->getOperand(0) != RHS || V1->getOperand(1) != LHS))
       return false;
   }
 
-  // We're looking for blends between FADD and FSUB nodes. We insist on these
-  // nodes being lined up in a specific expected pattern.
-  if (!(isShuffleEquivalent(V1, V2, Mask, {0, 3}) ||
-        isShuffleEquivalent(V1, V2, Mask, {0, 5, 2, 7}) ||
-        isShuffleEquivalent(V1, V2, Mask, {0, 9, 2, 11, 4, 13, 6, 15}) ||
-        isShuffleEquivalent(V1, V2, Mask, {0, 17, 2, 19, 4, 21, 6, 23,
-                                           8, 25, 10, 27, 12, 29, 14, 31})))
+  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask();
+
+  int ParitySrc[2] = {-1, -1};
+  unsigned Size = Mask.size();
+  for (unsigned i = 0; i != Size; ++i) {
+    int M = Mask[i];
+    if (M < 0)
+      continue;
+
+    // Make sure we are using the matching element from the input.
+    if ((M % Size) != i)
+      return false;
+
+    // Make sure we use the same input for all elements of the same parity.
+    int Src = M / Size;
+    if (ParitySrc[i % 2] >= 0 && ParitySrc[i % 2] != Src)
+      return false;
+    ParitySrc[i % 2] = Src;
+  }
+
+  // Make sure each input is used.
+  if (ParitySrc[0] < 0 || ParitySrc[1] < 0 || ParitySrc[0] == ParitySrc[1])
     return false;
 
+  // It's a subadd if the vector in the even parity is an FADD.
+  IsSubAdd = ParitySrc[0] == 0 ? V1->getOpcode() == ISD::FADD
+                               : V2->getOpcode() == ISD::FADD;
+
   Opnd0 = LHS;
   Opnd1 = RHS;
   return true;
@@ -30509,7 +30499,8 @@ static SDValue combineShuffleToAddSubOrFMAddSub(SDNode *N,
                                                 const X86Subtarget &Subtarget,
                                                 SelectionDAG &DAG) {
   SDValue Opnd0, Opnd1;
-  if (!isAddSubOrSubAdd(N, Subtarget, DAG, Opnd0, Opnd1, /*matchSubAdd*/false))
+  bool IsSubAdd;
+  if (!isAddSubOrSubAdd(N, Subtarget, DAG, Opnd0, Opnd1, IsSubAdd))
     return SDValue();
 
   MVT VT = N->getSimpleValueType(0);
@@ -30517,8 +30508,13 @@ static SDValue combineShuffleToAddSubOrFMAddSub(SDNode *N,
 
   // Try to generate X86ISD::FMADDSUB node here.
   SDValue Opnd2;
-  if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, 2))
-    return DAG.getNode(X86ISD::FMADDSUB, DL, VT, Opnd0, Opnd1, Opnd2);
+  if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, 2)) {
+    unsigned Opc = IsSubAdd ? X86ISD::FMSUBADD : X86ISD::FMADDSUB;
+    return DAG.getNode(Opc, DL, VT, Opnd0, Opnd1, Opnd2);
+  }
+
+  if (IsSubAdd)
+    return SDValue();
 
   // Do not generate X86ISD::ADDSUB node for 512-bit types even though
   // the ADDSUB idiom has been successfully recognized. There are no known
@@ -30529,26 +30525,6 @@ static SDValue combineShuffleToAddSubOrFMAddSub(SDNode *N,
   return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1);
 }
 
-/// \brief Try to combine a shuffle into a target-specific
-/// mul-sub-add node.
-static SDValue combineShuffleToFMSubAdd(SDNode *N,
-                                        const X86Subtarget &Subtarget,
-                                        SelectionDAG &DAG) {
-  SDValue Opnd0, Opnd1;
-  if (!isAddSubOrSubAdd(N, Subtarget, DAG, Opnd0, Opnd1, /*matchSubAdd*/true))
-    return SDValue();
-
-  MVT VT = N->getSimpleValueType(0);
-  SDLoc DL(N);
-
-  // Try to generate X86ISD::FMSUBADD node here.
-  SDValue Opnd2;
-  if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, 2))
-    return DAG.getNode(X86ISD::FMSUBADD, DL, VT, Opnd0, Opnd1, Opnd2);
-
-  return SDValue();
-}
-
 // We are looking for a shuffle where both sources are concatenated with undef
 // and have a width that is half of the output's width. AVX2 has VPERMD/Q, so
 // if we can express this as a single-source shuffle, that's preferable.
@@ -30640,9 +30616,6 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
     if (SDValue AddSub = combineShuffleToAddSubOrFMAddSub(N, Subtarget, DAG))
       return AddSub;
 
-    if (SDValue FMSubAdd = combineShuffleToFMSubAdd(N, Subtarget, DAG))
-      return FMSubAdd;
-
     if (SDValue HAddSub = foldShuffleOfHorizOp(N))
       return HAddSub;
   }
-- 
2.7.4