From: Sanjay Patel <spatel@rotateright.com>
Date: Tue, 5 Jan 2021 18:21:49 +0000 (-0500)
Subject: [SLP] delete unused pairwise reduction option
X-Git-Tag: llvmorg-13-init~2067
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=3b8b2c7da2efb88d9f13e911e383af430ab463ef;p=platform%2Fupstream%2Fllvm.git

[SLP] delete unused pairwise reduction option

SLP tries to model 2 forms of vector reductions: pairwise and splitting.
From the cost model code comments, those are defined using an example as:

  /// Pairwise:
  ///  (v0, v1, v2, v3)
  ///  ((v0+v1), (v2+v3), undef, undef)
  /// Split:
  ///  (v0, v1, v2, v3)
  ///  ((v0+v2), (v1+v3), undef, undef)

I don't know the full history of this functionality, but it was partly
added back in D29402. There are apparently no users at this point (no
regression tests change). X86 might have managed to work-around the need
for this through cost model and codegen improvements.

Removing this code makes it easier to continue the work that was started
in D87416 / D88193. The alternative -- if there is some target that is
silently using this option -- is to move this logic into LoopUtils. We
have related/duplicate functionality there via llvm::createTargetReduction().

Differential Revision: https://reviews.llvm.org/D93860
---

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index a655d3d..8965a44 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6382,35 +6382,6 @@ bool SLPVectorizerPass::tryToVectorize(Instruction *I, BoUpSLP &R) {
   return false;
 }
 
-/// Generate a shuffle mask to be used in a reduction tree.
-///
-/// \param VecLen The length of the vector to be reduced.
-/// \param NumEltsToRdx The number of elements that should be reduced in the
-///        vector.
-/// \param IsPairwise Whether the reduction is a pairwise or splitting
-///        reduction. A pairwise reduction will generate a mask of
-///        <0,2,...> or <1,3,..> while a splitting reduction will generate
-///        <2,3, undef,undef> for a vector of 4 and NumElts = 2.
-/// \param IsLeft True will generate a mask of even elements, odd otherwise.
-static SmallVector<int, 32> createRdxShuffleMask(unsigned VecLen,
-                                                 unsigned NumEltsToRdx,
-                                                 bool IsPairwise, bool IsLeft) {
-  assert((IsPairwise || !IsLeft) && "Don't support a <0,1,undef,...> mask");
-
-  SmallVector<int, 32> ShuffleMask(VecLen, -1);
-
-  if (IsPairwise)
-    // Build a mask of 0, 2, ... (left) or 1, 3, ... (right).
-    for (unsigned i = 0; i != NumEltsToRdx; ++i)
-      ShuffleMask[i] = 2 * i + !IsLeft;
-  else
-    // Move the upper half of the vector to the lower half.
-    for (unsigned i = 0; i != NumEltsToRdx; ++i)
-      ShuffleMask[i] = NumEltsToRdx + i;
-
-  return ShuffleMask;
-}
-
 namespace {
 
 /// Model horizontal reductions.
@@ -6730,10 +6701,6 @@ class HorizontalReduction {
   /// The operation data for the leaf values that we perform a reduction on.
   OperationData RdxLeafVal;
 
-  /// Should we model this reduction as a pairwise reduction tree or a tree that
-  /// splits the vector in halves and adds those halves.
-  bool IsPairwiseReduction = false;
-
   /// Checks if the ParentStackElem.first should be marked as a reduction
   /// operation with an extra argument or as extra argument itself.
   void markExtraArg(std::pair<Instruction *, unsigned> &ParentStackElem,
@@ -7170,7 +7137,6 @@ private:
     Type *ScalarTy = FirstReducedVal->getType();
     auto *VecTy = FixedVectorType::get(ScalarTy, ReduxWidth);
 
-    int PairwiseRdxCost;
     int SplittingRdxCost;
     switch (RdxTreeInst.getKind()) {
     case RecurKind::Add:
@@ -7180,9 +7146,6 @@ private:
     case RecurKind::Xor:
     case RecurKind::FAdd:
     case RecurKind::FMul:
-      PairwiseRdxCost =
-          TTI->getArithmeticReductionCost(RdxTreeInst.getOpcode(), VecTy,
-                                          /*IsPairwiseForm=*/true);
       SplittingRdxCost =
           TTI->getArithmeticReductionCost(RdxTreeInst.getOpcode(), VecTy,
                                           /*IsPairwiseForm=*/false);
@@ -7194,9 +7157,6 @@ private:
       auto *VecCondTy = cast<VectorType>(CmpInst::makeCmpResultType(VecTy));
       RecurKind Kind = RdxTreeInst.getKind();
       bool IsUnsigned = Kind == RecurKind::UMax || Kind == RecurKind::UMin;
-      PairwiseRdxCost =
-          TTI->getMinMaxReductionCost(VecTy, VecCondTy,
-                                      /*IsPairwiseForm=*/true, IsUnsigned);
       SplittingRdxCost =
           TTI->getMinMaxReductionCost(VecTy, VecCondTy,
                                       /*IsPairwiseForm=*/false, IsUnsigned);
@@ -7206,9 +7166,6 @@ private:
       llvm_unreachable("Expected arithmetic or min/max reduction operation");
     }
 
-    IsPairwiseReduction = PairwiseRdxCost < SplittingRdxCost;
-    int VecReduxCost = IsPairwiseReduction ? PairwiseRdxCost : SplittingRdxCost;
-
     int ScalarReduxCost = 0;
     switch (RdxTreeInst.getKind()) {
     case RecurKind::Add:
@@ -7235,13 +7192,12 @@ private:
     }
     ScalarReduxCost *= (ReduxWidth - 1);
 
-    LLVM_DEBUG(dbgs() << "SLP: Adding cost " << VecReduxCost - ScalarReduxCost
+    LLVM_DEBUG(dbgs() << "SLP: Adding cost "
+                      << SplittingRdxCost - ScalarReduxCost
                       << " for reduction that starts with " << *FirstReducedVal
-                      << " (It is a "
-                      << (IsPairwiseReduction ? "pairwise" : "splitting")
-                      << " reduction)\n");
+                      << " (It is a splitting reduction)\n");
 
-    return VecReduxCost - ScalarReduxCost;
+    return SplittingRdxCost - ScalarReduxCost;
   }
 
   /// Emit a horizontal reduction of the vectorized value.
@@ -7251,30 +7207,12 @@ private:
     assert(isPowerOf2_32(ReduxWidth) &&
            "We only handle power-of-two reductions for now");
 
-    if (!IsPairwiseReduction) {
-      // FIXME: The builder should use an FMF guard. It should not be hard-coded
-      //        to 'fast'.
-      assert(Builder.getFastMathFlags().isFast() && "Expected 'fast' FMF");
-      return createSimpleTargetReduction(Builder, TTI, VectorizedValue,
-                                         RdxTreeInst.getKind(),
-                                         ReductionOps.back());
-    }
-
-    Value *TmpVec = VectorizedValue;
-    for (unsigned i = ReduxWidth / 2; i != 0; i >>= 1) {
-      auto LeftMask = createRdxShuffleMask(ReduxWidth, i, true, true);
-      auto RightMask = createRdxShuffleMask(ReduxWidth, i, true, false);
-
-      Value *LeftShuf =
-          Builder.CreateShuffleVector(TmpVec, LeftMask, "rdx.shuf.l");
-      Value *RightShuf =
-          Builder.CreateShuffleVector(TmpVec, RightMask, "rdx.shuf.r");
-      TmpVec = RdxTreeInst.createOp(Builder, LeftShuf, RightShuf, "op.rdx",
-                                    ReductionOps);
-    }
-
-    // The result is in the first element of the vector.
-    return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
+    // FIXME: The builder should use an FMF guard. It should not be hard-coded
+    //        to 'fast'.
+    assert(Builder.getFastMathFlags().isFast() && "Expected 'fast' FMF");
+    return createSimpleTargetReduction(Builder, TTI, VectorizedValue,
+                                       RdxTreeInst.getKind(),
+                                       ReductionOps.back());
   }
 };