[LV] Move getScalarizationOverhead and vector call cost computations to CM. (NFC)

author Florian Hahn <flo@fhahn.com>

Wed, 15 May 2019 10:05:49 +0000 (10:05 +0000)

committer Florian Hahn <flo@fhahn.com>

Wed, 15 May 2019 10:05:49 +0000 (10:05 +0000)
author Florian Hahn <flo@fhahn.com>
Wed, 15 May 2019 10:05:49 +0000 (10:05 +0000)
committer Florian Hahn <flo@fhahn.com>
Wed, 15 May 2019 10:05:49 +0000 (10:05 +0000)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

index 83f1c70..ce03b85 100644 (file)
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1169,6 +1169,18 @@ public:
      return foldTailByMasking() || Legal->blockNeedsPredication(BB);
    }
  
+  /// Estimate cost of an intrinsic call instruction CI if it were vectorized
+  /// with factor VF.  Return the cost of the instruction, including
+  /// scalarization overhead if it's needed.
+  unsigned getVectorIntrinsicCost(CallInst *CI, unsigned VF);
+
+  /// Estimate cost of a call instruction CI if it were vectorized with factor
+  /// VF. Return the cost of the instruction, including scalarization overhead
+  /// if it's needed. The flag NeedToScalarize shows if the call needs to be
+  /// scalarized -
+  /// i.e. either vector version isn't available, or is too expensive.
+  unsigned getVectorCallCost(CallInst *CI, unsigned VF, bool &NeedToScalarize);
+
  private:
    unsigned NumPredStores = 0;
  
@@ -1221,6 +1233,10 @@ private:
    /// element)
    unsigned getUniformMemOpCost(Instruction *I, unsigned VF);
  
+  /// Estimate the overhead of scalarizing an instruction. This is a
+  /// convenience wrapper for the type-based getScalarizationOverhead API.
+  unsigned getScalarizationOverhead(Instruction *I, unsigned VF);
+
    /// Returns whether the instruction is a load or store and will be emitted
    /// as a vector operation.
    bool isConsecutiveLoadOrStore(Instruction *I);
@@ -3057,45 +3073,9 @@ static void cse(BasicBlock *BB) {
    }
  }
  
-/// Estimate the overhead of scalarizing an instruction. This is a
-/// convenience wrapper for the type-based getScalarizationOverhead API.
-static unsigned getScalarizationOverhead(Instruction *I, unsigned VF,
-                                         const TargetTransformInfo &TTI) {
-  if (VF == 1)
-    return 0;
-
-  unsigned Cost = 0;
-  Type *RetTy = ToVectorTy(I->getType(), VF);
-  if (!RetTy->isVoidTy() &&
-      (!isa<LoadInst>(I) ||
-       !TTI.supportsEfficientVectorElementLoadStore()))
-    Cost += TTI.getScalarizationOverhead(RetTy, true, false);
-
-  // Some targets keep addresses scalar.
-  if (isa<LoadInst>(I) && !TTI.prefersVectorizedAddressing())
-    return Cost;
-
-  if (CallInst *CI = dyn_cast<CallInst>(I)) {
-    SmallVector<const Value *, 4> Operands(CI->arg_operands());
-    Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
-  }
-  else if (!isa<StoreInst>(I) ||
-           !TTI.supportsEfficientVectorElementLoadStore()) {
-    SmallVector<const Value *, 4> Operands(I->operand_values());
-    Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
-  }
-
-  return Cost;
-}
-
-// Estimate cost of a call instruction CI if it were vectorized with factor VF.
-// Return the cost of the instruction, including scalarization overhead if it's
-// needed. The flag NeedToScalarize shows if the call needs to be scalarized -
-// i.e. either vector version isn't available, or is too expensive.
-static unsigned getVectorCallCost(CallInst *CI, unsigned VF,
-                                  const TargetTransformInfo &TTI,
-                                  const TargetLibraryInfo *TLI,
-                                  bool &NeedToScalarize) {
+unsigned LoopVectorizationCostModel::getVectorCallCost(CallInst *CI,
+                                                       unsigned VF,
+                                                       bool &NeedToScalarize) {
    Function *F = CI->getCalledFunction();
    StringRef FnName = CI->getCalledFunction()->getName();
    Type *ScalarRetTy = CI->getType();
@@ -3118,7 +3098,7 @@ static unsigned getVectorCallCost(CallInst *CI, unsigned VF,
  
    // Compute costs of unpacking argument values for the scalar calls and
    // packing the return values to a vector.
-  unsigned ScalarizationCost = getScalarizationOverhead(CI, VF, TTI);
+  unsigned ScalarizationCost = getScalarizationOverhead(CI, VF);
  
    unsigned Cost = ScalarCallCost * VF + ScalarizationCost;
  
@@ -3137,12 +3117,8 @@ static unsigned getVectorCallCost(CallInst *CI, unsigned VF,
    return Cost;
  }
  
-// Estimate cost of an intrinsic call instruction CI if it were vectorized with
-// factor VF.  Return the cost of the instruction, including scalarization
-// overhead if it's needed.
-static unsigned getVectorIntrinsicCost(CallInst *CI, unsigned VF,
-                                       const TargetTransformInfo &TTI,
-                                       const TargetLibraryInfo *TLI) {
+unsigned LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
+                                                            unsigned VF) {
    Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
    assert(ID && "Expected intrinsic call!");
  
@@ -4126,9 +4102,9 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) {
      // version of the instruction.
      // Is it beneficial to perform intrinsic call compared to lib call?
      bool NeedToScalarize;
-    unsigned CallCost = getVectorCallCost(CI, VF, *TTI, TLI, NeedToScalarize);
+    unsigned CallCost = Cost->getVectorCallCost(CI, VF, NeedToScalarize);
      bool UseVectorIntrinsic =
-        ID && getVectorIntrinsicCost(CI, VF, *TTI, TLI) <= CallCost;
+        ID && Cost->getVectorIntrinsicCost(CI, VF) <= CallCost;
      assert((UseVectorIntrinsic || !NeedToScalarize) &&
             "Instruction should be scalarized elsewhere.");
  
@@ -5522,7 +5498,7 @@ unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
  
    // Get the overhead of the extractelement and insertelement instructions
    // we might create due to scalarization.
-  Cost += getScalarizationOverhead(I, VF, TTI);
+  Cost += getScalarizationOverhead(I, VF);
  
    // If we have a predicated store, it may not be executed for each vector
    // lane. Scale the cost by the probability of executing the predicated
@@ -5674,6 +5650,34 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
    return VectorizationCostTy(C, TypeNotScalarized);
  }
  
+unsigned LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I,
+                                                              unsigned VF) {
+
+  if (VF == 1)
+    return 0;
+
+  unsigned Cost = 0;
+  Type *RetTy = ToVectorTy(I->getType(), VF);
+  if (!RetTy->isVoidTy() &&
+      (!isa<LoadInst>(I) || !TTI.supportsEfficientVectorElementLoadStore()))
+    Cost += TTI.getScalarizationOverhead(RetTy, true, false);
+
+  // Some targets keep addresses scalar.
+  if (isa<LoadInst>(I) && !TTI.prefersVectorizedAddressing())
+    return Cost;
+
+  if (CallInst *CI = dyn_cast<CallInst>(I)) {
+    SmallVector<const Value *, 4> Operands(CI->arg_operands());
+    Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
+  } else if (!isa<StoreInst>(I) ||
+             !TTI.supportsEfficientVectorElementLoadStore()) {
+    SmallVector<const Value *, 4> Operands(I->operand_values());
+    Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
+  }
+
+  return Cost;
+}
+
  void LoopVectorizationCostModel::setCostBasedWideningDecision(unsigned VF) {
    if (VF == 1)
      return;
@@ -5914,7 +5918,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
  
        // The cost of insertelement and extractelement instructions needed for
        // scalarization.
-      Cost += getScalarizationOverhead(I, VF, TTI);
+      Cost += getScalarizationOverhead(I, VF);
  
        // Scale the cost by the probability of executing the predicated blocks.
        // This assumes the predicated block for each vector lane is equally
@@ -6035,16 +6039,16 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
    case Instruction::Call: {
      bool NeedToScalarize;
      CallInst *CI = cast<CallInst>(I);
-    unsigned CallCost = getVectorCallCost(CI, VF, TTI, TLI, NeedToScalarize);
+    unsigned CallCost = getVectorCallCost(CI, VF, NeedToScalarize);
      if (getVectorIntrinsicIDForCall(CI, TLI))
-      return std::min(CallCost, getVectorIntrinsicCost(CI, VF, TTI, TLI));
+      return std::min(CallCost, getVectorIntrinsicCost(CI, VF));
      return CallCost;
    }
    default:
      // The cost of executing VF copies of the scalar instruction. This opcode
      // is unknown. Assume that it is the same as 'mul'.
      return VF * TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy) +
-           getScalarizationOverhead(I, VF, TTI);
+           getScalarizationOverhead(I, VF);
    } // end of switch.
  }
  
@@ -6638,9 +6642,9 @@ bool VPRecipeBuilder::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
        // version of the instruction.
        // Is it beneficial to perform intrinsic call compared to lib call?
        bool NeedToScalarize;
-      unsigned CallCost = getVectorCallCost(CI, VF, *TTI, TLI, NeedToScalarize);
+      unsigned CallCost = CM.getVectorCallCost(CI, VF, NeedToScalarize);
        bool UseVectorIntrinsic =
-          ID && getVectorIntrinsicCost(CI, VF, *TTI, TLI) <= CallCost;
+          ID && CM.getVectorIntrinsicCost(CI, VF) <= CallCost;
        return UseVectorIntrinsic || !NeedToScalarize;
      }
      if (isa<LoadInst>(I) || isa<StoreInst>(I)) {
@@ -6828,7 +6832,7 @@ LoopVectorizationPlanner::buildVPlanWithVPRecipes(
    VPBasicBlock *VPBB = new VPBasicBlock("Pre-Entry");
    auto Plan = llvm::make_unique<VPlan>(VPBB);
  
-  VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, TTI, Legal, CM, Builder);
+  VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder);
    // Represent values that will have defs inside VPlan.
    for (Value *V : NeedDef)
      Plan->addVPValue(V);
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h

index bc6b221..0ca6a6b 100644 (file)
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -29,9 +29,6 @@ class VPRecipeBuilder {
    /// Target Library Info.
    const TargetLibraryInfo *TLI;
  
-  /// Target Transform Info.
-  const TargetTransformInfo *TTI;
-
    /// The legality analysis.
    LoopVectorizationLegality *Legal;
  
@@ -104,11 +101,9 @@ public:
  
  public:
    VPRecipeBuilder(Loop *OrigLoop, const TargetLibraryInfo *TLI,
-                  const TargetTransformInfo *TTI,
                    LoopVectorizationLegality *Legal,
                    LoopVectorizationCostModel &CM, VPBuilder &Builder)
-      : OrigLoop(OrigLoop), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
-        Builder(Builder) {}
+      : OrigLoop(OrigLoop), TLI(TLI), Legal(Legal), CM(CM), Builder(Builder) {}
  
    /// Check if a recipe can be created for \p I within the given VF \p Range.
    /// If a recipe can be created, it adds it to \p VPBB.
author	Florian Hahn <flo@fhahn.com>
	Wed, 15 May 2019 10:05:49 +0000 (10:05 +0000)
committer	Florian Hahn <flo@fhahn.com>
	Wed, 15 May 2019 10:05:49 +0000 (10:05 +0000)
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp		patch \| blob \| history
llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h		patch \| blob \| history