[TTI] NFC: Reduce InstructionCost::getValue() usage...

author Daniil Fukalov <1671137+dfukalov@users.noreply.github.com>

Wed, 17 Aug 2022 23:34:22 +0000 (02:34 +0300)

committer Daniil Fukalov <1671137+dfukalov@users.noreply.github.com>

Fri, 26 Aug 2022 13:37:32 +0000 (16:37 +0300)
author Daniil Fukalov <1671137+dfukalov@users.noreply.github.com>
Wed, 17 Aug 2022 23:34:22 +0000 (02:34 +0300)
committer Daniil Fukalov <1671137+dfukalov@users.noreply.github.com>
Fri, 26 Aug 2022 13:37:32 +0000 (16:37 +0300)
diff --git a/llvm/include/llvm/Support/InstructionCost.h b/llvm/include/llvm/Support/InstructionCost.h

index 7656f45..aa8825e 100644 (file)
--- a/llvm/include/llvm/Support/InstructionCost.h
+++ b/llvm/include/llvm/Support/InstructionCost.h
@@ -244,7 +244,7 @@ public:
    template <class Function>
    auto map(const Function &F) const -> InstructionCost {
      if (isValid())
-      return F(*getValue());
+      return F(Value);
      return getInvalid();
    }
  };
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp

index fbc9638..9884bdf 100644 (file)
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -552,7 +552,7 @@ private:
      // inlined so that we shouldn't specialize it.
      if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid() ||
          (!ForceFunctionSpecialization &&
-         *Metrics.NumInsts.getValue() < SmallFunctionThreshold)) {
+         Metrics.NumInsts < SmallFunctionThreshold)) {
        InstructionCost C{};
        C.setInvalid();
        return C;
diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp

index ab2be53..d2b37f6 100644 (file)
--- a/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -1353,16 +1353,13 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
    if (Cloner.OutlinedFunctions.empty())
      return false;
  
-  int SizeCost = 0;
-  BlockFrequency WeightedRcost;
-  int NonWeightedRcost;
-
    auto OutliningCosts = computeOutliningCosts(Cloner);
-  assert(std::get<0>(OutliningCosts).isValid() &&
-         std::get<1>(OutliningCosts).isValid() && "Expected valid costs");
  
-  SizeCost = *std::get<0>(OutliningCosts).getValue();
-  NonWeightedRcost = *std::get<1>(OutliningCosts).getValue();
+  InstructionCost SizeCost = std::get<0>(OutliningCosts);
+  InstructionCost NonWeightedRcost = std::get<1>(OutliningCosts);
+
+  assert(SizeCost.isValid() && NonWeightedRcost.isValid() &&
+         "Expected valid costs");
  
    // Only calculate RelativeToEntryFreq when we are doing single region
    // outlining.
@@ -1377,7 +1374,8 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
      // execute the calls to outlined functions.
      RelativeToEntryFreq = BranchProbability(0, 1);
  
-  WeightedRcost = BlockFrequency(NonWeightedRcost) * RelativeToEntryFreq;
+  BlockFrequency WeightedRcost =
+      BlockFrequency(*NonWeightedRcost.getValue()) * RelativeToEntryFreq;
  
    // The call sequence(s) to the outlined function(s) are larger than the sum of
    // the original outlined region size(s), it does not increase the chances of
diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp

index 5667eef..1689314 100644 (file)
--- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -840,7 +840,7 @@ private:
        }
      }
  
-    unsigned DuplicationCost = 0;
+    InstructionCost DuplicationCost = 0;
  
      unsigned JumpTableSize = 0;
      TTI->getEstimatedNumberOfCaseClusters(*Switch, JumpTableSize, nullptr,
@@ -851,7 +851,7 @@ private:
        // using binary search, hence the LogBase2().
        unsigned CondBranches =
            APInt(32, Switch->getNumSuccessors()).ceilLogBase2();
-      DuplicationCost = *Metrics.NumInsts.getValue() / CondBranches;
+      DuplicationCost = Metrics.NumInsts / CondBranches;
      } else {
        // Compared with jump tables, the DFA optimizer removes an indirect branch
        // on each loop iteration, thus making branch prediction more precise. The
@@ -859,7 +859,7 @@ private:
        // predictor to make a mistake, and the more benefit there is in the DFA
        // optimizer. Thus, the more branch targets there are, the lower is the
        // cost of the DFA opt.
-      DuplicationCost = *Metrics.NumInsts.getValue() / JumpTableSize;
+      DuplicationCost = Metrics.NumInsts / JumpTableSize;
      }
  
      LLVM_DEBUG(dbgs() << "\nDFA Jump Threading: Cost to jump thread block "
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp

index 5f958ee..21c0088 100644 (file)
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -682,7 +682,7 @@ InstructionCost llvm::ApproximateLoopSize(
    // that each loop has at least three instructions (likely a conditional
    // branch, a comparison feeding that branch, and some kind of loop increment
    // feeding that comparison instruction).
-  if (LoopSize.isValid() && *LoopSize.getValue() < BEInsns + 1)
+  if (LoopSize.isValid() && LoopSize < BEInsns + 1)
      // This is an open coded max() on InstructionCost
      LoopSize = BEInsns + 1;
  
diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp

index 1d55299..7bb7cea 100644 (file)
--- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -316,7 +316,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
                     L->dump());
          return Rotated;
        }
-      if (*Metrics.NumInsts.getValue() > MaxHeaderSize) {
+      if (Metrics.NumInsts > MaxHeaderSize) {
          LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains "
                            << Metrics.NumInsts
                            << " instructions, which is more than the threshold ("
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

index 76529e3..efff051 100644 (file)
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1182,7 +1182,7 @@ public:
    /// If interleave count has been specified by metadata it will be returned.
    /// Otherwise, the interleave count is computed and returned. VF and LoopCost
    /// are the selected vectorization factor and the cost of the selected VF.
-  unsigned selectInterleaveCount(ElementCount VF, unsigned LoopCost);
+  unsigned selectInterleaveCount(ElementCount VF, InstructionCost LoopCost);
  
    /// Memory access instruction may be vectorized in more than one way.
    /// Form of instruction after vectorization depends on cost.
@@ -1701,8 +1701,9 @@ private:
    /// scalarize and their scalar costs are collected in \p ScalarCosts. A
    /// non-negative return value implies the expression will be scalarized.
    /// Currently, only single-use chains are considered for scalarization.
-  int computePredInstDiscount(Instruction *PredInst, ScalarCostsTy &ScalarCosts,
-                              ElementCount VF);
+  InstructionCost computePredInstDiscount(Instruction *PredInst,
+                                          ScalarCostsTy &ScalarCosts,
+                                          ElementCount VF);
  
    /// Collect the instructions that are uniform after vectorization. An
    /// instruction is uniform if we represent it with a single scalar value in
@@ -5636,8 +5637,9 @@ void LoopVectorizationCostModel::collectElementTypesForWidening() {
    }
  }
  
-unsigned LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
-                                                           unsigned LoopCost) {
+unsigned
+LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
+                                                  InstructionCost LoopCost) {
    // -- The interleave heuristics --
    // We interleave the loop in order to expose ILP and reduce the loop overhead.
    // There are many micro-architectural considerations that we can't predict
@@ -5673,9 +5675,8 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
    // If we did not calculate the cost for VF (because the user selected the VF)
    // then we calculate the cost of VF here.
    if (LoopCost == 0) {
-    InstructionCost C = expectedCost(VF).first;
-    assert(C.isValid() && "Expected to have chosen a VF with valid cost");
-    LoopCost = *C.getValue();
+    LoopCost = expectedCost(VF).first;
+    assert(LoopCost.isValid() && "Expected to have chosen a VF with valid cost");
  
      // Loop body is free and there is no need for interleaving.
      if (LoopCost == 0)
@@ -5803,8 +5804,8 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
      // We assume that the cost overhead is 1 and we use the cost model
      // to estimate the cost of the loop and interleave until the cost of the
      // loop overhead is about 5% of the cost of the loop.
-    unsigned SmallIC =
-        std::min(IC, (unsigned)PowerOf2Floor(SmallLoopCost / LoopCost));
+    unsigned SmallIC = std::min(
+        IC, (unsigned)PowerOf2Floor(SmallLoopCost / *LoopCost.getValue()));
  
      // Interleave until store/load ports (estimated by max interleave count) are
      // saturated.
@@ -6130,7 +6131,7 @@ void LoopVectorizationCostModel::collectInstsToScalarize(ElementCount VF) {
    }
  }
  
-int LoopVectorizationCostModel::computePredInstDiscount(
+InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
      Instruction *PredInst, ScalarCostsTy &ScalarCosts, ElementCount VF) {
    assert(!isUniformAfterVectorization(PredInst, VF) &&
           "Instruction marked uniform-after-vectorization will be predicated");
@@ -6239,7 +6240,7 @@ int LoopVectorizationCostModel::computePredInstDiscount(
      ScalarCosts[I] = ScalarCost;
    }
  
-  return *Discount.getValue();
+  return Discount;
  }
  
  LoopVectorizationCostModel::VectorizationCostTy
@@ -10305,7 +10306,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
    if (MaybeVF) {
      VF = *MaybeVF;
      // Select the interleave count.
-    IC = CM.selectInterleaveCount(VF.Width, *VF.Cost.getValue());
+    IC = CM.selectInterleaveCount(VF.Width, VF.Cost);
  
      unsigned SelectedIC = std::max(IC, UserIC);
      //  Optimistically generate runtime checks if they are needed. Drop them if
author	Daniil Fukalov <1671137+dfukalov@users.noreply.github.com>
	Wed, 17 Aug 2022 23:34:22 +0000 (02:34 +0300)
committer	Daniil Fukalov <1671137+dfukalov@users.noreply.github.com>
	Fri, 26 Aug 2022 13:37:32 +0000 (16:37 +0300)
llvm/include/llvm/Support/InstructionCost.h		patch | blob | history
llvm/lib/Transforms/IPO/FunctionSpecialization.cpp		patch | blob | history
llvm/lib/Transforms/IPO/PartialInlining.cpp		patch | blob | history
llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp		patch | blob | history
llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp		patch | blob | history
llvm/lib/Transforms/Utils/LoopRotationUtils.cpp		patch | blob | history
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp		patch | blob | history