if (Cloner.OutlinedFunctions.empty())
return false;
- int SizeCost = 0;
- BlockFrequency WeightedRcost;
- int NonWeightedRcost;
-
auto OutliningCosts = computeOutliningCosts(Cloner);
- assert(std::get<0>(OutliningCosts).isValid() &&
- std::get<1>(OutliningCosts).isValid() && "Expected valid costs");
- SizeCost = *std::get<0>(OutliningCosts).getValue();
- NonWeightedRcost = *std::get<1>(OutliningCosts).getValue();
+ InstructionCost SizeCost = std::get<0>(OutliningCosts);
+ InstructionCost NonWeightedRcost = std::get<1>(OutliningCosts);
+
+ assert(SizeCost.isValid() && NonWeightedRcost.isValid() &&
+ "Expected valid costs");
// Only calculate RelativeToEntryFreq when we are doing single region
// outlining.
// execute the calls to outlined functions.
RelativeToEntryFreq = BranchProbability(0, 1);
- WeightedRcost = BlockFrequency(NonWeightedRcost) * RelativeToEntryFreq;
+ BlockFrequency WeightedRcost =
+ BlockFrequency(*NonWeightedRcost.getValue()) * RelativeToEntryFreq;
// The call sequence(s) to the outlined function(s) are larger than the sum of
// the original outlined region size(s), it does not increase the chances of
}
}
- unsigned DuplicationCost = 0;
+ InstructionCost DuplicationCost = 0;
unsigned JumpTableSize = 0;
TTI->getEstimatedNumberOfCaseClusters(*Switch, JumpTableSize, nullptr,
// using binary search, hence the LogBase2().
unsigned CondBranches =
APInt(32, Switch->getNumSuccessors()).ceilLogBase2();
- DuplicationCost = *Metrics.NumInsts.getValue() / CondBranches;
+ DuplicationCost = Metrics.NumInsts / CondBranches;
} else {
// Compared with jump tables, the DFA optimizer removes an indirect branch
// on each loop iteration, thus making branch prediction more precise. The
// predictor to make a mistake, and the more benefit there is in the DFA
// optimizer. Thus, the more branch targets there are, the lower is the
// cost of the DFA opt.
- DuplicationCost = *Metrics.NumInsts.getValue() / JumpTableSize;
+ DuplicationCost = Metrics.NumInsts / JumpTableSize;
}
LLVM_DEBUG(dbgs() << "\nDFA Jump Threading: Cost to jump thread block "
/// If interleave count has been specified by metadata it will be returned.
/// Otherwise, the interleave count is computed and returned. VF and LoopCost
/// are the selected vectorization factor and the cost of the selected VF.
- unsigned selectInterleaveCount(ElementCount VF, unsigned LoopCost);
+ unsigned selectInterleaveCount(ElementCount VF, InstructionCost LoopCost);
/// Memory access instruction may be vectorized in more than one way.
/// Form of instruction after vectorization depends on cost.
/// scalarize and their scalar costs are collected in \p ScalarCosts. A
/// non-negative return value implies the expression will be scalarized.
/// Currently, only single-use chains are considered for scalarization.
- int computePredInstDiscount(Instruction *PredInst, ScalarCostsTy &ScalarCosts,
- ElementCount VF);
+ InstructionCost computePredInstDiscount(Instruction *PredInst,
+ ScalarCostsTy &ScalarCosts,
+ ElementCount VF);
/// Collect the instructions that are uniform after vectorization. An
/// instruction is uniform if we represent it with a single scalar value in
}
}
-unsigned LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
- unsigned LoopCost) {
+unsigned
+LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
+ InstructionCost LoopCost) {
// -- The interleave heuristics --
// We interleave the loop in order to expose ILP and reduce the loop overhead.
// There are many micro-architectural considerations that we can't predict
// If we did not calculate the cost for VF (because the user selected the VF)
// then we calculate the cost of VF here.
if (LoopCost == 0) {
- InstructionCost C = expectedCost(VF).first;
- assert(C.isValid() && "Expected to have chosen a VF with valid cost");
- LoopCost = *C.getValue();
+ LoopCost = expectedCost(VF).first;
+ assert(LoopCost.isValid() && "Expected to have chosen a VF with valid cost");
// Loop body is free and there is no need for interleaving.
if (LoopCost == 0)
// We assume that the cost overhead is 1 and we use the cost model
// to estimate the cost of the loop and interleave until the cost of the
// loop overhead is about 5% of the cost of the loop.
- unsigned SmallIC =
- std::min(IC, (unsigned)PowerOf2Floor(SmallLoopCost / LoopCost));
+ unsigned SmallIC = std::min(
+ IC, (unsigned)PowerOf2Floor(SmallLoopCost / *LoopCost.getValue()));
// Interleave until store/load ports (estimated by max interleave count) are
// saturated.
}
}
-int LoopVectorizationCostModel::computePredInstDiscount(
+InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
Instruction *PredInst, ScalarCostsTy &ScalarCosts, ElementCount VF) {
assert(!isUniformAfterVectorization(PredInst, VF) &&
"Instruction marked uniform-after-vectorization will be predicated");
ScalarCosts[I] = ScalarCost;
}
- return *Discount.getValue();
+ return Discount;
}
LoopVectorizationCostModel::VectorizationCostTy
if (MaybeVF) {
VF = *MaybeVF;
// Select the interleave count.
- IC = CM.selectInterleaveCount(VF.Width, *VF.Cost.getValue());
+ IC = CM.selectInterleaveCount(VF.Width, VF.Cost);
unsigned SelectedIC = std::max(IC, UserIC);
// Optimistically generate runtime checks if they are needed. Drop them if