/// \returns the cost incurred by unwanted spills and fills, caused by
/// holding live values over call sites.
- int getSpillCost() const;
+ InstructionCost getSpillCost() const;
/// \returns the vectorization cost of the subtree that starts at \p VL.
/// A negative number means that this is profitable.
- int getTreeCost();
+ InstructionCost getTreeCost();
/// Construct a vectorizable tree that starts at \p Roots, ignoring users for
/// the purpose of scheduling and extraction in the \p UserIgnoreLst.
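
For context, here is a minimal sketch (not part of this patch) of how the InstructionCost type from llvm/Support/InstructionCost.h behaves, assuming its documented interface: a cost is either a valid signed number or an invalid "could not be modelled" state, arithmetic propagates invalidity, and getValue() only yields a number for valid costs. Negative valid costs still mean "profitable", exactly as the \returns comment above says. The demo function name is illustrative.

#include "llvm/Support/Debug.h"
#include "llvm/Support/InstructionCost.h"
#include <cassert>
using namespace llvm;

void instructionCostDemo() {
  InstructionCost VecCost = 3;     // valid cost of the vectorized form
  InstructionCost ScalarCost = 8;  // valid cost of the scalar form it replaces
  InstructionCost Net = VecCost;
  Net -= ScalarCost;               // -5: negative means vectorizing is a win
  InstructionCost Unknown = InstructionCost::getInvalid(); // unmodelled operation
  InstructionCost Total = Net + Unknown; // the invalid state propagates
  assert(!Total.isValid());
  if (auto V = Net.getValue())     // getValue() is empty for invalid costs
    dbgs() << "net cost: " << *V << "\n";
}
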
return true;
}
-int BoUpSLP::getSpillCost() const {
+InstructionCost BoUpSLP::getSpillCost() const {
// Walk from the bottom of the tree to the top, tracking which values are
// live. When we see a call instruction that is not part of our tree,
// query TTI to see if there is a cost to keeping values live over it
// (for example, if spills and fills are required).
unsigned BundleWidth = VectorizableTree.front()->Scalars.size();
- int Cost = 0;
+ InstructionCost Cost = 0;
SmallPtrSet<Instruction*, 4> LiveValues;
Instruction *PrevInst = nullptr;
return Cost;
}
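
The loop body elided from the hunk above is where getSpillCost actually charges anything. Below is a hedged sketch of that step, assuming the existing TTI hook getCostOfKeepingLiveOverCall and using a hypothetical helper name (chargeCallSite): each live scalar type is widened to the bundle width, and TTI reports what spilling and filling those vectors around a call site would cost. Whether that hook returns a plain integer or an InstructionCost in a given LLVM revision, accumulating the result into an InstructionCost works through the implicit conversion.

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Hypothetical helper: a simplified sketch of the per-call-site spill charge.
static InstructionCost chargeCallSite(const SmallPtrSetImpl<Instruction *> &LiveValues,
                                      unsigned BundleWidth,
                                      const TargetTransformInfo &TTI) {
  SmallVector<Type *, 4> Tys;
  for (Instruction *LV : LiveValues)
    Tys.push_back(FixedVectorType::get(LV->getType(), BundleWidth));
  // Models the spill/fill cost of keeping these vector values live over a call.
  return TTI.getCostOfKeepingLiveOverCall(Tys);
}
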
-int BoUpSLP::getTreeCost() {
- int Cost = 0;
+InstructionCost BoUpSLP::getTreeCost() {
+ InstructionCost Cost = 0;
LLVM_DEBUG(dbgs() << "SLP: Calculating cost for tree of size "
<< VectorizableTree.size() << ".\n");
}))
continue;
- int C = getEntryCost(&TE);
+ InstructionCost C = getEntryCost(&TE);
Cost += C;
LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
<< " for bundle that starts with " << *TE.Scalars[0]
}
SmallPtrSet<Value *, 16> ExtractCostCalculated;
- int ExtractCost = 0;
+ InstructionCost ExtractCost = 0;
for (ExternalUser &EU : ExternalUses) {
// We only add extract cost once for the same scalar.
if (!ExtractCostCalculated.insert(EU.Scalar).second)
}
}
- int SpillCost = getSpillCost();
+ InstructionCost SpillCost = getSpillCost();
Cost += SpillCost + ExtractCost;
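
The "only add extract cost once for the same scalar" comment above is enforced by the SmallPtrSet insert-returns-pair idiom. A tiny sketch of the same deduplication pattern, with illustrative names (dedupedExtractCost and CostOf are not from the patch):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/InstructionCost.h"
using namespace llvm;

// Illustrative: charge each externally used scalar exactly once, even when it
// has several external users.
static InstructionCost dedupedExtractCost(ArrayRef<Value *> ExternalScalars,
                                          function_ref<InstructionCost(Value *)> CostOf) {
  SmallPtrSet<Value *, 16> Seen;
  InstructionCost ExtractCost = 0;
  for (Value *Scalar : ExternalScalars)
    if (Seen.insert(Scalar).second) // .second is false when Scalar was already charged
      ExtractCost += CostOf(Scalar);
  return ExtractCost;
}
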
#ifndef NDEBUG
R.computeMinimumValueSizes();
- int Cost = R.getTreeCost();
+ InstructionCost Cost = R.getTreeCost();
LLVM_DEBUG(dbgs() << "SLP: Found cost = " << Cost << " for VF =" << VF << "\n");
- if (Cost < -SLPCostThreshold) {
+ if (Cost.isValid() && Cost < -SLPCostThreshold) {
LLVM_DEBUG(dbgs() << "SLP: Decided to vectorize cost = " << Cost << "\n");
using namespace ore;
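
The new isValid() guard is the behavioural heart of this change: an invalid cost means the target could not model part of the tree, and that must never be confused with a profitable (negative) number. A small sketch of the guarded decision, with shouldVectorize as a hypothetical name and Threshold standing in for the slp-threshold option:

#include "llvm/Support/InstructionCost.h"
using namespace llvm;

// Hypothetical helper mirroring the guarded check above.
static bool shouldVectorize(InstructionCost Cost, int Threshold) {
  // Refuse to act on a cost the backend could not compute; only a valid,
  // sufficiently negative cost counts as a win.
  return Cost.isValid() && Cost < -Threshold;
}
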
bool Changed = false;
bool CandidateFound = false;
- int MinCost = SLPCostThreshold;
+ InstructionCost MinCost = SLPCostThreshold.getValue();
bool CompensateUseCost =
!InsertUses.empty() && llvm::all_of(InsertUses, [](const Value *V) {
continue;
R.computeMinimumValueSizes();
- int Cost = R.getTreeCost();
+ InstructionCost Cost = R.getTreeCost();
CandidateFound = true;
if (CompensateUseCost) {
// TODO: Use TTI's getScalarizationOverhead for sequence of inserts
// Switching to the TTI interface might help a bit.
// Alternative solution could be pattern-match to detect a no-op or
// shuffle.
- unsigned UserCost = 0;
+ InstructionCost UserCost = 0;
for (unsigned Lane = 0; Lane < OpsWidth; Lane++) {
auto *IE = cast<InsertElementInst>(InsertUses[I + Lane]);
if (auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2)))
Cost -= UserCost;
}
- MinCost = std::min(MinCost, Cost);
+ MinCost = InstructionCost::min(MinCost, Cost);
- if (Cost < -SLPCostThreshold) {
+ if (Cost.isValid() && Cost < -SLPCostThreshold) {
LLVM_DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n");
R.getORE()->emit(OptimizationRemark(SV_NAME, "VectorizedList",
cast<Instruction>(Ops[0]))
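
For the use-cost compensation above, the TODO notes that TTI's getScalarizationOverhead could eventually replace the per-lane loop. A hedged sketch of that per-lane accumulation, assuming each user is an insertelement with a constant index and the three-argument getVectorInstrCost(Opcode, Type, Index) overload of this era (newer LLVM also takes a cost kind); the helper name and shape are illustrative, not the patch's exact code:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Illustrative only: sum the cost of the insertelement users that the
// vectorized sequence makes redundant; the caller then subtracts it from the
// tree cost (Cost -= UserCost above).
static InstructionCost insertUserCost(ArrayRef<Value *> InsertUses,
                                      const TargetTransformInfo &TTI) {
  InstructionCost UserCost = 0;
  for (Value *V : InsertUses) {
    auto *IE = cast<InsertElementInst>(V);
    if (auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2)))
      UserCost += TTI.getVectorInstrCost(Instruction::InsertElement,
                                         IE->getType(), CI->getZExtValue());
  }
  return UserCost;
}
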
V.computeMinimumValueSizes();
// Estimate cost.
- int TreeCost = V.getTreeCost();
- int ReductionCost = getReductionCost(TTI, ReducedVals[i], ReduxWidth);
- int Cost = TreeCost + ReductionCost;
+ InstructionCost TreeCost = V.getTreeCost();
+ InstructionCost ReductionCost =
+ getReductionCost(TTI, ReducedVals[i], ReduxWidth);
+ InstructionCost Cost = TreeCost + ReductionCost;
+ if (!Cost.isValid()) {
+ LLVM_DEBUG(dbgs() << "Encountered invalid baseline cost.\n");
+ return false;
+ }
if (Cost >= -SLPCostThreshold) {
V.getORE()->emit([&]() {
return OptimizationRemarkMissed(SV_NAME, "HorSLPNotBeneficial",