This patch migrates the TTI cost interfaces to return an InstructionCost.
See this patch for the introduction of the type: https://reviews.llvm.org/D91174
See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2020-November/146408.html
Depends on D97382
Reviewed By: ctetreau, paulwalker-arm
Differential Revision: https://reviews.llvm.org/D97466
///
/// The returned cost is defined in terms of \c TargetCostConstants, see its
/// comments for a detailed explanation of the cost values.
- int getUserCost(const User *U, ArrayRef<const Value *> Operands,
- TargetCostKind CostKind) const;
+ InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands,
+ TargetCostKind CostKind) const;
/// This is a helper function which calls the two-argument getUserCost
/// with \p Operands which are the current operands U has.
- int getUserCost(const User *U, TargetCostKind CostKind) const {
+ InstructionCost getUserCost(const User *U, TargetCostKind CostKind) const {
SmallVector<const Value *, 4> Operands(U->operand_values());
return getUserCost(U, Operands, CostKind);
}
private:
/// Estimate the latency of specified instruction.
/// Returns 1 as the default value.
- int getInstructionLatency(const Instruction *I) const;
+ InstructionCost getInstructionLatency(const Instruction *I) const;
  /// Returns the expected throughput cost of the instruction.
  /// Returns -1 if the cost is unknown.
  /// NOTE(review): now that this returns InstructionCost, consider returning
  /// InstructionCost::getInvalid() for unknown costs instead of -1 — confirm
  /// callers before changing.
- int getInstructionThroughput(const Instruction *I) const;
+ InstructionCost getInstructionThroughput(const Instruction *I) const;
/// The abstract base class used to type erase specific TTI
/// implementations.
getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize,
ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI) = 0;
- virtual int getUserCost(const User *U, ArrayRef<const Value *> Operands,
- TargetCostKind CostKind) = 0;
+ virtual InstructionCost getUserCost(const User *U,
+ ArrayRef<const Value *> Operands,
+ TargetCostKind CostKind) = 0;
virtual BranchProbability getPredictableBranchThreshold() = 0;
virtual bool hasBranchDivergence() = 0;
virtual bool useGPUDivergenceAnalysis() = 0;
virtual unsigned getGISelRematGlobalCost() const = 0;
virtual bool supportsScalableVectors() const = 0;
virtual bool hasActiveVectorLength() const = 0;
- virtual int getInstructionLatency(const Instruction *I) = 0;
+ virtual InstructionCost getInstructionLatency(const Instruction *I) = 0;
};
template <typename T>
int getMemcpyCost(const Instruction *I) override {
return Impl.getMemcpyCost(I);
}
- int getUserCost(const User *U, ArrayRef<const Value *> Operands,
- TargetCostKind CostKind) override {
+ InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands,
+ TargetCostKind CostKind) override {
return Impl.getUserCost(U, Operands, CostKind);
}
  BranchProbability getPredictableBranchThreshold() override {
    return Impl.getPredictableBranchThreshold();
  }
- int getInstructionLatency(const Instruction *I) override {
+ InstructionCost getInstructionLatency(const Instruction *I) override {
return Impl.getInstructionLatency(I);
}
};
return TTI::TCC_Basic;
}
- int getUserCost(const User *U, ArrayRef<const Value *> Operands,
- TTI::TargetCostKind CostKind) {
+ InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands,
+ TTI::TargetCostKind CostKind) {
auto *TargetTTI = static_cast<T *>(this);
// Handle non-intrinsic calls, invokes, and callbr.
// FIXME: Unlikely to be true for anything but CodeSize.
return TTI::TCC_Basic;
}
- int getInstructionLatency(const Instruction *I) {
+ InstructionCost getInstructionLatency(const Instruction *I) {
SmallVector<const Value *, 4> Operands(I->operand_values());
if (getUserCost(I, Operands, TTI::TCK_Latency) == TTI::TCC_Free)
return 0;
SimplifyAndSetOp);
}
- int getInstructionLatency(const Instruction *I) {
+ InstructionCost getInstructionLatency(const Instruction *I) {
if (isa<LoadInst>(I))
return getST()->getSchedModel().DefaultLoadLatency;
Operands.push_back(SimpleOp);
else
Operands.push_back(Op);
- return TargetTransformInfo::TCC_Free ==
- TTI.getUserCost(&GEP, Operands,
- TargetTransformInfo::TCK_SizeAndLatency);
+ return TTI.getUserCost(&GEP, Operands,
+ TargetTransformInfo::TCK_SizeAndLatency) ==
+ TargetTransformInfo::TCC_Free;
}
bool CallAnalyzer::visitAlloca(AllocaInst &I) {
if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0)))
SROAArgValues[&I] = SROAArg;
- return TargetTransformInfo::TCC_Free ==
- TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
+ return TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
+ TargetTransformInfo::TCC_Free;
}
bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
if (auto *SROAArg = getSROAArgForValueOrNull(Op))
SROAArgValues[&I] = SROAArg;
- return TargetTransformInfo::TCC_Free ==
- TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
+ return TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
+ TargetTransformInfo::TCC_Free;
}
bool CallAnalyzer::visitCastInst(CastInst &I) {
break;
}
- return TargetTransformInfo::TCC_Free ==
- TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
+ return TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
+ TargetTransformInfo::TCC_Free;
}
bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
bool CallAnalyzer::visitInstruction(Instruction &I) {
// Some instructions are free. All of the free intrinsics can also be
// handled by SROA, etc.
- if (TargetTransformInfo::TCC_Free ==
- TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency))
+ if (TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
+ TargetTransformInfo::TCC_Free)
return true;
// We found something we don't understand or can't handle. Mark any SROA-able
return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
}
-int TargetTransformInfo::getUserCost(const User *U,
- ArrayRef<const Value *> Operands,
- enum TargetCostKind CostKind) const {
- int Cost = TTIImpl->getUserCost(U, Operands, CostKind);
+InstructionCost
+TargetTransformInfo::getUserCost(const User *U,
+ ArrayRef<const Value *> Operands,
+ enum TargetCostKind CostKind) const {
+ InstructionCost Cost = TTIImpl->getUserCost(U, Operands, CostKind);
assert((CostKind == TTI::TCK_RecipThroughput || Cost >= 0) &&
"TTI should not produce negative costs!");
return Cost;
return TTIImpl->supportsScalableVectors();
}
-int TargetTransformInfo::getInstructionLatency(const Instruction *I) const {
+InstructionCost
+TargetTransformInfo::getInstructionLatency(const Instruction *I) const {
return TTIImpl->getInstructionLatency(I);
}
return matchPairwiseReduction(Root, Opcode, Ty);
}
-int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
+InstructionCost
+TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
switch (I->getOpcode()) {
// Scan the loop: don't unroll loops with calls as this could prevent
// inlining.
- unsigned Cost = 0;
+ InstructionCost Cost = 0;
for (auto *BB : L->getBlocks()) {
for (auto &I : *BB) {
// Don't unroll vectorised loop. MVE does not benefit from it as much as
return ST.getL1CacheLineSize();
}
-int
-HexagonTTIImpl::getUserCost(const User *U,
- ArrayRef<const Value *> Operands,
- TTI::TargetCostKind CostKind) {
+InstructionCost HexagonTTIImpl::getUserCost(const User *U,
+ ArrayRef<const Value *> Operands,
+ TTI::TargetCostKind CostKind) {
auto isCastFoldedIntoLoad = [this](const CastInst *CI) -> bool {
if (!CI->isIntegerCast())
return false;
/// @}
- int getUserCost(const User *U, ArrayRef<const Value *> Operands,
- TTI::TargetCostKind CostKind);
+ InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands,
+ TTI::TargetCostKind CostKind);
// Hexagon specific decision to generate a lookup table.
bool shouldBuildLookupTables() const;
return PPCTTIImpl::getIntImmCost(Imm, Ty, CostKind);
}
-unsigned
-PPCTTIImpl::getUserCost(const User *U, ArrayRef<const Value *> Operands,
- TTI::TargetCostKind CostKind) {
+InstructionCost PPCTTIImpl::getUserCost(const User *U,
+ ArrayRef<const Value *> Operands,
+ TTI::TargetCostKind CostKind) {
// We already implement getCastInstrCost and getMemoryOpCost where we perform
// the vector adjustment there.
if (isa<CastInst>(U) || isa<LoadInst>(U) || isa<StoreInst>(U))
int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty, TTI::TargetCostKind CostKind);
- unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands,
- TTI::TargetCostKind CostKind);
+ InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands,
+ TTI::TargetCostKind CostKind);
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,