/// extracted from vectors.
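/// For example (illustrative, with a hypothetical 4-element VecTy): pricing
/// the extraction of lanes 0 and 2 for throughput:
/// \code
///   APInt Demanded = APInt::getZero(4);
///   Demanded.setBit(0);
///   Demanded.setBit(2);
///   InstructionCost C = TTI.getScalarizationOverhead(
///       VecTy, Demanded, /*Insert=*/false, /*Extract=*/true,
///       TTI::TCK_RecipThroughput);
/// \endcode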
InstructionCost getScalarizationOverhead(VectorType *Ty,
const APInt &DemandedElts,
- bool Insert, bool Extract) const;
+ bool Insert, bool Extract,
+ TTI::TargetCostKind CostKind) const;
/// Estimate the overhead of scalarizing an instruction's unique
/// non-constant operands. The (potentially vector) types to use for each
/// argument are passed via Tys.
- InstructionCost getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
- ArrayRef<Type *> Tys) const;
+ InstructionCost
+ getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+ ArrayRef<Type *> Tys,
+ TTI::TargetCostKind CostKind) const;
/// If target has efficient vector element load/store instructions, it can
/// return true here so that insertion/extraction costs are not added to
/// the cost of scalarized loads and stores.
bool supportsEfficientVectorElementLoadStore() const;
/// \return The expected cost of vector Insert and Extract.
/// Use -1 to indicate that there is no information on the index value;
/// a typical use case is to provision the cost of vectorization/scalarization
/// in vectorizer passes.
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
unsigned Index = -1, Value *Op0 = nullptr,
Value *Op1 = nullptr) const;
/// A typical use case is cost estimation when the vector instruction
/// exists (e.g., from basic blocks during transformation).
InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
+ TTI::TargetCostKind CostKind,
unsigned Index = -1) const;
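// A typical call site updated by this change looks like (illustrative):
//   InstructionCost C = TTI.getVectorInstrCost(
//       Instruction::ExtractElement, VecTy, TTI::TCK_RecipThroughput,
//       /*Index=*/0, /*Op0=*/nullptr, /*Op1=*/nullptr);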
/// \return The cost of replication shuffle of \p VF elements typed \p EltTy
virtual bool useColdCCForColdCall(Function &F) = 0;
virtual InstructionCost getScalarizationOverhead(VectorType *Ty,
const APInt &DemandedElts,
- bool Insert,
- bool Extract) = 0;
+ bool Insert, bool Extract,
+ TargetCostKind CostKind) = 0;
virtual InstructionCost
getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
- ArrayRef<Type *> Tys) = 0;
+ ArrayRef<Type *> Tys,
+ TargetCostKind CostKind) = 0;
virtual bool supportsEfficientVectorElementLoadStore() = 0;
virtual bool supportsTailCalls() = 0;
virtual bool supportsTailCallFor(const CallBase *CB) = 0;
TTI::TargetCostKind CostKind,
const Instruction *I) = 0;
virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
unsigned Index, Value *Op0,
Value *Op1) = 0;
virtual InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
+ TTI::TargetCostKind CostKind,
unsigned Index) = 0;
virtual InstructionCost
InstructionCost getScalarizationOverhead(VectorType *Ty,
const APInt &DemandedElts,
- bool Insert, bool Extract) override {
- return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
+ bool Insert, bool Extract,
+ TargetCostKind CostKind) override {
+ return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
+ CostKind);
}
InstructionCost
getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
- ArrayRef<Type *> Tys) override {
- return Impl.getOperandsScalarizationOverhead(Args, Tys);
+ ArrayRef<Type *> Tys,
+ TargetCostKind CostKind) override {
+ return Impl.getOperandsScalarizationOverhead(Args, Tys, CostKind);
}
bool supportsEfficientVectorElementLoadStore() override {
const Instruction *I) override {
return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
}
- InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index,
- Value *Op0, Value *Op1) override {
- return Impl.getVectorInstrCost(Opcode, Val, Index, Op0, Op1);
+ InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
+ unsigned Index, Value *Op0,
+ Value *Op1) override {
+ return Impl.getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
}
InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
+ TTI::TargetCostKind CostKind,
unsigned Index) override {
- return Impl.getVectorInstrCost(I, Val, Index);
+ return Impl.getVectorInstrCost(I, Val, CostKind, Index);
}
InstructionCost
getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
InstructionCost getScalarizationOverhead(VectorType *Ty,
const APInt &DemandedElts,
- bool Insert, bool Extract) const {
+ bool Insert, bool Extract,
+ TTI::TargetCostKind CostKind) const {
return 0;
}
- InstructionCost getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
- ArrayRef<Type *> Tys) const {
+ InstructionCost
+ getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+ ArrayRef<Type *> Tys,
+ TTI::TargetCostKind CostKind) const {
return 0;
}
return 1;
}
- InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index,
- Value *Op0, Value *Op1) const {
+ InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
+ unsigned Index, Value *Op0,
+ Value *Op1) const {
return 1;
}
InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
+ TTI::TargetCostKind CostKind,
unsigned Index) const {
return 1;
}
if (auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2)))
if (CI->getValue().getActiveBits() <= 32)
Idx = CI->getZExtValue();
- return TargetTTI->getVectorInstrCost(*IE, Ty, Idx);
+ return TargetTTI->getVectorInstrCost(*IE, Ty, CostKind, Idx);
}
case Instruction::ShuffleVector: {
auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
if (CI->getValue().getActiveBits() <= 32)
Idx = CI->getZExtValue();
Type *DstTy = U->getOperand(0)->getType();
- return TargetTTI->getVectorInstrCost(*EEI, DstTy, Idx);
+ return TargetTTI->getVectorInstrCost(*EEI, DstTy, CostKind, Idx);
}
}
/// Estimate a cost of Broadcast as an extract and sequence of insert
/// operations.
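/// E.g. a broadcast of a <4 x i32> is modeled below as one extract of lane 0
/// plus four inserts, all priced with the caller's CostKind.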
- InstructionCost getBroadcastShuffleOverhead(FixedVectorType *VTy) {
+ InstructionCost getBroadcastShuffleOverhead(FixedVectorType *VTy,
+ TTI::TargetCostKind CostKind) {
InstructionCost Cost = 0;
// Broadcast cost is equal to the cost of extracting the zero'th element
// plus the cost of inserting it into every element of the result vector.
- Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, 0,
- nullptr, nullptr);
+ Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy,
+ CostKind, 0, nullptr, nullptr);
for (int i = 0, e = VTy->getNumElements(); i < e; ++i) {
- Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i,
- nullptr, nullptr);
+ Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy,
+ CostKind, i, nullptr, nullptr);
}
return Cost;
}
/// Estimate a cost of shuffle as a sequence of extract and insert
/// operations.
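/// E.g. a permute of a <4 x i32> is modeled below as four extract/insert
/// pairs, each priced with the caller's CostKind.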
- InstructionCost getPermuteShuffleOverhead(FixedVectorType *VTy) {
+ InstructionCost getPermuteShuffleOverhead(FixedVectorType *VTy,
+ TTI::TargetCostKind CostKind) {
InstructionCost Cost = 0;
// Shuffle cost is equal to the cost of extracting elements from its
// argument plus the cost of inserting them into the result vector.
// vector and finally index 3 of second vector and insert them at index
// <0,1,2,3> of result vector.
for (int i = 0, e = VTy->getNumElements(); i < e; ++i) {
- Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i,
- nullptr, nullptr);
- Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, i,
- nullptr, nullptr);
+ Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy,
+ CostKind, i, nullptr, nullptr);
+ Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy,
+ CostKind, i, nullptr, nullptr);
}
return Cost;
}
/// Estimate a cost of subvector extraction as a sequence of extract and
/// insert operations.
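/// E.g. extracting a <2 x i32> subvector at Index 2 of a <4 x i32> source is
/// modeled below as extracts of source lanes 2 and 3 plus inserts into
/// result lanes 0 and 1.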
- InstructionCost getExtractSubvectorOverhead(VectorType *VTy, int Index,
- FixedVectorType *SubVTy) {
+ InstructionCost getExtractSubvectorOverhead(VectorType *VTy,
+ TTI::TargetCostKind CostKind,
+ int Index,
+ FixedVectorType *SubVTy) {
assert(VTy && SubVTy &&
"Can only extract subvectors from vectors");
int NumSubElts = SubVTy->getNumElements();
// the source type plus the cost of inserting them into the result vector
// type.
for (int i = 0; i != NumSubElts; ++i) {
- Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy,
- i + Index, nullptr, nullptr);
- Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, SubVTy, i,
- nullptr, nullptr);
+ Cost +=
+ thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy,
+ CostKind, i + Index, nullptr, nullptr);
+ Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, SubVTy,
+ CostKind, i, nullptr, nullptr);
}
return Cost;
}
/// Estimate a cost of subvector insertion as a sequence of extract and
/// insert operations.
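/// E.g. inserting a <2 x i32> subvector at Index 2 of a <4 x i32> destination
/// is modeled below as extracts of subvector lanes 0 and 1 plus inserts into
/// destination lanes 2 and 3.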
- InstructionCost getInsertSubvectorOverhead(VectorType *VTy, int Index,
- FixedVectorType *SubVTy) {
+ InstructionCost getInsertSubvectorOverhead(VectorType *VTy,
+ TTI::TargetCostKind CostKind,
+ int Index,
+ FixedVectorType *SubVTy) {
assert(VTy && SubVTy &&
"Can only insert subvectors into vectors");
int NumSubElts = SubVTy->getNumElements();
// type.
for (int i = 0; i != NumSubElts; ++i) {
Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVTy,
- i, nullptr, nullptr);
- Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy,
- i + Index, nullptr, nullptr);
+ CostKind, i, nullptr, nullptr);
+ Cost +=
+ thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, CostKind,
+ i + Index, nullptr, nullptr);
}
return Cost;
}
FixedVectorType::get(
PointerType::get(VT->getElementType(), 0),
VT->getNumElements()),
- -1, nullptr, nullptr)
+ CostKind, -1, nullptr, nullptr)
: 0;
InstructionCost LoadCost =
VT->getNumElements() *
getMemoryOpCost(Opcode, VT->getElementType(), Alignment, 0, CostKind));
// Next, compute the cost of packing the result in a vector.
- InstructionCost PackingCost = getScalarizationOverhead(
- VT, Opcode != Instruction::Store, Opcode == Instruction::Store);
+ InstructionCost PackingCost =
+ getScalarizationOverhead(VT, Opcode != Instruction::Store,
+ Opcode == Instruction::Store, CostKind);
InstructionCost ConditionalCost = 0;
if (VariableMask) {
Instruction::ExtractElement,
FixedVectorType::get(Type::getInt1Ty(DataTy->getContext()),
VT->getNumElements()),
- -1, nullptr, nullptr) +
+ CostKind, -1, nullptr, nullptr) +
getCFInstrCost(Instruction::Br, CostKind) +
getCFInstrCost(Instruction::PHI, CostKind));
}
/// extracted from vectors.
InstructionCost getScalarizationOverhead(VectorType *InTy,
const APInt &DemandedElts,
- bool Insert, bool Extract) {
+ bool Insert, bool Extract,
+ TTI::TargetCostKind CostKind) {
/// FIXME: a bitfield is not a reasonable abstraction for talking about
/// which elements are needed from a scalable vector
if (isa<ScalableVectorType>(InTy))
if (!DemandedElts[i])
continue;
if (Insert)
- Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, Ty, i,
- nullptr, nullptr);
+ Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, Ty,
+ CostKind, i, nullptr, nullptr);
if (Extract)
- Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, i,
- nullptr, nullptr);
+ Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty,
+ CostKind, i, nullptr, nullptr);
}
return Cost;
/// Helper wrapper for the DemandedElts variant of getScalarizationOverhead.
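/// This simply demands every lane: for a <4 x float> it forwards an all-ones
/// 4-bit mask together with the caller's CostKind.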
InstructionCost getScalarizationOverhead(VectorType *InTy, bool Insert,
- bool Extract) {
+ bool Extract,
+ TTI::TargetCostKind CostKind) {
if (isa<ScalableVectorType>(InTy))
return InstructionCost::getInvalid();
auto *Ty = cast<FixedVectorType>(InTy);
APInt DemandedElts = APInt::getAllOnes(Ty->getNumElements());
- return thisT()->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
+ return thisT()->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
+ CostKind);
}
/// Estimate the overhead of scalarizing an instruction's unique
/// non-constant operands. The (potentially vector) types to use for each
/// argument are passed via Tys.
- InstructionCost getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
- ArrayRef<Type *> Tys) {
+ InstructionCost
+ getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+ ArrayRef<Type *> Tys,
+ TTI::TargetCostKind CostKind) {
assert(Args.size() == Tys.size() && "Expected matching Args and Tys");
InstructionCost Cost = 0;
if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
if (auto *VecTy = dyn_cast<VectorType>(Ty))
- Cost += getScalarizationOverhead(VecTy, false, true);
+ Cost += getScalarizationOverhead(VecTy, /*Insert*/ false,
+ /*Extract*/ true, CostKind);
}
}
/// added as a heuristic.
InstructionCost getScalarizationOverhead(VectorType *RetTy,
ArrayRef<const Value *> Args,
- ArrayRef<Type *> Tys) {
- InstructionCost Cost = getScalarizationOverhead(RetTy, true, false);
+ ArrayRef<Type *> Tys,
+ TTI::TargetCostKind CostKind) {
+ InstructionCost Cost = getScalarizationOverhead(
+ RetTy, /*Insert*/ true, /*Extract*/ false, CostKind);
if (!Args.empty())
- Cost += getOperandsScalarizationOverhead(Args, Tys);
+ Cost += getOperandsScalarizationOverhead(Args, Tys, CostKind);
else
// When no information on arguments is provided, we add the cost
// associated with one argument as a heuristic.
- Cost += getScalarizationOverhead(RetTy, false, true);
+ Cost += getScalarizationOverhead(RetTy, /*Insert*/ false,
+ /*Extract*/ true, CostKind);
return Cost;
}
// Return the cost of multiple scalar invocation plus the cost of
// inserting and extracting the values.
SmallVector<Type *> Tys(Args.size(), Ty);
- return getScalarizationOverhead(VTy, Args, Tys) +
+ return getScalarizationOverhead(VTy, Args, Tys, CostKind) +
VTy->getNumElements() * Cost;
}
switch (improveShuffleKindFromMask(Kind, Mask)) {
case TTI::SK_Broadcast:
if (auto *FVT = dyn_cast<FixedVectorType>(Tp))
- return getBroadcastShuffleOverhead(FVT);
+ return getBroadcastShuffleOverhead(FVT, CostKind);
return InstructionCost::getInvalid();
case TTI::SK_Select:
case TTI::SK_Splice:
case TTI::SK_PermuteSingleSrc:
case TTI::SK_PermuteTwoSrc:
if (auto *FVT = dyn_cast<FixedVectorType>(Tp))
- return getPermuteShuffleOverhead(FVT);
+ return getPermuteShuffleOverhead(FVT, CostKind);
return InstructionCost::getInvalid();
case TTI::SK_ExtractSubvector:
- return getExtractSubvectorOverhead(Tp, Index,
+ return getExtractSubvectorOverhead(Tp, CostKind, Index,
cast<FixedVectorType>(SubTp));
case TTI::SK_InsertSubvector:
- return getInsertSubvectorOverhead(Tp, Index,
+ return getInsertSubvectorOverhead(Tp, CostKind, Index,
cast<FixedVectorType>(SubTp));
}
llvm_unreachable("Unknown TTI::ShuffleKind");
// Return the cost of multiple scalar invocation plus the cost of
// inserting and extracting the values.
- return getScalarizationOverhead(DstVTy, true, true) + Num * Cost;
+ return getScalarizationOverhead(DstVTy, /*Insert*/ true, /*Extract*/ true,
+ CostKind) +
+ Num * Cost;
}
// We already handled vector-to-vector and scalar-to-scalar conversions.
// that the conversion is scalarized in one way or another.
if (Opcode == Instruction::BitCast) {
// Illegal bitcasts are done by storing and loading from a stack slot.
- return (SrcVTy ? getScalarizationOverhead(SrcVTy, false, true) : 0) +
- (DstVTy ? getScalarizationOverhead(DstVTy, true, false) : 0);
+ return (SrcVTy ? getScalarizationOverhead(SrcVTy, /*Insert*/ false,
+ /*Extract*/ true, CostKind)
+ : 0) +
+ (DstVTy ? getScalarizationOverhead(DstVTy, /*Insert*/ true,
+ /*Extract*/ false, CostKind)
+ : 0);
}
llvm_unreachable("Unhandled cast");
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
VectorType *VecTy, unsigned Index) {
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
return thisT()->getVectorInstrCost(Instruction::ExtractElement, VecTy,
- Index, nullptr, nullptr) +
+ CostKind, Index, nullptr, nullptr) +
thisT()->getCastInstrCost(Opcode, Dst, VecTy->getElementType(),
- TTI::CastContextHint::None,
- TTI::TCK_RecipThroughput);
+ TTI::CastContextHint::None, CostKind);
}
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
// Return the cost of multiple scalar invocation plus the cost of
// inserting and extracting the values.
- return getScalarizationOverhead(ValVTy, true, false) + Num * Cost;
+ return getScalarizationOverhead(ValVTy, /*Insert*/ true,
+ /*Extract*/ false, CostKind) +
+ Num * Cost;
}
// Unknown scalar opcode.
return 1;
}
- InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index,
- Value *Op0, Value *Op1) {
+ InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
+ unsigned Index, Value *Op0, Value *Op1) {
return getRegUsageForType(Val->getScalarType());
}
InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
+ TTI::TargetCostKind CostKind,
unsigned Index) {
Value *Op0 = nullptr;
Value *Op1 = nullptr;
Op0 = IE->getOperand(0);
Op1 = IE->getOperand(1);
}
- return thisT()->getVectorInstrCost(I.getOpcode(), Val, Index, Op0, Op1);
+ return thisT()->getVectorInstrCost(I.getOpcode(), Val, CostKind, Index, Op0,
+ Op1);
}
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedDstElts, VF);
Cost += thisT()->getScalarizationOverhead(SrcVT, DemandedSrcElts,
/*Insert*/ false,
- /*Extract*/ true);
- Cost +=
- thisT()->getScalarizationOverhead(ReplicatedVT, DemandedDstElts,
- /*Insert*/ true, /*Extract*/ false);
+ /*Extract*/ true, CostKind);
+ Cost += thisT()->getScalarizationOverhead(ReplicatedVT, DemandedDstElts,
+ /*Insert*/ true,
+ /*Extract*/ false, CostKind);
return Cost;
}
if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
// This is a vector load/store for some illegal type that is scalarized.
// We must account for the cost of building or decomposing the vector.
- Cost += getScalarizationOverhead(cast<VectorType>(Src),
- Opcode != Instruction::Store,
- Opcode == Instruction::Store);
+ Cost += getScalarizationOverhead(
+ cast<VectorType>(Src), Opcode != Instruction::Store,
+ Opcode == Instruction::Store, CostKind);
}
}
// %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0
// The cost is estimated as extract elements at 0, 2, 4, 6 from the
// <8 x i32> vector and insert them into a <4 x i32> vector.
- InstructionCost InsSubCost =
- thisT()->getScalarizationOverhead(SubVT, DemandedAllSubElts,
- /*Insert*/ true, /*Extract*/ false);
+ InstructionCost InsSubCost = thisT()->getScalarizationOverhead(
+ SubVT, DemandedAllSubElts,
+ /*Insert*/ true, /*Extract*/ false, CostKind);
Cost += Indices.size() * InsSubCost;
- Cost +=
- thisT()->getScalarizationOverhead(VT, DemandedLoadStoreElts,
- /*Insert*/ false, /*Extract*/ true);
+ Cost += thisT()->getScalarizationOverhead(VT, DemandedLoadStoreElts,
+ /*Insert*/ false,
+ /*Extract*/ true, CostKind);
} else {
// The interleave cost is extract elements from sub vectors, and
// insert them into the wide vector.
// The cost is estimated as extract all elements (of actual members,
// excluding gaps) from both <4 x i32> vectors and insert into the <12 x
// i32> vector.
- InstructionCost ExtSubCost =
- thisT()->getScalarizationOverhead(SubVT, DemandedAllSubElts,
- /*Insert*/ false, /*Extract*/ true);
+ InstructionCost ExtSubCost = thisT()->getScalarizationOverhead(
+ SubVT, DemandedAllSubElts,
+ /*Insert*/ false, /*Extract*/ true, CostKind);
Cost += ExtSubCost * Indices.size();
Cost += thisT()->getScalarizationOverhead(VT, DemandedLoadStoreElts,
/*Insert*/ true,
- /*Extract*/ false);
+ /*Extract*/ false, CostKind);
}
if (!UseMaskForCond)
if (RetVF.isVector() && !RetVF.isScalable()) {
ScalarizationCost = 0;
if (!RetTy->isVoidTy())
- ScalarizationCost +=
- getScalarizationOverhead(cast<VectorType>(RetTy), true, false);
+ ScalarizationCost += getScalarizationOverhead(
+ cast<VectorType>(RetTy),
+ /*Insert*/ true, /*Extract*/ false, CostKind);
ScalarizationCost +=
- getOperandsScalarizationOverhead(Args, ICA.getArgTypes());
+ getOperandsScalarizationOverhead(Args, ICA.getArgTypes(), CostKind);
}
IntrinsicCostAttributes Attrs(IID, RetTy, ICA.getArgTypes(), FMF, I,
Type *ScalarRetTy = RetTy;
if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
if (!SkipScalarizationCost)
- ScalarizationCost = getScalarizationOverhead(RetVTy, true, false);
+ ScalarizationCost = getScalarizationOverhead(
+ RetVTy, /*Insert*/ true, /*Extract*/ false, CostKind);
ScalarCalls = std::max(ScalarCalls,
cast<FixedVectorType>(RetVTy)->getNumElements());
ScalarRetTy = RetTy->getScalarType();
Type *Ty = Tys[i];
if (auto *VTy = dyn_cast<VectorType>(Ty)) {
if (!SkipScalarizationCost)
- ScalarizationCost += getScalarizationOverhead(VTy, false, true);
+ ScalarizationCost += getScalarizationOverhead(
+ VTy, /*Insert*/ false, /*Extract*/ true, CostKind);
ScalarCalls = std::max(ScalarCalls,
cast<FixedVectorType>(VTy)->getNumElements());
Ty = Ty->getScalarType();
return InstructionCost::getInvalid();
InstructionCost ScalarizationCost =
- SkipScalarizationCost ? ScalarizationCostPassed
- : getScalarizationOverhead(RetVTy, true, false);
+ SkipScalarizationCost
+ ? ScalarizationCostPassed
+ : getScalarizationOverhead(RetVTy, /*Insert*/ true,
+ /*Extract*/ false, CostKind);
unsigned ScalarCalls = cast<FixedVectorType>(RetVTy)->getNumElements();
SmallVector<Type *, 4> ScalarTys;
for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
if (auto *VTy = dyn_cast<VectorType>(Tys[i])) {
if (!ICA.skipScalarizationCost())
- ScalarizationCost += getScalarizationOverhead(VTy, false, true);
+ ScalarizationCost += getScalarizationOverhead(
+ VTy, /*Insert*/ false, /*Extract*/ true, CostKind);
ScalarCalls = std::max(ScalarCalls,
cast<FixedVectorType>(VTy)->getNumElements());
}
ArithCost +=
NumReduxLevels * thisT()->getArithmeticInstrCost(Opcode, Ty, CostKind);
return ShuffleCost + ArithCost +
- thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0,
- nullptr, nullptr);
+ thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty,
+ CostKind, 0, nullptr, nullptr);
}
/// Try to calculate the cost of performing strict (in-order) reductions,
return InstructionCost::getInvalid();
auto *VTy = cast<FixedVectorType>(Ty);
- InstructionCost ExtractCost =
- getScalarizationOverhead(VTy, /*Insert=*/false, /*Extract=*/true);
+ InstructionCost ExtractCost = getScalarizationOverhead(
+ VTy, /*Insert=*/false, /*Extract=*/true, CostKind);
InstructionCost ArithCost = thisT()->getArithmeticInstrCost(
Opcode, VTy->getElementType(), CostKind);
ArithCost *= VTy->getNumElements();
// The last min/max should be in vector registers and we counted it above.
// So just need a single extractelement.
return ShuffleCost + MinMaxCost +
- thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0,
- nullptr, nullptr);
+ thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty,
+ CostKind, 0, nullptr, nullptr);
}
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
return TTIImpl->useColdCCForColdCall(F);
}
-InstructionCost
-TargetTransformInfo::getScalarizationOverhead(VectorType *Ty,
- const APInt &DemandedElts,
- bool Insert, bool Extract) const {
- return TTIImpl->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
+InstructionCost TargetTransformInfo::getScalarizationOverhead(
+ VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,
+ TTI::TargetCostKind CostKind) const {
+ return TTIImpl->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
+ CostKind);
}
InstructionCost TargetTransformInfo::getOperandsScalarizationOverhead(
- ArrayRef<const Value *> Args, ArrayRef<Type *> Tys) const {
- return TTIImpl->getOperandsScalarizationOverhead(Args, Tys);
+ ArrayRef<const Value *> Args, ArrayRef<Type *> Tys,
+ TTI::TargetCostKind CostKind) const {
+ return TTIImpl->getOperandsScalarizationOverhead(Args, Tys, CostKind);
}
bool TargetTransformInfo::supportsEfficientVectorElementLoadStore() const {
}
InstructionCost TargetTransformInfo::getVectorInstrCost(
- unsigned Opcode, Type *Val, unsigned Index, Value *Op0, Value *Op1) const {
+ unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
+ Value *Op0, Value *Op1) const {
// FIXME: Assert that Opcode is either InsertElement or ExtractElement.
// This is mentioned in the interface description and respected by all
// callers, but never asserted upon.
InstructionCost Cost =
- TTIImpl->getVectorInstrCost(Opcode, Val, Index, Op0, Op1);
+ TTIImpl->getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
-InstructionCost TargetTransformInfo::getVectorInstrCost(const Instruction &I,
- Type *Val,
- unsigned Index) const {
+InstructionCost
+TargetTransformInfo::getVectorInstrCost(const Instruction &I, Type *Val,
+ TTI::TargetCostKind CostKind,
+ unsigned Index) const {
// FIXME: Assert that Opcode is either InsertElement or ExtractElement.
// This is mentioned in the interface description and respected by all
// callers, but never asserted upon.
- InstructionCost Cost = TTIImpl->getVectorInstrCost(I, Val, Index);
+ InstructionCost Cost = TTIImpl->getVectorInstrCost(I, Val, CostKind, Index);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
// The scalar chain of computation has to pay for the transition
// scalar to vector.
// The vector chain has to account for the combining cost.
- InstructionCost ScalarCost =
- TTI.getVectorInstrCost(*Transition, PromotedType, Index);
- InstructionCost VectorCost = StoreExtractCombineCost;
enum TargetTransformInfo::TargetCostKind CostKind =
TargetTransformInfo::TCK_RecipThroughput;
+ InstructionCost ScalarCost =
+ TTI.getVectorInstrCost(*Transition, PromotedType, CostKind, Index);
+ InstructionCost VectorCost = StoreExtractCombineCost;
for (const auto &Inst : InstsToBePromoted) {
// Compute the cost.
// By construction, all instructions being promoted are arithmetic ones.
// Get the cost for the extract. We compute the cost (if any) for the extend
// below.
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
InstructionCost Cost = getVectorInstrCost(Instruction::ExtractElement, VecTy,
- Index, nullptr, nullptr);
+ CostKind, Index, nullptr, nullptr);
// Legalize the types.
auto VecLT = getTypeLegalizationCost(VecTy);
auto DstVT = TLI->getValueType(DL, Dst);
auto SrcVT = TLI->getValueType(DL, Src);
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
// If the resulting type is still a vector and the destination type is legal,
// we may get the extension for free. If not, get the default cost for the
}
InstructionCost AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
unsigned Index, Value *Op0,
Value *Op1) {
return getVectorInstrCostHelper(Val, Index, false /* HasRealUse */);
}
InstructionCost AArch64TTIImpl::getVectorInstrCost(const Instruction &I,
- Type *Val, unsigned Index) {
+ Type *Val,
+ TTI::TargetCostKind CostKind,
+ unsigned Index) {
return getVectorInstrCostHelper(Val, Index, true /* HasRealUse */);
}
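// Note: in this patch the shared getVectorInstrCostHelper does not consume
// CostKind yet; both AArch64 overloads accept it only to match the updated
// interface.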
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
- InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index,
- Value *Op0, Value *Op1);
+ InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
+ unsigned Index, Value *Op0, Value *Op1);
InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
+ TTI::TargetCostKind CostKind,
unsigned Index);
InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
}
InstructionCost GCNTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
+ TTI::TargetCostKind CostKind,
unsigned Index, Value *Op0,
Value *Op1) {
switch (Opcode) {
if (EltSize < 32) {
if (EltSize == 16 && Index == 0 && ST->has16BitInsts())
return 0;
- return BaseT::getVectorInstrCost(Opcode, ValTy, Index, Op0, Op1);
+ return BaseT::getVectorInstrCost(Opcode, ValTy, CostKind, Index, Op0,
+ Op1);
}
// Extracts are just reads of a subregister, so are free. Inserts are
// likewise free; only dynamic indexing (an unknown Index) pays a cost.
return Index == ~0u ? 2 : 0;
}
default:
- return BaseT::getVectorInstrCost(Opcode, ValTy, Index, Op0, Op1);
+ return BaseT::getVectorInstrCost(Opcode, ValTy, CostKind, Index, Op0, Op1);
}
}
using BaseT::getVectorInstrCost;
InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy,
+ TTI::TargetCostKind CostKind,
unsigned Index, Value *Op0, Value *Op1);
bool isReadRegisterSourceOfDivergence(const IntrinsicInst *ReadReg) const;
}
InstructionCost R600TTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
+ TTI::TargetCostKind CostKind,
unsigned Index, Value *Op0,
Value *Op1) {
switch (Opcode) {
unsigned EltSize =
DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType());
if (EltSize < 32) {
- return BaseT::getVectorInstrCost(Opcode, ValTy, Index, Op0, Op1);
+ return BaseT::getVectorInstrCost(Opcode, ValTy, CostKind, Index, Op0,
+ Op1);
}
// Extracts are just reads of a subregister, so are free. Inserts are
// likewise free; only dynamic indexing (an unknown Index) pays a cost.
return Index == ~0u ? 2 : 0;
}
default:
- return BaseT::getVectorInstrCost(Opcode, ValTy, Index, Op0, Op1);
+ return BaseT::getVectorInstrCost(Opcode, ValTy, CostKind, Index, Op0, Op1);
}
}
const Instruction *I = nullptr);
using BaseT::getVectorInstrCost;
InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy,
+ TTI::TargetCostKind CostKind,
unsigned Index, Value *Op0, Value *Op1);
};
}
InstructionCost ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
+ TTI::TargetCostKind CostKind,
unsigned Index, Value *Op0,
Value *Op1) {
// Penalize inserting into a D-subregister. We end up with a three times
// lower estimated throughput on swift.
if (ValTy->isVectorTy() &&
ValTy->getScalarSizeInBits() <= 32)
return std::max<InstructionCost>(
- BaseT::getVectorInstrCost(Opcode, ValTy, Index, Op0, Op1), 2U);
+ BaseT::getVectorInstrCost(Opcode, ValTy, CostKind, Index, Op0, Op1),
+ 2U);
}
if (ST->hasMVEIntegerOps() && (Opcode == Instruction::InsertElement ||
return LT.first * (ValTy->getScalarType()->isIntegerTy() ? 4 : 1);
}
- return BaseT::getVectorInstrCost(Opcode, ValTy, Index, Op0, Op1);
+ return BaseT::getVectorInstrCost(Opcode, ValTy, CostKind, Index, Op0, Op1);
}
InstructionCost ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
if (Opcode == Instruction::FCmp && !ST->hasMVEFloatOps()) {
// One scalarization insert, one scalarization extract and the cost of the
// fcmps.
- return BaseT::getScalarizationOverhead(VecValTy, false, true) +
- BaseT::getScalarizationOverhead(VecCondTy, true, false) +
+ return BaseT::getScalarizationOverhead(VecValTy, /*Insert*/ false,
+ /*Extract*/ true, CostKind) +
+ BaseT::getScalarizationOverhead(VecCondTy, /*Insert*/ true,
+ /*Extract*/ false, CostKind) +
VecValTy->getNumElements() *
getCmpSelInstrCost(Opcode, ValTy->getScalarType(),
- VecCondTy->getScalarType(), VecPred, CostKind,
- I);
+ VecCondTy->getScalarType(), VecPred,
+ CostKind, I);
}
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
if (LT.second.isVector() && LT.second.getVectorNumElements() > 2) {
if (LT.first > 1)
return LT.first * BaseCost +
- BaseT::getScalarizationOverhead(VecCondTy, true, false);
+ BaseT::getScalarizationOverhead(VecCondTy, /*Insert*/ true,
+ /*Extract*/ false, CostKind);
return BaseCost;
}
}
// Return the cost of multiple scalar invocation plus the cost of
// inserting and extracting the values.
SmallVector<Type *> Tys(Args.size(), Ty);
- return BaseT::getScalarizationOverhead(VTy, Args, Tys) + Num * Cost;
+ return BaseT::getScalarizationOverhead(VTy, Args, Tys, CostKind) +
+ Num * Cost;
}
return BaseCost;
// The scalarization cost should be a lot higher. We use the number of vector
// elements plus the scalarization overhead.
InstructionCost ScalarCost =
- NumElems * LT.first + BaseT::getScalarizationOverhead(VTy, true, false) +
- BaseT::getScalarizationOverhead(VTy, false, true);
+ NumElems * LT.first +
+ BaseT::getScalarizationOverhead(VTy, /*Insert*/ true, /*Extract*/ false,
+ CostKind) +
+ BaseT::getScalarizationOverhead(VTy, /*Insert*/ false, /*Extract*/ true,
+ CostKind);
if (EltSize < 8 || Alignment < EltSize / 8)
return ScalarCost;
const Instruction *I = nullptr);
using BaseT::getVectorInstrCost;
- InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index,
- Value *Op0, Value *Op1);
+ InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
+ unsigned Index, Value *Op0, Value *Op1);
InstructionCost getAddressComputationCost(Type *Val, ScalarEvolution *SE,
const SCEV *Ptr);
}
InstructionCost HexagonTTIImpl::getScalarizationOverhead(
- VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract) {
- return BaseT::getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
+ VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,
+ TTI::TargetCostKind CostKind) {
+ return BaseT::getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
+ CostKind);
}
InstructionCost
HexagonTTIImpl::getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
- ArrayRef<Type *> Tys) {
- return BaseT::getOperandsScalarizationOverhead(Args, Tys);
+ ArrayRef<Type *> Tys,
+ TTI::TargetCostKind CostKind) {
+ return BaseT::getOperandsScalarizationOverhead(Args, Tys, CostKind);
}
InstructionCost HexagonTTIImpl::getCallInstrCost(Function *F, Type *RetTy,
}
InstructionCost HexagonTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
unsigned Index, Value *Op0,
Value *Op1) {
Type *ElemTy = Val->isVectorTy() ? cast<VectorType>(Val)->getElementType()
if (ElemTy->isIntegerTy(32))
return Cost;
// If it's not a 32-bit value, there will need to be an extract.
- return Cost + getVectorInstrCost(Instruction::ExtractElement, Val, Index,
- Op0, Op1);
+ return Cost + getVectorInstrCost(Instruction::ExtractElement, Val, CostKind,
+ Index, Op0, Op1);
}
if (Opcode == Instruction::ExtractElement)
InstructionCost getScalarizationOverhead(VectorType *Ty,
const APInt &DemandedElts,
- bool Insert, bool Extract);
- InstructionCost getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
- ArrayRef<Type *> Tys);
+ bool Insert, bool Extract,
+ TTI::TargetCostKind CostKind);
+ InstructionCost
+ getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+ ArrayRef<Type *> Tys,
+ TTI::TargetCostKind CostKind);
InstructionCost getCallInstrCost(Function *F, Type *RetTy,
ArrayRef<Type *> Tys,
TTI::TargetCostKind CostKind);
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
using BaseT::getVectorInstrCost;
- InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index,
- Value *Op0, Value *Op1);
+ InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
+ unsigned Index, Value *Op0, Value *Op1);
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
const Instruction *I = nullptr) {
}
InstructionCost PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
unsigned Index, Value *Op0,
Value *Op1) {
assert(Val->isVectorTy() && "This must be a vector type");
return InstructionCost::getMax();
InstructionCost Cost =
- BaseT::getVectorInstrCost(Opcode, Val, Index, Op0, Op1);
+ BaseT::getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
Cost *= CostFactor;
if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) {
if (Src->isVectorTy() && Opcode == Instruction::Store)
for (int i = 0, e = cast<FixedVectorType>(Src)->getNumElements(); i < e;
++i)
- Cost += getVectorInstrCost(Instruction::ExtractElement, Src, i, nullptr,
- nullptr);
+ Cost += getVectorInstrCost(Instruction::ExtractElement, Src, CostKind, i,
+ nullptr, nullptr);
return Cost;
}
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
using BaseT::getVectorInstrCost;
- InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index,
- Value *Op0, Value *Op1);
+ InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
+ unsigned Index, Value *Op0, Value *Op1);
InstructionCost
getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace, TTI::TargetCostKind CostKind,
}
InstructionCost RISCVTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
unsigned Index, Value *Op0,
Value *Op1) {
assert(Val->isVectorTy() && "This must be a vector type");
if (Opcode != Instruction::ExtractElement &&
Opcode != Instruction::InsertElement)
- return BaseT::getVectorInstrCost(Opcode, Val, Index, Op0, Op1);
+ return BaseT::getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
// Legalize the type.
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Val);
return LT.first;
if (!isTypeLegal(Val))
- return BaseT::getVectorInstrCost(Opcode, Val, Index, Op0, Op1);
+ return BaseT::getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
// In RVV, we could use vslidedown + vmv.x.s to extract element from vector
// and vslideup + vmv.s.x to insert element to vector.
const Instruction *I = nullptr);
using BaseT::getVectorInstrCost;
- InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index,
- Value *Op0, Value *Op1);
+ InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
+ unsigned Index, Value *Op0, Value *Op1);
InstructionCost getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
return (NumVectors * (SignedDivRem ? SDivPow2Cost : 1));
if (DivRemConst) {
SmallVector<Type *> Tys(Args.size(), Ty);
- return VF * DivMulSeqCost + getScalarizationOverhead(VTy, Args, Tys);
+ return VF * DivMulSeqCost +
+ getScalarizationOverhead(VTy, Args, Tys, CostKind);
}
if ((SignedDivRem || UnsignedDivRem) && VF > 4)
// Temporary hack: disable high vectorization factors with integer
getArithmeticInstrCost(Opcode, Ty->getScalarType(), CostKind);
SmallVector<Type *> Tys(Args.size(), Ty);
InstructionCost Cost =
- (VF * ScalarCost) + getScalarizationOverhead(VTy, Args, Tys);
+ (VF * ScalarCost) +
+ getScalarizationOverhead(VTy, Args, Tys, CostKind);
// FIXME: VF 2 for these FP operations are currently just as
// expensive as for VF 4.
if (VF == 2)
// There is no native support for FRem.
if (Opcode == Instruction::FRem) {
SmallVector<Type *> Tys(Args.size(), Ty);
- InstructionCost Cost =
- (VF * LIBCALL_COST) + getScalarizationOverhead(VTy, Args, Tys);
+ InstructionCost Cost = (VF * LIBCALL_COST) +
+ getScalarizationOverhead(VTy, Args, Tys, CostKind);
// FIXME: VF 2 for float is currently just as expensive as for VF 4.
if (VF == 2 && ScalarBits == 32)
Cost *= 2;
(Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI))
NeedsExtracts = false;
- TotCost += getScalarizationOverhead(SrcVecTy, false, NeedsExtracts);
- TotCost += getScalarizationOverhead(DstVecTy, NeedsInserts, false);
+ TotCost += getScalarizationOverhead(SrcVecTy, /*Insert*/ false,
+ NeedsExtracts, CostKind);
+ TotCost += getScalarizationOverhead(DstVecTy, NeedsInserts,
+ /*Extract*/ false, CostKind);
// FIXME: VF 2 for float<->i32 is currently just as expensive as for VF 4.
if (VF == 2 && SrcScalarBits == 32 && DstScalarBits == 32)
if (Opcode == Instruction::FPTrunc) {
if (SrcScalarBits == 128) // fp128 -> double/float + inserts of elements.
return VF /*ldxbr/lexbr*/ +
- getScalarizationOverhead(DstVecTy, true, false);
+ getScalarizationOverhead(DstVecTy, /*Insert*/ true,
+ /*Extract*/ false, CostKind);
else // double -> float
return VF / 2 /*vledb*/ + std::max(1U, VF / 4 /*vperm*/);
}
return VF * 2;
}
// -> fp128. VF * lxdb/lxeb + extraction of elements.
- return VF + getScalarizationOverhead(SrcVecTy, false, true);
+ return VF + getScalarizationOverhead(SrcVecTy, /*Insert*/ false,
+ /*Extract*/ true, CostKind);
}
}
}
InstructionCost SystemZTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
unsigned Index, Value *Op0,
Value *Op1) {
// vlvgp will insert two grs into a vector register, so only count half the
return Cost;
}
- return BaseT::getVectorInstrCost(Opcode, Val, Index, Op0, Op1);
+ return BaseT::getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
}
// Check if a load may be folded as a memory operand in its user.
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
using BaseT::getVectorInstrCost;
- InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index,
- Value *Op0, Value *Op1);
+ InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
+ unsigned Index, Value *Op0, Value *Op1);
bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue);
InstructionCost
getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
return Cost;
}
-InstructionCost WebAssemblyTTIImpl::getVectorInstrCost(unsigned Opcode,
- Type *Val,
- unsigned Index,
- Value *Op0, Value *Op1) {
- InstructionCost Cost =
- BasicTTIImplBase::getVectorInstrCost(Opcode, Val, Index, Op0, Op1);
+InstructionCost
+WebAssemblyTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
+ unsigned Index, Value *Op0, Value *Op1) {
+ InstructionCost Cost = BasicTTIImplBase::getVectorInstrCost(
+ Opcode, Val, CostKind, Index, Op0, Op1);
// SIMD128's insert/extract currently only take constant indices.
if (Index == -1u)
ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
const Instruction *CxtI = nullptr);
using BaseT::getVectorInstrCost;
- InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index,
- Value *Op0, Value *Op1);
+ InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
+ unsigned Index, Value *Op0, Value *Op1);
/// @}
}
InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
unsigned Index, Value *Op0,
Value *Op1) {
static const CostTblEntry SLMCostTbl[] = {
assert(Val->isVectorTy() && "This must be a vector type");
Type *ScalarType = Val->getScalarType();
InstructionCost RegisterFileMoveCost = 0;
- TTI::TargetCostKind CostKind = TTI::TargetCostKind::TCK_RecipThroughput;
// Non-immediate extraction/insertion can be handled as a sequence of
// aliased loads+stores via the stack.
if (Opcode == Instruction::ExtractElement && ScalarType->isPointerTy())
RegisterFileMoveCost += 1;
- return BaseT::getVectorInstrCost(Opcode, Val, Index, Op0, Op1) +
+ return BaseT::getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1) +
RegisterFileMoveCost;
}
-InstructionCost X86TTIImpl::getScalarizationOverhead(VectorType *Ty,
- const APInt &DemandedElts,
- bool Insert,
- bool Extract) {
+InstructionCost
+X86TTIImpl::getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts,
+ bool Insert, bool Extract,
+ TTI::TargetCostKind CostKind) {
assert(DemandedElts.getBitWidth() ==
cast<FixedVectorType>(Ty)->getNumElements() &&
"Vector size mismatch");
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
MVT MScalarTy = LT.second.getScalarType();
unsigned LegalVectorBitWidth = LT.second.getSizeInBits();
- TTI::TargetCostKind CostKind = TTI::TargetCostKind::TCK_RecipThroughput;
InstructionCost Cost = 0;
constexpr unsigned LaneBitWidth = 128;
// For types we can insert directly, insertion into 128-bit sub vectors is
// cheap, followed by a cheap chain of concatenations.
if (LegalVectorBitWidth <= LaneBitWidth) {
- Cost +=
- BaseT::getScalarizationOverhead(Ty, DemandedElts, Insert, false);
+ Cost += BaseT::getScalarizationOverhead(Ty, DemandedElts, Insert,
+ /*Extract*/ false, CostKind);
} else {
// In each 128-lane, if at least one index is demanded but not all
// indices are demanded and this 128-lane is not the first 128-lane of
Cost += getShuffleCost(TTI::SK_ExtractSubvector, Ty, std::nullopt,
CostKind, I * NumEltsPerLane, LaneTy);
Cost += BaseT::getScalarizationOverhead(LaneTy, LaneEltMask, Insert,
- false);
+ /*Extract*/ false, CostKind);
}
APInt AffectedLanes =
continue;
Cost += getShuffleCost(TTI::SK_ExtractSubvector, Ty, std::nullopt,
CostKind, I * NumEltsPerLane, LaneTy);
- Cost += BaseT::getScalarizationOverhead(LaneTy, LaneEltMask, false,
- Extract);
+ Cost += BaseT::getScalarizationOverhead(
+ LaneTy, LaneEltMask, /*Insert*/ false, Extract, CostKind);
}
return Cost;
}
// Fallback to default extraction.
- Cost += BaseT::getScalarizationOverhead(Ty, DemandedElts, false, Extract);
+ Cost += BaseT::getScalarizationOverhead(Ty, DemandedElts, /*Insert*/ false,
+ Extract, CostKind);
}
return Cost;
CoalescedVecEltIdx, CoalescedVecEltIdx + 1);
assert(DemandedElts.countPopulation() == 1 && "Inserting single value");
Cost += getScalarizationOverhead(CoalescedVecTy, DemandedElts, IsLoad,
- !IsLoad);
+ !IsLoad, CostKind);
}
// This isn't exactly right. We're using slow unaligned 32-byte accesses
(IsStore && !isLegalMaskedStore(SrcVTy, Alignment))) {
// Scalarization
APInt DemandedElts = APInt::getAllOnes(NumElem);
- InstructionCost MaskSplitCost =
- getScalarizationOverhead(MaskTy, DemandedElts, false, true);
+ InstructionCost MaskSplitCost = getScalarizationOverhead(
+ MaskTy, DemandedElts, /*Insert*/ false, /*Extract*/ true, CostKind);
InstructionCost ScalarCompareCost = getCmpSelInstrCost(
Instruction::ICmp, Type::getInt8Ty(SrcVTy->getContext()), nullptr,
CmpInst::BAD_ICMP_PREDICATE, CostKind);
InstructionCost BranchCost = getCFInstrCost(Instruction::Br, CostKind);
InstructionCost MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
- InstructionCost ValueSplitCost =
- getScalarizationOverhead(SrcVTy, DemandedElts, IsLoad, IsStore);
+ InstructionCost ValueSplitCost = getScalarizationOverhead(
+ SrcVTy, DemandedElts, IsLoad, IsStore, CostKind);
InstructionCost MemopCost =
NumElem * BaseT::getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
Alignment, AddressSpace, CostKind);
}
// Add the final extract element to the cost.
- return ReductionCost + getVectorInstrCost(Instruction::ExtractElement, Ty, 0,
- nullptr, nullptr);
+ return ReductionCost + getVectorInstrCost(Instruction::ExtractElement, Ty,
+ CostKind, 0, nullptr, nullptr);
}
InstructionCost X86TTIImpl::getMinMaxCost(Type *Ty, Type *CondTy,
}
// Add the final extract element to the cost.
- return MinMaxCost + getVectorInstrCost(Instruction::ExtractElement, Ty, 0,
- nullptr, nullptr);
+ return MinMaxCost + getVectorInstrCost(Instruction::ExtractElement, Ty,
+ CostKind, 0, nullptr, nullptr);
}
/// Calculate the cost of materializing a 64-bit value. This helper
auto *MaskTy =
FixedVectorType::get(Type::getInt1Ty(SrcVTy->getContext()), VF);
MaskUnpackCost = getScalarizationOverhead(
- MaskTy, DemandedElts, /*Insert=*/false, /*Extract=*/true);
+ MaskTy, DemandedElts, /*Insert=*/false, /*Extract=*/true, CostKind);
InstructionCost ScalarCompareCost = getCmpSelInstrCost(
Instruction::ICmp, Type::getInt1Ty(SrcVTy->getContext()), nullptr,
CmpInst::BAD_ICMP_PREDICATE, CostKind);
InstructionCost AddressUnpackCost = getScalarizationOverhead(
FixedVectorType::get(ScalarTy->getPointerTo(), VF), DemandedElts,
- /*Insert=*/false, /*Extract=*/true);
+ /*Insert=*/false, /*Extract=*/true, CostKind);
// The cost of the scalar loads/stores.
InstructionCost MemoryOpCost =
// The cost of forming the vector from loaded scalars/
// scalarizing the vector to perform scalar stores.
- InstructionCost InsertExtractCost =
- getScalarizationOverhead(cast<FixedVectorType>(SrcVTy), DemandedElts,
- /*Insert=*/Opcode == Instruction::Load,
- /*Extract=*/Opcode == Instruction::Store);
+ InstructionCost InsertExtractCost = getScalarizationOverhead(
+ cast<FixedVectorType>(SrcVTy), DemandedElts,
+ /*Insert=*/Opcode == Instruction::Load,
+ /*Extract=*/Opcode == Instruction::Store, CostKind);
return AddressUnpackCost + MemoryOpCost + MaskUnpackCost + InsertExtractCost;
}
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
using BaseT::getVectorInstrCost;
- InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index,
- Value *Op0, Value *Op1);
+ InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
+ unsigned Index, Value *Op0, Value *Op1);
InstructionCost getScalarizationOverhead(VectorType *Ty,
const APInt &DemandedElts,
- bool Insert, bool Extract);
+ bool Insert, bool Extract,
+ TTI::TargetCostKind CostKind);
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
int VF,
const APInt &DemandedDstElts,
/// Estimate the overhead of scalarizing an instruction. This is a
/// convenience wrapper for the type-based getScalarizationOverhead API.
- InstructionCost getScalarizationOverhead(Instruction *I,
- ElementCount VF) const;
+ InstructionCost getScalarizationOverhead(Instruction *I, ElementCount VF,
+ TTI::TargetCostKind CostKind) const;
/// Returns true if an artificially high cost for emulated masked memrefs
/// should be used.
// to be vectors, so we need to extract individual elements from there,
// execute VF scalar calls, and then gather the result into the vector return
// value.
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
InstructionCost ScalarCallCost =
- TTI.getCallInstrCost(F, ScalarRetTy, ScalarTys, TTI::TCK_RecipThroughput);
+ TTI.getCallInstrCost(F, ScalarRetTy, ScalarTys, CostKind);
if (VF.isScalar())
return ScalarCallCost;
// Compute costs of unpacking argument values for the scalar calls and
// packing the return values to a vector.
- InstructionCost ScalarizationCost = getScalarizationOverhead(CI, VF);
+ InstructionCost ScalarizationCost =
+ getScalarizationOverhead(CI, VF, CostKind);
InstructionCost Cost =
ScalarCallCost * VF.getKnownMinValue() + ScalarizationCost;
// If the corresponding vector cost is cheaper, return its cost.
InstructionCost VectorCallCost =
- TTI.getCallInstrCost(nullptr, RetTy, Tys, TTI::TCK_RecipThroughput);
+ TTI.getCallInstrCost(nullptr, RetTy, Tys, CostKind);
if (VectorCallCost < Cost) {
NeedToScalarize = false;
Cost = VectorCallCost;
// The cost of insertelement and extractelement instructions needed for
// scalarization.
- ScalarizationCost += getScalarizationOverhead(I, VF);
+ ScalarizationCost += getScalarizationOverhead(I, VF, CostKind);
// Scale the cost by the probability of executing the predicated blocks.
// This assumes the predicated block for each vector lane is equally
// Compute the scalarization overhead of needed insertelement instructions
// and phi nodes.
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
if (isScalarWithPredication(I, VF) && !I->getType()->isVoidTy()) {
ScalarCost += TTI.getScalarizationOverhead(
cast<VectorType>(ToVectorTy(I->getType(), VF)),
- APInt::getAllOnes(VF.getFixedValue()), true, false);
+ APInt::getAllOnes(VF.getFixedValue()), /*Insert*/ true,
+ /*Extract*/ false, CostKind);
ScalarCost +=
- VF.getFixedValue() *
- TTI.getCFInstrCost(Instruction::PHI, TTI::TCK_RecipThroughput);
+ VF.getFixedValue() * TTI.getCFInstrCost(Instruction::PHI, CostKind);
}
// Compute the scalarization overhead of needed extractelement
else if (needsExtract(J, VF)) {
ScalarCost += TTI.getScalarizationOverhead(
cast<VectorType>(ToVectorTy(J->getType(), VF)),
- APInt::getAllOnes(VF.getFixedValue()), false, true);
+ APInt::getAllOnes(VF.getFixedValue()), /*Insert*/ false,
+ /*Extract*/ true, CostKind);
}
}
// Don't pass *I here, since it is scalar but will actually be part of a
// vectorized loop where the user of it is a vectorized instruction.
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
const Align Alignment = getLoadStoreAlignment(I);
- Cost += VF.getKnownMinValue() *
- TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(), Alignment,
- AS, TTI::TCK_RecipThroughput);
+ Cost += VF.getKnownMinValue() * TTI.getMemoryOpCost(I->getOpcode(),
+ ValTy->getScalarType(),
+ Alignment, AS, CostKind);
// Get the overhead of the extractelement and insertelement instructions
// we might create due to scalarization.
- Cost += getScalarizationOverhead(I, VF);
+ Cost += getScalarizationOverhead(I, VF, CostKind);
// If we have a predicated load/store, it will need extra i1 extracts and
// conditional branches, but may not be executed for each vector lane. Scale
VectorType::get(IntegerType::getInt1Ty(ValTy->getContext()), VF);
Cost += TTI.getScalarizationOverhead(
Vec_i1Ty, APInt::getAllOnes(VF.getKnownMinValue()),
- /*Insert=*/false, /*Extract=*/true);
- Cost += TTI.getCFInstrCost(Instruction::Br, TTI::TCK_RecipThroughput);
+ /*Insert=*/false, /*Extract=*/true, CostKind);
+ Cost += TTI.getCFInstrCost(Instruction::Br, CostKind);
if (useEmulatedMaskMemRefHack(I, VF))
// Artificially setting to a high enough value to practically disable
(isLoopInvariantStoreValue
? 0
: TTI.getVectorInstrCost(Instruction::ExtractElement, VectorTy,
- VF.getKnownMinValue() - 1));
+ CostKind, VF.getKnownMinValue() - 1));
}
InstructionCost
return VectorizationCostTy(C, TypeNotScalarized);
}
-InstructionCost
-LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I,
- ElementCount VF) const {
+InstructionCost LoopVectorizationCostModel::getScalarizationOverhead(
+ Instruction *I, ElementCount VF, TTI::TargetCostKind CostKind) const {
// There is no mechanism yet to create a scalable scalarization loop,
// so this is currently Invalid.
if (!RetTy->isVoidTy() &&
(!isa<LoadInst>(I) || !TTI.supportsEfficientVectorElementLoadStore()))
Cost += TTI.getScalarizationOverhead(
- cast<VectorType>(RetTy), APInt::getAllOnes(VF.getKnownMinValue()), true,
- false);
+ cast<VectorType>(RetTy), APInt::getAllOnes(VF.getKnownMinValue()),
+ /*Insert*/ true,
+ /*Extract*/ false, CostKind);
// Some targets keep addresses scalar.
if (isa<LoadInst>(I) && !TTI.prefersVectorizedAddressing())
for (auto *V : filterExtractingOperands(Ops, VF))
Tys.push_back(MaybeVectorizeType(V->getType(), VF));
return Cost + TTI.getOperandsScalarizationOverhead(
- filterExtractingOperands(Ops, VF), Tys);
+ filterExtractingOperands(Ops, VF), Tys, CostKind);
}
void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
VectorType::get(IntegerType::getInt1Ty(RetTy->getContext()), VF);
return (
TTI.getScalarizationOverhead(
- Vec_i1Ty, APInt::getAllOnes(VF.getFixedValue()), false, true) +
+ Vec_i1Ty, APInt::getAllOnes(VF.getFixedValue()),
+ /*Insert*/ false, /*Extract*/ true, CostKind) +
(TTI.getCFInstrCost(Instruction::Br, CostKind) * VF.getFixedValue()));
} else if (I->getParent() == TheLoop->getLoopLatch() || VF.isScalar())
// The back-edge branch will remain, as will all scalar branches.
continue;
}
}
- Cost -= TTI->getVectorInstrCost(*EE, EE->getVectorOperandType(), Idx);
+ Cost -= TTI->getVectorInstrCost(*EE, EE->getVectorOperandType(), CostKind,
+ Idx);
}
// Add a cost for subvector extracts/inserts if required.
for (const auto &Data : ExtractVectorsTys) {
bool NeedShuffle =
VL.front() != *It || !all_of(VL.drop_front(), UndefValue::classof);
InstructionCost InsertCost =
- TTI->getVectorInstrCost(Instruction::InsertElement, VecTy,
+ TTI->getVectorInstrCost(Instruction::InsertElement, VecTy, CostKind,
/*Index=*/0, PoisonValue::get(VecTy), *It);
return InsertCost + (NeedShuffle
? TTI->getShuffleCost(
}
}
return TTI->getVectorInstrCost(Instruction::ExtractElement, SrcVecTy,
- *getExtractIndex(I));
+ CostKind, *getExtractIndex(I));
};
auto GetVectorCost = [](InstructionCost CommonCost) { return CommonCost; };
return GetCostDiff(GetScalarCost, GetVectorCost);
InstructionCost Cost = 0;
Cost -= TTI->getScalarizationOverhead(SrcVecTy, DemandedElts,
- /*Insert*/ true, /*Extract*/ false);
+ /*Insert*/ true, /*Extract*/ false,
+ CostKind);
// First cost - resize to actual vector size if not identity shuffle or
// need to shift the vector.
// extend the extracted value back to the original type. Here, we account
// for the extract and the added cost of the sign extend if needed.
auto *VecTy = FixedVectorType::get(EU.Scalar->getType(), BundleWidth);
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
auto *ScalarRoot = VectorizableTree[0]->Scalars[0];
if (MinBWs.count(ScalarRoot)) {
auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot].first);
ExtractCost += TTI->getExtractWithExtendCost(Extend, EU.Scalar->getType(),
VecTy, EU.Lane);
} else {
- ExtractCost +=
- TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, EU.Lane);
+ ExtractCost += TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy,
+ CostKind, EU.Lane);
}
}
EstimateShufflesCost);
InstructionCost InsertCost = TTI->getScalarizationOverhead(
cast<FixedVectorType>(FirstUsers[I].first->getType()), DemandedElts[I],
- /*Insert*/ true, /*Extract*/ false);
+ /*Insert*/ true, /*Extract*/ false, TTI::TCK_RecipThroughput);
Cost -= InsertCost;
}
InstructionCost BoUpSLP::getGatherCost(FixedVectorType *Ty,
const APInt &ShuffledIndices,
bool NeedToShuffle) const {
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
InstructionCost Cost =
TTI->getScalarizationOverhead(Ty, ~ShuffledIndices, /*Insert*/ true,
- /*Extract*/ false);
+ /*Extract*/ false, CostKind);
if (NeedToShuffle)
Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, Ty);
return Cost;
InstructionCost OldCost =
TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS);
APInt DemandedElts = APInt::getOneBitSet(MinVecNumElts, 0);
- OldCost += TTI.getScalarizationOverhead(MinVecTy, DemandedElts,
- /* Insert */ true, HasExtract);
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+ OldCost +=
+ TTI.getScalarizationOverhead(MinVecTy, DemandedElts,
+ /* Insert */ true, HasExtract, CostKind);
// New pattern: load VecPtr
InstructionCost NewCost =
return nullptr;
Type *VecTy = Ext0->getVectorOperand()->getType();
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
assert(VecTy == Ext1->getVectorOperand()->getType() && "Need matching types");
- InstructionCost Cost0 = TTI.getVectorInstrCost(*Ext0, VecTy, Index0);
- InstructionCost Cost1 = TTI.getVectorInstrCost(*Ext1, VecTy, Index1);
+ InstructionCost Cost0 =
+ TTI.getVectorInstrCost(*Ext0, VecTy, CostKind, Index0);
+ InstructionCost Cost1 =
+ TTI.getVectorInstrCost(*Ext1, VecTy, CostKind, Index1);
// If both costs are invalid no shuffle is needed
if (!Cost0.isValid() && !Cost1.isValid())
// both sequences.
unsigned Ext0Index = Ext0IndexC->getZExtValue();
unsigned Ext1Index = Ext1IndexC->getZExtValue();
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
InstructionCost Extract0Cost =
- TTI.getVectorInstrCost(*Ext0, VecTy, Ext0Index);
+ TTI.getVectorInstrCost(*Ext0, VecTy, CostKind, Ext0Index);
InstructionCost Extract1Cost =
- TTI.getVectorInstrCost(*Ext1, VecTy, Ext1Index);
+ TTI.getVectorInstrCost(*Ext1, VecTy, CostKind, Ext1Index);
// A more expensive extract will always be replaced by a splat shuffle.
// For example, if Ext0 is more expensive:
Mask[Index] = Index + NumElts;
Type *ScalarTy = VecTy->getScalarType();
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
InstructionCost OldCost =
TTI.getArithmeticInstrCost(Instruction::FNeg, ScalarTy) +
- TTI.getVectorInstrCost(I, VecTy, Index);
+ TTI.getVectorInstrCost(I, VecTy, CostKind, Index);
// If the extract has one use, it will be eliminated, so count it in the
// original cost. If it has more than one use, ignore the cost because it will
// be the same before/after.
if (Extract->hasOneUse())
- OldCost += TTI.getVectorInstrCost(*Extract, VecTy, Index);
+ OldCost += TTI.getVectorInstrCost(*Extract, VecTy, CostKind, Index);
InstructionCost NewCost =
TTI.getArithmeticInstrCost(Instruction::FNeg, VecTy) +
// Get cost estimate for the insert element. This cost will factor into
// both sequences.
- InstructionCost InsertCost =
- TTI.getVectorInstrCost(Instruction::InsertElement, VecTy, Index);
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+ InstructionCost InsertCost = TTI.getVectorInstrCost(
+ Instruction::InsertElement, VecTy, CostKind, Index);
InstructionCost OldCost =
(IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + VectorOpCost;
InstructionCost NewCost = ScalarOpCost + InsertCost +
if (!VecTy)
return false;
- InstructionCost OldCost = TTI.getVectorInstrCost(*Ext0, VecTy, Index0);
- OldCost += TTI.getVectorInstrCost(*Ext1, VecTy, Index1);
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+ InstructionCost OldCost =
+ TTI.getVectorInstrCost(*Ext0, VecTy, CostKind, Index0);
+ OldCost += TTI.getVectorInstrCost(*Ext1, VecTy, CostKind, Index1);
OldCost +=
TTI.getCmpSelInstrCost(CmpOpcode, I0->getType(),
CmpInst::makeCmpResultType(I0->getType()), Pred) *
NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, CmpTy,
ShufMask);
NewCost += TTI.getArithmeticInstrCost(I.getOpcode(), CmpTy);
- NewCost += TTI.getVectorInstrCost(*Ext0, CmpTy, CheapIndex);
+ NewCost += TTI.getVectorInstrCost(*Ext0, CmpTy, CostKind, CheapIndex);
// Aggressively form vector ops if the cost is equal because the transform
// may enable further optimization.
}
auto *Index = dyn_cast<ConstantInt>(UI->getOperand(1));
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
OriginalCost +=
- TTI.getVectorInstrCost(Instruction::ExtractElement, FixedVT,
+ TTI.getVectorInstrCost(Instruction::ExtractElement, FixedVT, CostKind,
Index ? Index->getZExtValue() : -1);
ScalarizedCost +=
TTI.getMemoryOpCost(Instruction::Load, FixedVT->getElementType(),
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> <i32 4, i32 5>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> <i32 6, i32 7>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 5, i32 6, i32 7, i32 undef>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void