/// \return The maximum interleave factor that any transform should try to
/// perform for this target. This number depends on the level of parallelism
/// and the number of execution units in the CPU.
- unsigned getMaxInterleaveFactor(unsigned VF) const;
+ unsigned getMaxInterleaveFactor(ElementCount VF) const;
/// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
static OperandValueInfo getOperandInfo(const Value *V);
/// \return if target want to issue a prefetch in address space \p AS.
virtual bool shouldPrefetchAddressSpace(unsigned AS) const = 0;
- virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
+ virtual unsigned getMaxInterleaveFactor(ElementCount VF) = 0;
virtual InstructionCost getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
return Impl.shouldPrefetchAddressSpace(AS);
}
- unsigned getMaxInterleaveFactor(unsigned VF) override {
+ unsigned getMaxInterleaveFactor(ElementCount VF) override {
return Impl.getMaxInterleaveFactor(VF);
}
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
bool enableWritePrefetching() const { return false; }
bool shouldPrefetchAddressSpace(unsigned AS) const { return !AS; }
- unsigned getMaxInterleaveFactor(unsigned VF) const { return 1; }
+ unsigned getMaxInterleaveFactor(ElementCount VF) const { return 1; }
InstructionCost getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
}
}
- unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
+ unsigned getMaxInterleaveFactor(ElementCount VF) { return 1; }
InstructionCost getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
return TTIImpl->shouldPrefetchAddressSpace(AS);
}
-unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {
+unsigned TargetTransformInfo::getMaxInterleaveFactor(ElementCount VF) const {
return TTIImpl->getMaxInterleaveFactor(VF);
}
return Cost;
}
-unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
+unsigned AArch64TTIImpl::getMaxInterleaveFactor(ElementCount VF) {
return ST->getMaxInterleaveFactor();
}
return VF.getKnownMinValue() * ST->getVScaleForTuning();
}
- unsigned getMaxInterleaveFactor(unsigned VF);
+ unsigned getMaxInterleaveFactor(ElementCount VF);
bool prefersVectorizedAddressing() const;
}
}
-unsigned GCNTTIImpl::getMaxInterleaveFactor(unsigned VF) {
+unsigned GCNTTIImpl::getMaxInterleaveFactor(ElementCount VF) {
// Disable unrolling if the loop is not vectorized.
// TODO: Enable this again.
- if (VF == 1)
+ if (VF.isScalar())
return 1;
return 8;
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
unsigned SrcAlign, unsigned DestAlign,
std::optional<uint32_t> AtomicCpySize) const;
- unsigned getMaxInterleaveFactor(unsigned VF);
+ unsigned getMaxInterleaveFactor(ElementCount VF);
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
}
-unsigned R600TTIImpl::getMaxInterleaveFactor(unsigned VF) {
+unsigned R600TTIImpl::getMaxInterleaveFactor(ElementCount VF) {
// Disable unrolling if the loop is not vectorized.
// TODO: Enable this again.
- if (VF == 1)
+ if (VF.isScalar())
return 1;
return 8;
unsigned AddrSpace) const;
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
unsigned AddrSpace) const;
- unsigned getMaxInterleaveFactor(unsigned VF);
+ unsigned getMaxInterleaveFactor(ElementCount VF);
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
using BaseT::getVectorInstrCost;
llvm_unreachable("Unsupported register kind");
}
- unsigned getMaxInterleaveFactor(unsigned VF) {
+ unsigned getMaxInterleaveFactor(ElementCount VF) {
return ST->getMaxInterleaveFactor();
}
return 32;
}
-unsigned HexagonTTIImpl::getMaxInterleaveFactor(unsigned VF) {
+unsigned HexagonTTIImpl::getMaxInterleaveFactor(ElementCount VF) {
return useHVX() ? 2 : 1;
}
/// @{
unsigned getNumberOfRegisters(bool vector) const;
- unsigned getMaxInterleaveFactor(unsigned VF);
+ unsigned getMaxInterleaveFactor(ElementCount VF);
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;
unsigned getMinVectorRegisterBitWidth() const;
ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const;
return 300;
}
-unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) {
+unsigned PPCTTIImpl::getMaxInterleaveFactor(ElementCount VF) {
unsigned Directive = ST->getCPUDirective();
// The 440 has no SIMD support, but floating-point instructions
// have a 5-cycle latency, so unroll by 5x for latency hiding.
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;
unsigned getCacheLineSize() const override;
unsigned getPrefetchDistance() const override;
- unsigned getMaxInterleaveFactor(unsigned VF);
+ unsigned getMaxInterleaveFactor(ElementCount VF);
InstructionCost vectorCostAdjustmentFactor(unsigned Opcode, Type *Ty1,
Type *Ty2);
InstructionCost getArithmeticInstrCost(
}
}
- unsigned getMaxInterleaveFactor(unsigned VF) {
+ unsigned getMaxInterleaveFactor(ElementCount VF) {
// If the loop will not be vectorized, don't interleave the loop.
// Let regular unroll to unroll the loop.
- return VF == 1 ? 1 : ST->getMaxInterleaveFactor();
+ return VF.isScalar() ? 1 : ST->getMaxInterleaveFactor();
}
enum RISCVRegisterClass { GPRRC, FPRRC, VRRC };
.getFixedValue();
}
-unsigned X86TTIImpl::getMaxInterleaveFactor(unsigned VF) {
+unsigned X86TTIImpl::getMaxInterleaveFactor(ElementCount VF) {
// If the loop will not be vectorized, don't interleave the loop.
// Let regular unroll to unroll the loop, which saves the overflow
// check and memory check cost.
- if (VF == 1)
+ if (VF.isScalar())
return 1;
if (ST->isAtom())
unsigned getNumberOfRegisters(unsigned ClassID) const;
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;
unsigned getLoadStoreVecRegBitWidth(unsigned AS) const;
- unsigned getMaxInterleaveFactor(unsigned VF);
+ unsigned getMaxInterleaveFactor(ElementCount VF);
InstructionCost getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
// We also consider epilogue vectorization unprofitable for targets that don't
// consider interleaving beneficial (eg. MVE).
- if (TTI.getMaxInterleaveFactor(VF.getKnownMinValue()) <= 1)
+ if (TTI.getMaxInterleaveFactor(VF) <= 1)
return false;
// FIXME: We should consider changing the threshold for scalable
// vectors to take VScaleForTuning into account.
}
// Clamp the interleave ranges to reasonable counts.
- unsigned MaxInterleaveCount =
- TTI.getMaxInterleaveFactor(VF.getKnownMinValue());
+ unsigned MaxInterleaveCount = TTI.getMaxInterleaveFactor(VF);
// Check if the user has overridden the max.
if (VF.isScalar()) {
// vector registers, loop vectorization may still enable scalar
// interleaving.
if (!TTI->getNumberOfRegisters(TTI->getRegisterClassForType(true)) &&
- TTI->getMaxInterleaveFactor(1) < 2)
+ TTI->getMaxInterleaveFactor(ElementCount::getFixed(1)) < 2)
return LoopVectorizeResult(false, false);
bool Changed = false, CFGChanged = false;