/// Estimate the cost, when lowered, of a chain of pointer operations
/// (typically the pointer operands of a chain of loads or stores within the
/// same block).
+ /// \p AccessTy is the type of the loads/stores that will ultimately use the
+ /// \p Ptrs.
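+ ///
+ /// A sketch of a typical query (the names here are illustrative): costing
+ /// the unit-stride pointer operands of a chain of stores of \p AccessTy:
+ /// \code
+ ///   InstructionCost Cost = TTI.getPointersChainCost(
+ ///       Ptrs, BasePtr, TTI::PointersChainInfo::getUnitStride(), AccessTy,
+ ///       TTI::TCK_RecipThroughput);
+ /// \endcode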
InstructionCost
getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base,
- const PointersChainInfo &Info,
+ const PointersChainInfo &Info, Type *AccessTy,
TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
TTI::TargetCostKind CostKind) = 0;
virtual InstructionCost
getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base,
- const TTI::PointersChainInfo &Info,
+ const TTI::PointersChainInfo &Info, Type *AccessTy,
TTI::TargetCostKind CostKind) = 0;
virtual unsigned getInliningThresholdMultiplier() = 0;
virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0;
InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
const Value *Base,
const PointersChainInfo &Info,
+ Type *AccessTy,
TargetCostKind CostKind) override {
- return Impl.getPointersChainCost(Ptrs, Base, Info, CostKind);
+ return Impl.getPointersChainCost(Ptrs, Base, Info, AccessTy, CostKind);
}
unsigned getInliningThresholdMultiplier() override {
return Impl.getInliningThresholdMultiplier();
InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
const Value *Base,
const TTI::PointersChainInfo &Info,
+ Type *AccessTy,
TTI::TargetCostKind CostKind) {
InstructionCost Cost = TTI::TCC_Free;
// In the basic model we take into account GEP instructions only
InstructionCost TargetTransformInfo::getPointersChainCost(
ArrayRef<const Value *> Ptrs, const Value *Base,
- const TTI::PointersChainInfo &Info, TTI::TargetCostKind CostKind) const {
+ const TTI::PointersChainInfo &Info, Type *AccessTy,
+ TTI::TargetCostKind CostKind) const {
assert((Base || !Info.isSameBase()) &&
"If pointers have same base address it has to be provided.");
- return TTIImpl->getPointersChainCost(Ptrs, Base, Info, CostKind);
+ return TTIImpl->getPointersChainCost(Ptrs, Base, Info, AccessTy, CostKind);
}
unsigned TargetTransformInfo::getEstimatedNumberOfCaseClusters(
}
}
+// TODO: Deduplicate from TargetTransformInfoImplCRTPBase.
+InstructionCost RISCVTTIImpl::getPointersChainCost(
+ ArrayRef<const Value *> Ptrs, const Value *Base,
+ const TTI::PointersChainInfo &Info, Type *AccessTy,
+ TTI::TargetCostKind CostKind) {
+ InstructionCost Cost = TTI::TCC_Free;
+ // In the basic model we take into account GEP instructions only
+ // (although an alloca instruction, a plain value, constants and/or
+ // constant expressions, PHIs, bitcasts, or anything else allowed to be
+ // used as a pointer may appear here). Typically, if Base is not a
+ // GEP instruction and all the pointers are relative to the same base
+ // address, the rest are either GEP instructions, PHIs, bitcasts or
+ // constants. When we have the same base, we just calculate the cost of
+ // each non-Base GEP as an ADD operation if any of its indices is
+ // non-constant.
+ // If there are no known dependencies between the pointers, the cost is
+ // calculated as the sum of the costs of the GEP instructions.
+ for (auto [I, V] : enumerate(Ptrs)) {
+ const auto *GEP = dyn_cast<GetElementPtrInst>(V);
+ if (!GEP)
+ continue;
+ if (Info.isSameBase() && V != Base) {
+ if (GEP->hasAllConstantIndices())
+ continue;
+ // If the chain is unit-stride and BaseReg + stride*i is a legal
+ // addressing mode, then presume the base GEP is sitting around in a
+ // register somewhere and check if we can fold the offset relative to
+ // it.
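+ // For example (illustrative): with an i32 AccessTy, the chain element at
+ // position 1, such as
+ //   %p2 = getelementptr [4 x i32], ptr %dest, i32 %i, i32 1
+ // is accessed at BaseReg + 4, which fits RISC-V's 12-bit immediate
+ // offset, so it costs no extra ADD.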
+ unsigned Stride = DL.getTypeStoreSize(AccessTy);
+ if (Info.isUnitStride() &&
+ isLegalAddressingMode(AccessTy,
+ /* BaseGV */ nullptr,
+ /* BaseOffset */ Stride * I,
+ /* HasBaseReg */ true,
+ /* Scale */ 0,
+ GEP->getType()->getPointerAddressSpace()))
+ continue;
+ Cost += getArithmeticInstrCost(Instruction::Add, GEP->getType(), CostKind,
+ {TTI::OK_AnyValue, TTI::OP_None},
+ {TTI::OK_AnyValue, TTI::OP_None},
+ std::nullopt);
+ } else {
+ SmallVector<const Value *> Indices(GEP->indices());
+ Cost += getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(),
+ Indices, CostKind);
+ }
+ }
+ return Cost;
+}
+
void RISCVTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP,
OptimizationRemarkEmitter *ORE) {
Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind);
+ InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
+ const Value *Base,
+ const TTI::PointersChainInfo &Info,
+ Type *AccessTy,
+ TTI::TargetCostKind CostKind);
+
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP,
OptimizationRemarkEmitter *ORE);
return Cost + LT.first;
}
-InstructionCost X86TTIImpl::getPointersChainCost(
- ArrayRef<const Value *> Ptrs, const Value *Base,
- const TTI::PointersChainInfo &Info, TTI::TargetCostKind CostKind) {
+InstructionCost
+X86TTIImpl::getPointersChainCost(ArrayRef<const Value *> Ptrs,
+ const Value *Base,
+ const TTI::PointersChainInfo &Info,
+ Type *AccessTy, TTI::TargetCostKind CostKind) {
if (Info.isSameBase() && Info.isKnownStride()) {
// If all the pointers have known stride all the differences are translated
// into constants. X86 memory addressing allows encoding it into
}
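+ // For instance (illustrative): with a known stride each access folds its
+ // constant difference from the base into an x86 displacement, e.g.
+ // movl %ecx, 4(%rax), so nothing beyond the base address is computed.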
return TTI::TCC_Free;
}
- return BaseT::getPointersChainCost(Ptrs, Base, Info, CostKind);
+ return BaseT::getPointersChainCost(Ptrs, Base, Info, AccessTy, CostKind);
}
InstructionCost X86TTIImpl::getAddressComputationCost(Type *Ty,
InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
const Value *Base,
const TTI::PointersChainInfo &Info,
+ Type *AccessTy,
TTI::TargetCostKind CostKind);
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE,
const SCEV *Ptr);
// stay in vectorized code due to uses outside of these scalar
// loads/stores.
ScalarCost = TTI->getPointersChainCost(
- Ptrs, BasePtr, TTI::PointersChainInfo::getUnitStride(), CostKind);
+ Ptrs, BasePtr, TTI::PointersChainInfo::getUnitStride(), ScalarTy,
+ CostKind);
SmallVector<const Value *> PtrsRetainedInVecCode;
for (Value *V : Ptrs) {
}
VecCost = TTI->getPointersChainCost(
PtrsRetainedInVecCode, BasePtr,
- TTI::PointersChainInfo::getKnownStride(), CostKind);
+ TTI::PointersChainInfo::getKnownStride(), VecTy, CostKind);
} else {
// Case 1: Ptrs are the arguments of loads that we are going to transform
// into masked gather load intrinsic.
? TTI::PointersChainInfo::getUnknownStride()
: TTI::PointersChainInfo::getKnownStride();
- ScalarCost = TTI->getPointersChainCost(Ptrs, BasePtr, PtrsInfo, CostKind);
+ ScalarCost = TTI->getPointersChainCost(Ptrs, BasePtr, PtrsInfo, ScalarTy,
+ CostKind);
// Remark: it is not quite correct to use the scalar GEP cost for a vector GEP,
// but it's not clear how to do that without having vector GEP arguments
ret void
}
-; FIXME: When computing the scalar pointers chain cost here, there is a cost of
+; When computing the scalar pointers chain cost here, there is a cost of
; 1 for the base pointer, and the rest can be folded in, so the scalar cost
; should be 1.
define void @h(ptr %dest, i32 %i) {
; YAML-NEXT: Function: h
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'Stores SLP vectorized with cost '
-; YAML-NEXT: - Cost: '-5'
+; YAML-NEXT: - Cost: '-2'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '2'
%p1 = getelementptr [4 x i32], ptr %dest, i32 %i, i32 0
; RUN: opt < %s -passes=slp-vectorizer -mtriple=riscv64 -mattr=+v \
; RUN: -riscv-v-slp-max-vf=0 -S | FileCheck %s
-; FIXME: This should not be vectorized
+; This shouldn't be vectorized as the extra address computation required for the
+; vector store make it unprofitable (vle/vse don't have an offset in their
+; addressing modes)
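+; For illustration: the scalar stores can fold their offsets, e.g.
+; sw a1, 4(a0), while a unit-stride vector store only takes a bare base
+; register, e.g. vse32.v v8, (a0), forcing an extra address computation.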
%struct.2i32 = type { i32, i32 }
; CHECK-LABEL: @splat_store_v2i32(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[P1:%.*]] = getelementptr [[STRUCT_2I32:%.*]], ptr [[DEST:%.*]], i64 [[I:%.*]], i32 0
-; CHECK-NEXT: store <2 x i32> <i32 1, i32 1>, ptr [[P1]], align 4
+; CHECK-NEXT: store i32 1, ptr [[P1]], align 4
+; CHECK-NEXT: [[P2:%.*]] = getelementptr [[STRUCT_2I32]], ptr [[DEST]], i64 [[I]], i32 1
+; CHECK-NEXT: store i32 1, ptr [[P2]], align 4
; CHECK-NEXT: ret void
;
entry: