From d484cc152b1d9282230a17a218337342d52536e2 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Tue, 9 Nov 2021 13:59:40 +0300 Subject: [PATCH] [TTI] Adjust `getReplicationShuffleCost()` interface It is trivial to produce DemandedSrcElts given DemandedReplicatedElts, so don't pass the former. Also, it isn't really useful so far to have the overload taking the Mask, so just inline it. --- llvm/include/llvm/Analysis/TargetTransformInfo.h | 18 +-------- .../llvm/Analysis/TargetTransformInfoImpl.h | 17 +++++---- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 43 +++------------------- llvm/lib/Analysis/TargetTransformInfo.cpp | 13 +------ llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 2 +- 5 files changed, 19 insertions(+), 74 deletions(-) diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 4312c2a..e93a1e2 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1127,12 +1127,8 @@ public: /// <0,0,0,1,1,1,2,2,2,3,3,3> InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, - const APInt &DemandedSrcElts, const APInt &DemandedReplicatedElts, TTI::TargetCostKind CostKind); - InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, - int VF, ArrayRef Mask, - TTI::TargetCostKind CostKind); /// \return The cost of Load and Store instructions. InstructionCost @@ -1661,12 +1657,9 @@ public: virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) = 0; - virtual InstructionCost getReplicationShuffleCost( - Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedSrcElts, - const APInt &DemandedReplicatedElts, TTI::TargetCostKind CostKind) = 0; virtual InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, - ArrayRef Mask, + const APInt &DemandedReplicatedElts, TTI::TargetCostKind CostKind) = 0; virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, @@ -2180,20 +2173,11 @@ public: } InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, - const APInt &DemandedSrcElts, const APInt &DemandedReplicatedElts, TTI::TargetCostKind CostKind) override { return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF, - DemandedSrcElts, DemandedReplicatedElts, CostKind); } - InstructionCost - getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, - ArrayRef Mask, - TTI::TargetCostKind CostKind) override { - return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF, Mask, - CostKind); - } InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 707912d..a0bae8e 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -552,16 +552,10 @@ public: } unsigned getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, - const APInt &DemandedSrcElts, const APInt &DemandedReplicatedElts, TTI::TargetCostKind CostKind) { return 1; } - unsigned getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, - ArrayRef Mask, - TTI::TargetCostKind CostKind) { - return 1; - } InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, @@ -1119,10 +1113,17 @@ public: FixedVectorType::get(VecTy->getScalarType(), NumSubElts)); int ReplicationFactor, VF; - if (Shuffle->isReplicationMask(ReplicationFactor, VF)) + if (Shuffle->isReplicationMask(ReplicationFactor, VF)) { + APInt DemandedReplicatedElts = + APInt::getNullValue(Shuffle->getShuffleMask().size()); + for (auto I : enumerate(Shuffle->getShuffleMask())) { + if (I.value() != UndefMaskElem) + DemandedReplicatedElts.setBit(I.index()); + } return TargetTTI->getReplicationShuffleCost( VecSrcTy->getElementType(), ReplicationFactor, VF, - Shuffle->getShuffleMask(), CostKind); + DemandedReplicatedElts, CostKind); + } return CostKind == TTI::TCK_RecipThroughput ? -1 : 1; } diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index aeefa01..aeea6e4 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1121,9 +1121,12 @@ public: InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, - const APInt &DemandedSrcElts, const APInt &DemandedReplicatedElts, TTI::TargetCostKind CostKind) { + assert(DemandedReplicatedElts.getBitWidth() == + (unsigned)VF * ReplicationFactor && + "Unexpected size of DemandedReplicatedElts."); + InstructionCost Cost; auto *SrcVT = FixedVectorType::get(EltTy, VF); @@ -1139,6 +1142,7 @@ public: // The cost is estimated as extract all mask elements from the <8xi1> mask // vector and insert them factor times into the <24xi1> shuffled mask // vector. + APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedReplicatedElts, VF); Cost += thisT()->getScalarizationOverhead(SrcVT, DemandedSrcElts, /*Insert*/ false, /*Extract*/ true); @@ -1149,41 +1153,6 @@ public: return Cost; } - InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, - int VF, ArrayRef Mask, - TTI::TargetCostKind CostKind) { - assert(Mask.size() == (unsigned)VF * ReplicationFactor && "Bad mask size."); - - APInt DemandedSrcElts = APInt::getNullValue(VF); - - ArrayRef RemainingMask = Mask; - for (int i = 0; i < VF; i++) { - ArrayRef CurrSubMask = RemainingMask.take_front(ReplicationFactor); - RemainingMask = RemainingMask.drop_front(CurrSubMask.size()); - - assert(all_of(CurrSubMask, - [i](int MaskElt) { - return MaskElt == UndefMaskElem || MaskElt == i; - }) && - "Not a replication mask."); - - if (any_of(CurrSubMask, - [](int MaskElt) { return MaskElt != UndefMaskElem; })) - DemandedSrcElts.setBit(i); - } - assert(RemainingMask.empty() && "Did not consume the entire mask?"); - - APInt DemandedReplicatedElts = APInt::getNullValue(Mask.size()); - for (auto I : enumerate(Mask)) { - if (I.value() != UndefMaskElem) - DemandedReplicatedElts.setBit(I.index()); - } - - return thisT()->getReplicationShuffleCost(EltTy, ReplicationFactor, VF, - DemandedSrcElts, - DemandedReplicatedElts, CostKind); - } - InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, @@ -1365,7 +1334,7 @@ public: Type *I8Type = Type::getInt8Ty(VT->getContext()); Cost += thisT()->getReplicationShuffleCost( - I8Type, Factor, NumSubElts, DemandedAllSubElts, + I8Type, Factor, NumSubElts, UseMaskForGaps ? DemandedLoadStoreElts : DemandedAllResultElts, CostKind); diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 8c5254d..dcd015b 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -834,19 +834,10 @@ InstructionCost TargetTransformInfo::getVectorInstrCost(unsigned Opcode, } InstructionCost TargetTransformInfo::getReplicationShuffleCost( - Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedSrcElts, + Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedReplicatedElts, TTI::TargetCostKind CostKind) { InstructionCost Cost = TTIImpl->getReplicationShuffleCost( - EltTy, ReplicationFactor, VF, DemandedSrcElts, DemandedReplicatedElts, - CostKind); - assert(Cost >= 0 && "TTI should not produce negative costs!"); - return Cost; -} -InstructionCost TargetTransformInfo::getReplicationShuffleCost( - Type *EltTy, int ReplicationFactor, int VF, ArrayRef Mask, - TTI::TargetCostKind CostKind) { - InstructionCost Cost = TTIImpl->getReplicationShuffleCost( - EltTy, ReplicationFactor, VF, Mask, CostKind); + EltTy, ReplicationFactor, VF, DemandedReplicatedElts, CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index ebde29f..83f4e0b 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -5077,7 +5077,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512( Type *I8Type = Type::getInt8Ty(VecTy->getContext()); MaskCost = getReplicationShuffleCost( - I8Type, Factor, VF, APInt::getAllOnes(VF), + I8Type, Factor, VF, UseMaskForGaps ? DemandedLoadStoreElts : APInt::getAllOnes(VecTy->getNumElements()), CostKind); -- 2.7.4