From 73f01e3df58dca9d1596440b866b52929e3878de Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 30 Oct 2020 13:48:28 +0000 Subject: [PATCH] [TTI] Add VecPred argument to getCmpSelInstrCost. On some targets, like AArch64, vector selects can be efficiently lowered if the vector condition is a compare with a supported predicate. This patch adds a new argument to getCmpSelInstrCost, to indicate the predicate of the feeding select condition. Note that it is not sufficient to use the context instruction when querying the cost of a vector select starting from a scalar one, because the condition of the vector select could be composed of compares with different predicates. This change greatly improves modeling the costs of certain compare/select patterns on AArch64. I am also planning on putting up patches to make use of the new argument in SLPVectorizer & LV. Reviewed By: dmgreen, RKSimon Differential Revision: https://reviews.llvm.org/D90070 --- llvm/include/llvm/Analysis/TargetTransformInfo.h | 17 ++++-- .../llvm/Analysis/TargetTransformInfoImpl.h | 3 + llvm/include/llvm/CodeGen/BasicTTIImpl.h | 71 +++++++++++++--------- llvm/lib/Analysis/TargetTransformInfo.cpp | 4 +- .../Target/AArch64/AArch64TargetTransformInfo.cpp | 31 ++++++++-- .../Target/AArch64/AArch64TargetTransformInfo.h | 1 + llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 8 ++- llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 1 + .../Target/Hexagon/HexagonTargetTransformInfo.cpp | 7 ++- .../Target/Hexagon/HexagonTargetTransformInfo.h | 2 + llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 4 +- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h | 1 + .../Target/SystemZ/SystemZTargetTransformInfo.cpp | 6 +- .../Target/SystemZ/SystemZTargetTransformInfo.h | 1 + llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 20 +++--- llvm/lib/Target/X86/X86TargetTransformInfo.h | 1 + .../Transforms/Utils/ScalarEvolutionExpander.cpp | 6 +- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 2 +- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 8 +-- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 16 ++--- .../Analysis/CostModel/AArch64/vector-select.ll | 16 ++--- 21 files changed, 148 insertions(+), 78 deletions(-) diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 775d90b..2b9dc2b 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -21,6 +21,7 @@ #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" @@ -1092,10 +1093,14 @@ public: /// \returns The expected cost of compare and select instructions. If there /// is an existing instruction that holds Opcode, it may be passed in the - /// 'I' parameter. - int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy = nullptr, - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, - const Instruction *I = nullptr) const; + /// 'I' parameter. The \p VecPred parameter can be used to indicate the select + /// is using a compare with the specified predicate as condition. When vector + /// types are passed, \p VecPred must be used for all lanes. 
+ int getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy = nullptr, + CmpInst::Predicate VecPred = CmpInst::BAD_ICMP_PREDICATE, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, + const Instruction *I = nullptr) const; /// \return The expected cost of vector Insert and Extract. /// Use -1 to indicate that there is no information on the index value. @@ -1534,6 +1539,7 @@ public: virtual int getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) = 0; virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I) = 0; virtual int getVectorInstrCost(unsigned Opcode, Type *Val, @@ -1975,9 +1981,10 @@ public: return Impl.getCFInstrCost(Opcode, CostKind); } int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I) override { - return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I); + return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); } int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override { return Impl.getVectorInstrCost(Opcode, Val, Index); diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 4b43ee4..6e8550d 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -478,6 +478,7 @@ public: } unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I) const { return 1; } @@ -947,12 +948,14 @@ public: case Instruction::Select: { Type *CondTy = U->getOperand(0)->getType(); return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind, I); } case Instruction::ICmp: case Instruction::FCmp: { Type *ValTy = U->getOperand(0)->getType(); return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(), + cast<CmpInst>(U)->getPredicate(), CostKind, I); } case Instruction::InsertElement: { diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 1deb881..67d0881 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -881,6 +881,7 @@ public: } unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I = nullptr) { const TargetLoweringBase *TLI = getTLI(); @@ -889,7 +890,8 @@ public: // TODO: Handle other cost kinds. if (CostKind != TTI::TCK_RecipThroughput) - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, + I); // Selects on vectors are actually vector selects. if (ISD == ISD::SELECT) { @@ -914,7 +916,7 @@ public: if (CondTy) CondTy = CondTy->getScalarType(); unsigned Cost = thisT()->getCmpSelInstrCost( - Opcode, ValVTy->getScalarType(), CondTy, CostKind, I); + Opcode, ValVTy->getScalarType(), CondTy, VecPred, CostKind, I); // Return the cost of multiple scalar invocation plus the cost of // inserting and extracting the values. @@ -1241,10 +1243,12 @@ public: // For non-rotates (X != Y) we must add shift-by-zero handling costs.
if (X != Y) { Type *CondTy = RetTy->getWithNewBitWidth(1); - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, - CostKind); - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, - CondTy, CostKind); + Cost += + thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind); + Cost += + thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind); } return Cost; } @@ -1483,10 +1487,12 @@ public: Type *CondTy = RetTy->getWithNewBitWidth(1); unsigned Cost = 0; // TODO: Ideally getCmpSelInstrCost would accept an icmp condition code. - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, - CostKind); - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, - CostKind); + Cost += + thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind); + Cost += + thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind); // TODO: Should we add an OperandValueProperties::OP_Zero property? if (IID == Intrinsic::abs) Cost += thisT()->getArithmeticInstrCost( @@ -1508,10 +1514,12 @@ public: IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed); Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind); - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, - CostKind); - Cost += 2 * thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, - CondTy, CostKind); + Cost += + thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind); + Cost += 2 * thisT()->getCmpSelInstrCost( + BinaryOperator::Select, RetTy, CondTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind); return Cost; } case Intrinsic::uadd_sat: @@ -1527,8 +1535,9 @@ public: IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed); Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind); - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, - CostKind); + Cost += + thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind); return Cost; } case Intrinsic::smul_fix: @@ -1573,10 +1582,12 @@ public: // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) unsigned Cost = 0; Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind); - Cost += 3 * thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, - OverflowTy, CostKind); - Cost += 2 * thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, OverflowTy, - OverflowTy, CostKind); + Cost += 3 * thisT()->getCmpSelInstrCost( + BinaryOperator::ICmp, SumTy, OverflowTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind); + Cost += 2 * thisT()->getCmpSelInstrCost( + BinaryOperator::ICmp, OverflowTy, OverflowTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind); Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, OverflowTy, CostKind); return Cost; @@ -1591,8 +1602,9 @@ public: unsigned Cost = 0; Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind); - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, - OverflowTy, CostKind); + Cost += + thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, OverflowTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind); return Cost; } case Intrinsic::smul_with_overflow: @@ -1621,8 +1633,9 @@ public: CostKind, TTI::OK_AnyValue, TTI::OK_UniformConstantValue); - Cost += 
thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy, - OverflowTy, CostKind); + Cost += + thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy, OverflowTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind); return Cost; } case Intrinsic::ctpop: @@ -1864,9 +1877,10 @@ public: (IsPairwise + 1) * thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty, NumVecElts, SubTy); MinMaxCost += - thisT()->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, CostKind) + + thisT()->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind) + thisT()->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy, - CostKind); + CmpInst::BAD_ICMP_PREDICATE, CostKind); Ty = SubTy; ++LongVectorCount; } @@ -1888,9 +1902,10 @@ public: thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, 0, Ty); MinMaxCost += NumReduxLevels * - (thisT()->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, CostKind) + + (thisT()->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind) + thisT()->getCmpSelInstrCost(Instruction::Select, Ty, CondTy, - CostKind)); + CmpInst::BAD_ICMP_PREDICATE, CostKind)); // The last min/max should be in vector registers and we counted it above. // So just need a single extractelement. return ShuffleCost + MinMaxCost + diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 60b127b2..6443aad 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -807,11 +807,13 @@ int TargetTransformInfo::getCFInstrCost(unsigned Opcode, int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I) const { assert((I == nullptr || I->getOpcode() == Opcode) && "Opcode should reflect passed instruction."); - int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I); + int Cost = + TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 1a75f79..595f403 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#include "AArch64ExpandImm.h" #include "AArch64TargetTransformInfo.h" +#include "AArch64ExpandImm.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -16,9 +16,11 @@ #include "llvm/CodeGen/TargetLowering.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/IntrinsicsAArch64.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Support/Debug.h" #include <algorithm> using namespace llvm; +using namespace llvm::PatternMatch; #define DEBUG_TYPE "aarch64tti" @@ -675,12 +677,13 @@ int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE, } int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy, + Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I) { // TODO: Handle other cost kinds.
if (CostKind != TTI::TCK_RecipThroughput) - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, + I); int ISD = TLI->InstructionOpcodeToISD(Opcode); // We don't lower some vector selects well that are wider than the register @@ -688,6 +691,26 @@ int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, if (ValTy->isVectorTy() && ISD == ISD::SELECT) { // We would need this many instructions to hide the scalarization happening. const int AmortizationCost = 20; + + // If VecPred is not set, check if we can get a predicate from the context + // instruction, if its type matches the requested ValTy. + if (VecPred == CmpInst::BAD_ICMP_PREDICATE && I && I->getType() == ValTy) { + CmpInst::Predicate CurrentPred; + if (match(I, m_Select(m_Cmp(CurrentPred, m_Value(), m_Value()), m_Value(), + m_Value()))) + VecPred = CurrentPred; + } + // Check if we have a compare/select chain that can be lowered using CMxx & + // BFI pair. + if (CmpInst::isIntPredicate(VecPred)) { + static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16, + MVT::v8i16, MVT::v2i32, MVT::v4i32, + MVT::v2i64}; + auto LT = TLI->getTypeLegalizationCost(DL, ValTy); + if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; })) + return LT.first; + } + static const TypeConversionCostTblEntry VectorSelectTbl[] = { { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 }, @@ -707,7 +730,7 @@ int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, return Entry->Cost; } } - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); } AArch64TTIImpl::TTI::MemCmpExpansionOptions diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index af513be..f3ebdc4 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -141,6 +141,7 @@ public: int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr); int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index c26a77b..5184fd6 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -810,6 +810,7 @@ int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, } int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I) { int ISD = TLI->InstructionOpcodeToISD(Opcode); @@ -839,7 +840,8 @@ int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, } if (CostKind != TTI::TCK_RecipThroughput) - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, + I); // On NEON a vector select gets lowered to vbsl. if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) { @@ -866,8 +868,8 @@ int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, int BaseCost = ST->hasMVEIntegerOps() && ValTy->isVectorTy() ?
ST->getMVEVectorCostFactor() : 1; - return BaseCost * BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, - I); + return BaseCost * + BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); } int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE, diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index 9a0d719..890e905 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -213,6 +213,7 @@ public: const Instruction *I = nullptr); int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp index 76e8220..045e59a 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -242,13 +242,16 @@ unsigned HexagonTTIImpl::getInterleavedMemoryOpCost( } unsigned HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy, TTI::TargetCostKind CostKind, const Instruction *I) { + Type *CondTy, + CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + const Instruction *I) { if (ValTy->isVectorTy() && CostKind == TTI::TCK_RecipThroughput) { std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, ValTy); if (Opcode == Instruction::FCmp) return LT.first + FloatFactor * getTypeNumElements(ValTy); } - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); } unsigned HexagonTTIImpl::getArithmeticInstrCost( diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h index b99f512..5a5a5f7 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -134,6 +134,8 @@ public: TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, bool UseMaskForCond = false, bool UseMaskForGaps = false); unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + + CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); unsigned getArithmeticInstrCost( diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 479fc47..f313446 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -978,9 +978,11 @@ int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, } int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I) { - int Cost = BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I); + int Cost = + BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); // TODO: Handle other cost kinds.
if (CostKind != TTI::TCK_RecipThroughput) return Cost; diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h index 8c08ad2..bc94671 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -113,6 +113,7 @@ public: const Instruction *I = nullptr); int getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind); int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index 767c186..2c6659b 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -847,11 +847,11 @@ static unsigned getOperandsExtensionCost(const Instruction *I) { } int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy, + Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I) { if (CostKind != TTI::TCK_RecipThroughput) - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind); if (!ValTy->isVectorTy()) { switch (Opcode) { @@ -927,7 +927,7 @@ int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, } } - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind); } int SystemZTTIImpl:: diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h index 8b3b513..c97e099 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -97,6 +97,7 @@ public: TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 33b0eb3..fcc38f2 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -2084,11 +2084,13 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, } int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I) { // TODO: Handle other cost kinds. if (CostKind != TTI::TCK_RecipThroughput) - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, + I); // Legalize the type. 
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); @@ -2272,7 +2274,7 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy)) return LT.first * (ExtraCost + Entry->Cost); - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); } unsigned X86TTIImpl::getAtomicMemIntrinsicMaxElementSize() const { return 16; } @@ -3223,7 +3225,7 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, getScalarizationOverhead(MaskTy, DemandedElts, false, true); int ScalarCompareCost = getCmpSelInstrCost( Instruction::ICmp, Type::getInt8Ty(SrcVTy->getContext()), nullptr, - CostKind); + CmpInst::BAD_ICMP_PREDICATE, CostKind); int BranchCost = getCFInstrCost(Instruction::Br, CostKind); int MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost); int ValueSplitCost = @@ -3644,8 +3646,10 @@ int X86TTIImpl::getMinMaxCost(Type *Ty, Type *CondTy, bool IsUnsigned) { TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; // Otherwise fall back to cmp+select. - return getCmpSelInstrCost(CmpOpcode, Ty, CondTy, CostKind) + - getCmpSelInstrCost(Instruction::Select, Ty, CondTy, CostKind); + return getCmpSelInstrCost(CmpOpcode, Ty, CondTy, CmpInst::BAD_ICMP_PREDICATE, + CostKind) + + getCmpSelInstrCost(Instruction::Select, Ty, CondTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind); } int X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy, @@ -4123,9 +4127,9 @@ int X86TTIImpl::getGSScalarCost(unsigned Opcode, Type *SrcVTy, FixedVectorType::get(Type::getInt1Ty(SrcVTy->getContext()), VF); MaskUnpackCost = getScalarizationOverhead(MaskTy, DemandedElts, false, true); - int ScalarCompareCost = - getCmpSelInstrCost(Instruction::ICmp, Type::getInt1Ty(SrcVTy->getContext()), - nullptr, CostKind); + int ScalarCompareCost = getCmpSelInstrCost( + Instruction::ICmp, Type::getInt1Ty(SrcVTy->getContext()), nullptr, + CmpInst::BAD_ICMP_PREDICATE, CostKind); int BranchCost = getCFInstrCost(Instruction::Br, CostKind); MaskUnpackCost += VF * (BranchCost + ScalarCompareCost); } diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h index 687870b..17570f1c 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -133,6 +133,7 @@ public: TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp index c4d90a4..a45b5cd 100644 --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -2236,9 +2236,9 @@ template static int costAndCollectOperands( unsigned MinIdx, unsigned MaxIdx) { Operations.emplace_back(Opcode, MinIdx, MaxIdx); Type *OpType = S->getOperand(0)->getType(); - return NumRequired * - TTI.getCmpSelInstrCost(Opcode, OpType, - CmpInst::makeCmpResultType(OpType), CostKind); + return NumRequired * TTI.getCmpSelInstrCost( + Opcode, OpType, CmpInst::makeCmpResultType(OpType), + CmpInst::BAD_ICMP_PREDICATE, CostKind); }; switch (S->getSCEVType()) { diff --git
a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 124a7c4..a48a335 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -2026,7 +2026,7 @@ static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, BudgetRemaining -= TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(), nullptr, - CostKind); + CmpInst::BAD_ICMP_PREDICATE, CostKind); // Don't convert to selects if we could remove undefined behavior instead. if (passingValueIsAlwaysUndefined(OrigV, &PN) || diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 1f8e986..4925bb1 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -6613,7 +6613,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, TTI.getCmpSelInstrCost( Instruction::Select, ToVectorTy(Phi->getType(), VF), ToVectorTy(Type::getInt1Ty(Phi->getContext()), VF), - CostKind); + CmpInst::BAD_ICMP_PREDICATE, CostKind); return TTI.getCFInstrCost(Instruction::PHI, CostKind); } @@ -6702,7 +6702,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, CondTy = VectorType::get(CondTy, VF); } return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy, - CostKind, I); + CmpInst::BAD_ICMP_PREDICATE, CostKind, I); } case Instruction::ICmp: case Instruction::FCmp: { @@ -6711,8 +6711,8 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, if (canTruncateToMinimalBitwidth(Op0AsInstruction, VF)) ValTy = IntegerType::get(ValTy->getContext(), MinBWs[Op0AsInstruction]); VectorTy = ToVectorTy(ValTy, VF); - return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, nullptr, CostKind, - I); + return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, nullptr, + CmpInst::BAD_ICMP_PREDICATE, CostKind, I); } case Instruction::Store: case Instruction::Load: { diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 576d08a..113ecd0 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3539,16 +3539,17 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { case Instruction::ICmp: case Instruction::Select: { // Calculate the cost of this instruction. - int ScalarEltCost = TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy, - Builder.getInt1Ty(), - CostKind, VL0); + int ScalarEltCost = + TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy, Builder.getInt1Ty(), + CmpInst::BAD_ICMP_PREDICATE, CostKind, VL0); if (NeedToShuffleReuses) { ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost; } auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), VL.size()); int ScalarCost = VecTy->getNumElements() * ScalarEltCost; - int VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy, - CostKind, VL0); + int VecCost = + TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind, VL0); // Check if it is possible and profitable to use min/max for selects in // VL. // @@ -3560,8 +3561,9 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { // If the selects are the only uses of the compares, they will be dead // and we can adjust the cost by removing their cost. 
if (IntrinsicAndUse.second) - IntrinsicCost -= TTI->getCmpSelInstrCost(Instruction::ICmp, VecTy, - MaskTy, CostKind); + IntrinsicCost -= + TTI->getCmpSelInstrCost(Instruction::ICmp, VecTy, MaskTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind); VecCost = std::min(VecCost, IntrinsicCost); } return ReuseShuffleCost + VecCost - ScalarCost; diff --git a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll index 8c2b2b7..f2271c4 100644 --- a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll +++ b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll @@ -3,7 +3,7 @@ ; COST-LABEL: v8i8_select_eq ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = icmp eq <8 x i8> %a, %b -; COST-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x i8> %a, <8 x i8> %c +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x i8> %a, <8 x i8> %c ; CODE-LABEL: v8i8_select_eq ; CODE: bb.0 @@ -19,7 +19,7 @@ define <8 x i8> @v8i8_select_eq(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { ; COST-LABEL: v16i8_select_sgt ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = icmp sgt <16 x i8> %a, %b -; COST-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %s.1 = select <16 x i1> %cmp.1, <16 x i8> %a, <16 x i8> %c +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <16 x i1> %cmp.1, <16 x i8> %a, <16 x i8> %c ; CODE-LABEL: v16i8_select_sgt ; CODE: bb.0 @@ -35,7 +35,7 @@ define <16 x i8> @v16i8_select_sgt(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { ; COST-LABEL: v4i16_select_ne ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = icmp ne <4 x i16> %a, %b -; COST-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x i16> %a, <4 x i16> %c +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x i16> %a, <4 x i16> %c ; CODE-LABEL: v4i16_select_ne ; CODE: bb.0 @@ -51,7 +51,7 @@ define <4 x i16> @v4i16_select_ne(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) { ; COST-LABEL: v8i16_select_ugt ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = icmp ugt <8 x i16> %a, %b -; COST-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x i16> %a, <8 x i16> %c +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x i16> %a, <8 x i16> %c ; CODE-LABEL: v8i16_select_ugt ; CODE: bb.0 @@ -67,7 +67,7 @@ define <8 x i16> @v8i16_select_ugt(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) { ; COST-LABEL: v2i32_select_ule ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = icmp ule <2 x i32> %a, %b -; COST-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %s.1 = select <2 x i1> %cmp.1, <2 x i32> %a, <2 x i32> %c +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <2 x i1> %cmp.1, <2 x i32> %a, <2 x i32> %c ; CODE-LABEL: v2i32_select_ule ; CODE: bb.0 @@ -83,7 +83,7 @@ define <2 x i32> @v2i32_select_ule(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) { ; COST-LABEL: v4i32_select_ult ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = icmp ult <4 x i32> %a, %b -; COST-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x i32> %a, 
<4 x i32> %c +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x i32> %a, <4 x i32> %c ; CODE-LABEL: v4i32_select_ult ; CODE: bb.0 @@ -99,7 +99,7 @@ define <4 x i32> @v4i32_select_ult(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; COST-LABEL: v2i64_select_sle ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = icmp sle <2 x i64> %a, %b -; COST-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %s.1 = select <2 x i1> %cmp.1, <2 x i64> %a, <2 x i64> %c +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <2 x i1> %cmp.1, <2 x i64> %a, <2 x i64> %c ; CODE-LABEL: v2i64_select_sle ; CODE: bb.0 @@ -115,7 +115,7 @@ define <2 x i64> @v2i64_select_sle(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) { ; COST-LABEL: v3i64_select_sle ; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cmp.1 = icmp sle <3 x i64> %a, %b -; COST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %s.1 = select <3 x i1> %cmp.1, <3 x i64> %a, <3 x i64> %c +; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <3 x i1> %cmp.1, <3 x i64> %a, <3 x i64> %c ; CODE-LABEL: v3i64_select_sle ; CODE: bb.0 -- 2.7.4
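
As a rough usage illustration (not part of the patch): a cost-model client that already knows the predicate of the feeding compare could pass it through the new parameter, so that targets such as AArch64 can recognize the cheap compare/select lowering described in the commit message. This is only a sketch; the names TTI (a TargetTransformInfo reference), VecTy, and CondTy are assumed to exist in the caller.

  // Sketch: query the vector select cost with the known predicate of its
  // compare condition. With the default BAD_ICMP_PREDICATE (and no context
  // instruction), targets fall back to the previous, predicate-agnostic
  // estimate.
  CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
  int Cost = TTI.getCmpSelInstrCost(Instruction::Select, VecTy, CondTy, Pred,
                                    TargetTransformInfo::TCK_RecipThroughput);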