From f4257c5832aa51e960e7351929ca3d37031985b7 Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Fri, 14 Aug 2020 12:15:59 +0100 Subject: [PATCH] [SVE] Make ElementCount members private This patch changes ElementCount so that the Min and Scalable members are now private and can only be accessed via the get functions getKnownMinValue() and isScalable(). In addition I've added some other member functions for more commonly used operations. Hopefully this makes the class more useful and will reduce the need for calling getKnownMinValue(). Differential Revision: https://reviews.llvm.org/D86065 --- clang/lib/CodeGen/CGBuiltin.cpp | 3 +- clang/lib/CodeGen/CGDebugInfo.cpp | 2 +- clang/lib/CodeGen/CodeGenTypes.cpp | 3 +- llvm/include/llvm/Analysis/TargetTransformInfo.h | 4 +- llvm/include/llvm/Analysis/VectorUtils.h | 2 +- llvm/include/llvm/CodeGen/ValueTypes.h | 7 +- llvm/include/llvm/IR/DataLayout.h | 6 +- llvm/include/llvm/IR/DerivedTypes.h | 15 +- llvm/include/llvm/IR/Instructions.h | 5 +- llvm/include/llvm/Support/MachineValueType.h | 10 +- llvm/include/llvm/Support/TypeSize.h | 35 ++- llvm/lib/Analysis/InstructionSimplify.cpp | 7 +- llvm/lib/Analysis/VFABIDemangling.cpp | 2 +- llvm/lib/Analysis/ValueTracking.cpp | 3 +- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 2 +- llvm/lib/CodeGen/CodeGenPrepare.cpp | 4 +- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 +- .../CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 12 +- llvm/lib/CodeGen/TargetLoweringBase.cpp | 46 +-- llvm/lib/CodeGen/ValueTypes.cpp | 10 +- llvm/lib/IR/AsmWriter.cpp | 4 +- llvm/lib/IR/ConstantFold.cpp | 11 +- llvm/lib/IR/Constants.cpp | 12 +- llvm/lib/IR/Core.cpp | 2 +- llvm/lib/IR/DataLayout.cpp | 2 +- llvm/lib/IR/Function.cpp | 5 +- llvm/lib/IR/IRBuilder.cpp | 2 +- llvm/lib/IR/Instructions.cpp | 13 +- llvm/lib/IR/IntrinsicInst.cpp | 10 +- llvm/lib/IR/Type.cpp | 8 +- llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 3 +- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 26 +- .../InstCombine/InstCombineVectorOps.cpp | 6 +- llvm/lib/Transforms/Utils/FunctionComparator.cpp | 13 +- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 317 +++++++++++---------- llvm/lib/Transforms/Vectorize/VPlan.cpp | 21 +- llvm/lib/Transforms/Vectorize/VPlan.h | 9 +- llvm/unittests/CodeGen/ScalableVectorMVTsTest.cpp | 8 +- llvm/unittests/IR/VectorTypesTest.cpp | 10 +- 39 files changed, 367 insertions(+), 295 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 69899fd..1192fbd 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -8457,7 +8457,8 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, case SVE::BI__builtin_sve_svlen_u64: { SVETypeFlags TF(Builtin->TypeModifier); auto VTy = cast(getSVEType(TF)); - auto NumEls = llvm::ConstantInt::get(Ty, VTy->getElementCount().Min); + auto *NumEls = + llvm::ConstantInt::get(Ty, VTy->getElementCount().getKnownMinValue()); Function *F = CGM.getIntrinsic(Intrinsic::vscale, Ty); return Builder.CreateMul(NumEls, Builder.CreateCall(F)); diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index d90cffb..8a85a24 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -726,7 +726,7 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { { ASTContext::BuiltinVectorTypeInfo Info = CGM.getContext().getBuiltinVectorTypeInfo(BT); - unsigned NumElemsPerVG = (Info.EC.Min * Info.NumVectors) / 2; + unsigned NumElemsPerVG = (Info.EC.getKnownMinValue() * Info.NumVectors) / 2; // Debuggers can't extract 1bit from a vector, so will display a // bitpattern for svbool_t instead. diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index 9c072d4..aede8a5 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -586,7 +586,8 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { ASTContext::BuiltinVectorTypeInfo Info = Context.getBuiltinVectorTypeInfo(cast(Ty)); return llvm::ScalableVectorType::get(ConvertType(Info.ElementType), - Info.EC.Min * Info.NumVectors); + Info.EC.getKnownMinValue() * + Info.NumVectors); } case BuiltinType::Dependent: #define BUILTIN_TYPE(Id, SingletonId) diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index a3e6248..ffbec74 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -130,8 +130,8 @@ public: unsigned Factor); IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI, ElementCount Factor) - : IntrinsicCostAttributes(Id, CI, Factor.Min) { - assert(!Factor.Scalable); + : IntrinsicCostAttributes(Id, CI, Factor.getKnownMinValue()) { + assert(!Factor.isScalable()); } IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI, diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h index 527bba6..074960e 100644 --- a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -115,7 +115,7 @@ struct VFShape { Parameters.push_back( VFParameter({CI.arg_size(), VFParamKind::GlobalPredicate})); - return {EC.Min, EC.Scalable, Parameters}; + return {EC.getKnownMinValue(), EC.isScalable(), Parameters}; } /// Sanity check on the Parameters in the VFShape. bool hasValidParameterList() const; diff --git a/llvm/include/llvm/CodeGen/ValueTypes.h b/llvm/include/llvm/CodeGen/ValueTypes.h index db8161ca..2e172bf 100644 --- a/llvm/include/llvm/CodeGen/ValueTypes.h +++ b/llvm/include/llvm/CodeGen/ValueTypes.h @@ -304,7 +304,7 @@ namespace llvm { /// Given a vector type, return the minimum number of elements it contains. unsigned getVectorMinNumElements() const { - return getVectorElementCount().Min; + return getVectorElementCount().getKnownMinValue(); } /// Return the size of the specified value type in bits. @@ -383,7 +383,7 @@ namespace llvm { EVT getHalfNumVectorElementsVT(LLVMContext &Context) const { EVT EltVT = getVectorElementType(); auto EltCnt = getVectorElementCount(); - assert(!(EltCnt.Min & 1) && "Splitting vector, but not in half!"); + assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!"); return EVT::getVectorVT(Context, EltVT, EltCnt / 2); } @@ -398,7 +398,8 @@ namespace llvm { EVT getPow2VectorType(LLVMContext &Context) const { if (!isPow2VectorType()) { ElementCount NElts = getVectorElementCount(); - NElts.Min = 1 << Log2_32_Ceil(NElts.Min); + unsigned NewMinCount = 1 << Log2_32_Ceil(NElts.getKnownMinValue()); + NElts = ElementCount::get(NewMinCount, NElts.isScalable()); return EVT::getVectorVT(Context, getVectorElementType(), NElts); } else { diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h index 579275a..a58f381 100644 --- a/llvm/include/llvm/IR/DataLayout.h +++ b/llvm/include/llvm/IR/DataLayout.h @@ -696,9 +696,9 @@ inline TypeSize DataLayout::getTypeSizeInBits(Type *Ty) const { case Type::ScalableVectorTyID: { VectorType *VTy = cast(Ty); auto EltCnt = VTy->getElementCount(); - uint64_t MinBits = EltCnt.Min * - getTypeSizeInBits(VTy->getElementType()).getFixedSize(); - return TypeSize(MinBits, EltCnt.Scalable); + uint64_t MinBits = EltCnt.getKnownMinValue() * + getTypeSizeInBits(VTy->getElementType()).getFixedSize(); + return TypeSize(MinBits, EltCnt.isScalable()); } default: llvm_unreachable("DataLayout::getTypeSizeInBits(): Unsupported type"); diff --git a/llvm/include/llvm/IR/DerivedTypes.h b/llvm/include/llvm/IR/DerivedTypes.h index 1837f68..25ece7a 100644 --- a/llvm/include/llvm/IR/DerivedTypes.h +++ b/llvm/include/llvm/IR/DerivedTypes.h @@ -426,16 +426,16 @@ public: unsigned getNumElements() const { ElementCount EC = getElementCount(); #ifdef STRICT_FIXED_SIZE_VECTORS - assert(!EC.Scalable && + assert(!EC.isScalable() && "Request for fixed number of elements from scalable vector"); - return EC.Min; + return EC.getKnownMinValue(); #else - if (EC.Scalable) + if (EC.isScalable()) WithColor::warning() << "The code that requested the fixed number of elements has made " "the assumption that this vector is not scalable. This assumption " "was not correct, and this may lead to broken code\n"; - return EC.Min; + return EC.getKnownMinValue(); #endif } @@ -512,8 +512,8 @@ public: /// input type and the same element type. static VectorType *getHalfElementsVectorType(VectorType *VTy) { auto EltCnt = VTy->getElementCount(); - assert ((EltCnt.Min & 1) == 0 && - "Cannot halve vector with odd number of elements."); + assert(EltCnt.isKnownEven() && + "Cannot halve vector with odd number of elements."); return VectorType::get(VTy->getElementType(), EltCnt/2); } @@ -521,7 +521,8 @@ public: /// input type and the same element type. static VectorType *getDoubleElementsVectorType(VectorType *VTy) { auto EltCnt = VTy->getElementCount(); - assert((EltCnt.Min * 2ull) <= UINT_MAX && "Too many elements in vector"); + assert((EltCnt.getKnownMinValue() * 2ull) <= UINT_MAX && + "Too many elements in vector"); return VectorType::get(VTy->getElementType(), EltCnt * 2); } diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h index ac7ce75..7b41dce 100644 --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -2046,8 +2046,9 @@ public: /// Examples: shufflevector <4 x n> A, <4 x n> B, <1,2,3> /// shufflevector <4 x n> A, <4 x n> B, <1,2,3,4,5> bool changesLength() const { - unsigned NumSourceElts = - cast(Op<0>()->getType())->getElementCount().Min; + unsigned NumSourceElts = cast(Op<0>()->getType()) + ->getElementCount() + .getKnownMinValue(); unsigned NumMaskElts = ShuffleMask.size(); return NumSourceElts != NumMaskElts; } diff --git a/llvm/include/llvm/Support/MachineValueType.h b/llvm/include/llvm/Support/MachineValueType.h index 172d4fd..d88efce 100644 --- a/llvm/include/llvm/Support/MachineValueType.h +++ b/llvm/include/llvm/Support/MachineValueType.h @@ -424,7 +424,7 @@ namespace llvm { MVT getHalfNumVectorElementsVT() const { MVT EltVT = getVectorElementType(); auto EltCnt = getVectorElementCount(); - assert(!(EltCnt.Min & 1) && "Splitting vector, but not in half!"); + assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!"); return getVectorVT(EltVT, EltCnt / 2); } @@ -742,7 +742,7 @@ namespace llvm { /// Given a vector type, return the minimum number of elements it contains. unsigned getVectorMinNumElements() const { - return getVectorElementCount().Min; + return getVectorElementCount().getKnownMinValue(); } /// Returns the size of the specified MVT in bits. @@ -1207,9 +1207,9 @@ namespace llvm { } static MVT getVectorVT(MVT VT, ElementCount EC) { - if (EC.Scalable) - return getScalableVectorVT(VT, EC.Min); - return getVectorVT(VT, EC.Min); + if (EC.isScalable()) + return getScalableVectorVT(VT, EC.getKnownMinValue()); + return getVectorVT(VT, EC.getKnownMinValue()); } /// Return the value type corresponding to the specified type. This returns diff --git a/llvm/include/llvm/Support/TypeSize.h b/llvm/include/llvm/Support/TypeSize.h index 8b346ad..297c51d 100644 --- a/llvm/include/llvm/Support/TypeSize.h +++ b/llvm/include/llvm/Support/TypeSize.h @@ -27,6 +27,10 @@ template struct DenseMapInfo; class ElementCount { private: + unsigned Min; // Minimum number of vector elements. + bool Scalable; // If true, NumElements is a multiple of 'Min' determined + // at runtime rather than compile time. + /// Prevent code from using initializer-list contructors like /// ElementCount EC = {, }. The static `get*` /// methods below are preferred, as users should always make a @@ -35,10 +39,6 @@ private: ElementCount(unsigned Min, bool Scalable) : Min(Min), Scalable(Scalable) {} public: - unsigned Min; // Minimum number of vector elements. - bool Scalable; // If true, NumElements is a multiple of 'Min' determined - // at runtime rather than compile time. - ElementCount() = default; ElementCount operator*(unsigned RHS) { @@ -58,6 +58,16 @@ public: bool operator==(unsigned RHS) const { return Min == RHS && !Scalable; } bool operator!=(unsigned RHS) const { return !(*this == RHS); } + ElementCount &operator*=(unsigned RHS) { + Min *= RHS; + return *this; + } + + ElementCount &operator/=(unsigned RHS) { + Min /= RHS; + return *this; + } + ElementCount NextPowerOf2() const { return {(unsigned)llvm::NextPowerOf2(Min), Scalable}; } @@ -81,11 +91,21 @@ public: /// ///@{ No elements.. bool isZero() const { return Min == 0; } + /// At least one element. + bool isNonZero() const { return Min != 0; } + /// A return value of true indicates we know at compile time that the number + /// of elements (vscale * Min) is definitely even. However, returning false + /// does not guarantee that the total number of elements is odd. + bool isKnownEven() const { return (Min & 0x1) == 0; } /// Exactly one element. bool isScalar() const { return !Scalable && Min == 1; } /// One or more elements. bool isVector() const { return (Scalable && Min != 0) || Min > 1; } ///@} + + unsigned getKnownMinValue() const { return Min; } + + bool isScalable() const { return Scalable; } }; /// Stream operator function for `ElementCount`. @@ -322,10 +342,11 @@ template <> struct DenseMapInfo { return ElementCount::getFixed(~0U - 1); } static unsigned getHashValue(const ElementCount& EltCnt) { - if (EltCnt.Scalable) - return (EltCnt.Min * 37U) - 1U; + unsigned HashVal = EltCnt.getKnownMinValue() * 37U; + if (EltCnt.isScalable()) + return (HashVal - 1U); - return EltCnt.Min * 37U; + return HashVal; } static bool isEqual(const ElementCount& LHS, const ElementCount& RHS) { diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 3746bc6..eb41257 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -4550,7 +4550,7 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, unsigned MaskNumElts = Mask.size(); ElementCount InVecEltCount = InVecTy->getElementCount(); - bool Scalable = InVecEltCount.Scalable; + bool Scalable = InVecEltCount.isScalable(); SmallVector Indices; Indices.assign(Mask.begin(), Mask.end()); @@ -4559,7 +4559,7 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, // replace that input vector with undef. if (!Scalable) { bool MaskSelects0 = false, MaskSelects1 = false; - unsigned InVecNumElts = InVecEltCount.Min; + unsigned InVecNumElts = InVecEltCount.getKnownMinValue(); for (unsigned i = 0; i != MaskNumElts; ++i) { if (Indices[i] == -1) continue; @@ -4588,7 +4588,8 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, // is not known at compile time for scalable vectors if (!Scalable && Op0Const && !Op1Const) { std::swap(Op0, Op1); - ShuffleVectorInst::commuteShuffleMask(Indices, InVecEltCount.Min); + ShuffleVectorInst::commuteShuffleMask(Indices, + InVecEltCount.getKnownMinValue()); } // A splat of an inserted scalar constant becomes a vector constant: diff --git a/llvm/lib/Analysis/VFABIDemangling.cpp b/llvm/lib/Analysis/VFABIDemangling.cpp index 56155b2..4cdffa63 100644 --- a/llvm/lib/Analysis/VFABIDemangling.cpp +++ b/llvm/lib/Analysis/VFABIDemangling.cpp @@ -442,7 +442,7 @@ Optional VFABI::tryDemangleForVFABI(StringRef MangledName, if (!F) return None; const ElementCount EC = getECFromSignature(F->getFunctionType()); - VF = EC.Min; + VF = EC.getKnownMinValue(); } // Sanity checks. diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index f8a5cec..b592412 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -4808,7 +4808,8 @@ static bool canCreateUndefOrPoison(const Operator *Op, bool PoisonOnly) { auto *VTy = cast(Op->getOperand(0)->getType()); unsigned IdxOp = Op->getOpcode() == Instruction::InsertElement ? 2 : 1; auto *Idx = dyn_cast(Op->getOperand(IdxOp)); - if (!Idx || Idx->getZExtValue() >= VTy->getElementCount().Min) + if (!Idx || + Idx->getZExtValue() >= VTy->getElementCount().getKnownMinValue()) return true; return false; } diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 329cbe4..f263090 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -970,7 +970,7 @@ void ModuleBitcodeWriter::writeTypeTable() { // VECTOR [numelts, eltty] or // [numelts, eltty, scalable] Code = bitc::TYPE_CODE_VECTOR; - TypeVals.push_back(VT->getElementCount().Min); + TypeVals.push_back(VT->getElementCount().getKnownMinValue()); TypeVals.push_back(VE.getTypeID(VT->getElementType())); if (isa(VT)) TypeVals.push_back(true); diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 86b5d20..2034fd0 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -6957,10 +6957,10 @@ class VectorPromoteHelper { if (UseSplat) return ConstantVector::getSplat(EC, Val); - if (!EC.Scalable) { + if (!EC.isScalable()) { SmallVector ConstVec; UndefValue *UndefVal = UndefValue::get(Val->getType()); - for (unsigned Idx = 0; Idx != EC.Min; ++Idx) { + for (unsigned Idx = 0; Idx != EC.getKnownMinValue(); ++Idx) { if (Idx == ExtractIdx) ConstVec.push_back(Val); else diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 59edd03..31ac5d9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -18994,7 +18994,7 @@ static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) { // check the other type in the cast to make sure this is really legal. EVT VT = N->getValueType(0); EVT SrcEltVT = SrcVT.getVectorElementType(); - unsigned NumElts = SrcVT.getVectorElementCount().Min * N->getNumOperands(); + ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands(); EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); switch (CastOpcode) { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 85dc150..1a2c779 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -428,10 +428,10 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the // elements we want. if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) { - assert((PartEVT.getVectorElementCount().Min > - ValueVT.getVectorElementCount().Min) && - (PartEVT.getVectorElementCount().Scalable == - ValueVT.getVectorElementCount().Scalable) && + assert((PartEVT.getVectorElementCount().getKnownMinValue() > + ValueVT.getVectorElementCount().getKnownMinValue()) && + (PartEVT.getVectorElementCount().isScalable() == + ValueVT.getVectorElementCount().isScalable()) && "Cannot narrow, it would be a lossy transformation"); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, DAG.getVectorIdxConstant(0, DL)); @@ -3751,7 +3751,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (IsVectorGEP && !N.getValueType().isVector()) { LLVMContext &Context = *DAG.getContext(); EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorElementCount); - if (VectorElementCount.Scalable) + if (VectorElementCount.isScalable()) N = DAG.getSplatVector(VT, dl, N); else N = DAG.getSplatBuildVector(VT, dl, N); @@ -3824,7 +3824,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (!IdxN.getValueType().isVector() && IsVectorGEP) { EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(), VectorElementCount); - if (VectorElementCount.Scalable) + if (VectorElementCount.isScalable()) IdxN = DAG.getSplatVector(VT, dl, IdxN); else IdxN = DAG.getSplatBuildVector(VT, dl, IdxN); diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 40bb45a..958bb79 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -964,23 +964,24 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, // Scalable vectors cannot be scalarized, so splitting or widening is // required. - if (VT.isScalableVector() && !isPowerOf2_32(EC.Min)) + if (VT.isScalableVector() && !isPowerOf2_32(EC.getKnownMinValue())) llvm_unreachable( "Splitting or widening of non-power-of-2 MVTs is not implemented."); // FIXME: We don't support non-power-of-2-sized vectors for now. // Ideally we could break down into LHS/RHS like LegalizeDAG does. - if (!isPowerOf2_32(EC.Min)) { + if (!isPowerOf2_32(EC.getKnownMinValue())) { // Split EC to unit size (scalable property is preserved). - NumVectorRegs = EC.Min; - EC = EC / NumVectorRegs; + NumVectorRegs = EC.getKnownMinValue(); + EC = ElementCount::getFixed(1); } // Divide the input until we get to a supported size. This will // always end up with an EC that represent a scalar or a scalable // scalar. - while (EC.Min > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, EC))) { - EC.Min >>= 1; + while (EC.getKnownMinValue() > 1 && + !TLI->isTypeLegal(MVT::getVectorVT(EltTy, EC))) { + EC /= 2; NumVectorRegs <<= 1; } @@ -1315,13 +1316,15 @@ void TargetLoweringBase::computeRegisterProperties( } case TypeWidenVector: - if (isPowerOf2_32(EC.Min)) { + if (isPowerOf2_32(EC.getKnownMinValue())) { // Try to widen the vector. for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { MVT SVT = (MVT::SimpleValueType) nVT; if (SVT.getVectorElementType() == EltVT && SVT.isScalableVector() == IsScalable && - SVT.getVectorElementCount().Min > EC.Min && isTypeLegal(SVT)) { + SVT.getVectorElementCount().getKnownMinValue() > + EC.getKnownMinValue() && + isTypeLegal(SVT)) { TransformToType[i] = SVT; RegisterTypeForVT[i] = SVT; NumRegistersForVT[i] = 1; @@ -1365,10 +1368,10 @@ void TargetLoweringBase::computeRegisterProperties( ValueTypeActions.setTypeAction(VT, TypeScalarizeVector); else if (PreferredAction == TypeSplitVector) ValueTypeActions.setTypeAction(VT, TypeSplitVector); - else if (EC.Min > 1) + else if (EC.getKnownMinValue() > 1) ValueTypeActions.setTypeAction(VT, TypeSplitVector); else - ValueTypeActions.setTypeAction(VT, EC.Scalable + ValueTypeActions.setTypeAction(VT, EC.isScalable() ? TypeScalarizeScalableVector : TypeScalarizeVector); } else { @@ -1426,7 +1429,8 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT // This handles things like <2 x float> -> <4 x float> and // <4 x i1> -> <4 x i32>. LegalizeTypeAction TA = getTypeAction(Context, VT); - if (EltCnt.Min != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) { + if (EltCnt.getKnownMinValue() != 1 && + (TA == TypeWidenVector || TA == TypePromoteInteger)) { EVT RegisterEVT = getTypeToTransformTo(Context, VT); if (isTypeLegal(RegisterEVT)) { IntermediateVT = RegisterEVT; @@ -1443,7 +1447,7 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT // Scalable vectors cannot be scalarized, so handle the legalisation of the // types like done elsewhere in SelectionDAG. - if (VT.isScalableVector() && !isPowerOf2_32(EltCnt.Min)) { + if (VT.isScalableVector() && !isPowerOf2_32(EltCnt.getKnownMinValue())) { LegalizeKind LK; EVT PartVT = VT; do { @@ -1452,15 +1456,15 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT PartVT = LK.second; } while (LK.first != TypeLegal); - NumIntermediates = - VT.getVectorElementCount().Min / PartVT.getVectorElementCount().Min; + NumIntermediates = VT.getVectorElementCount().getKnownMinValue() / + PartVT.getVectorElementCount().getKnownMinValue(); // FIXME: This code needs to be extended to handle more complex vector // breakdowns, like nxv7i64 -> nxv8i64 -> 4 x nxv2i64. Currently the only // supported cases are vectors that are broken down into equal parts // such as nxv6i64 -> 3 x nxv2i64. - assert(NumIntermediates * PartVT.getVectorElementCount().Min == - VT.getVectorElementCount().Min && + assert((PartVT.getVectorElementCount() * NumIntermediates) == + VT.getVectorElementCount() && "Expected an integer multiple of PartVT"); IntermediateVT = PartVT; RegisterVT = getRegisterType(Context, IntermediateVT); @@ -1469,16 +1473,16 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally // we could break down into LHS/RHS like LegalizeDAG does. - if (!isPowerOf2_32(EltCnt.Min)) { - NumVectorRegs = EltCnt.Min; - EltCnt.Min = 1; + if (!isPowerOf2_32(EltCnt.getKnownMinValue())) { + NumVectorRegs = EltCnt.getKnownMinValue(); + EltCnt = ElementCount::getFixed(1); } // Divide the input until we get to a supported size. This will always // end with a scalar if the target doesn't support vectors. - while (EltCnt.Min > 1 && + while (EltCnt.getKnownMinValue() > 1 && !isTypeLegal(EVT::getVectorVT(Context, EltTy, EltCnt))) { - EltCnt.Min >>= 1; + EltCnt /= 2; NumVectorRegs <<= 1; } diff --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp index c397451..72eda95 100644 --- a/llvm/lib/CodeGen/ValueTypes.cpp +++ b/llvm/lib/CodeGen/ValueTypes.cpp @@ -122,13 +122,13 @@ EVT EVT::getExtendedVectorElementType() const { unsigned EVT::getExtendedVectorNumElements() const { assert(isExtended() && "Type is not extended!"); ElementCount EC = cast(LLVMTy)->getElementCount(); - if (EC.Scalable) { + if (EC.isScalable()) { WithColor::warning() << "The code that requested the fixed number of elements has made the " "assumption that this vector is not scalable. This assumption was " "not correct, and this may lead to broken code\n"; } - return EC.Min; + return EC.getKnownMinValue(); } ElementCount EVT::getExtendedVectorElementCount() const { @@ -150,9 +150,9 @@ std::string EVT::getEVTString() const { switch (V.SimpleTy) { default: if (isVector()) - return (isScalableVector() ? "nxv" : "v") - + utostr(getVectorElementCount().Min) - + getVectorElementType().getEVTString(); + return (isScalableVector() ? "nxv" : "v") + + utostr(getVectorElementCount().getKnownMinValue()) + + getVectorElementType().getEVTString(); if (isInteger()) return "i" + utostr(getSizeInBits()); if (isFloatingPoint()) diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index 0b2ac85..8cb1883 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -656,9 +656,9 @@ void TypePrinting::print(Type *Ty, raw_ostream &OS) { VectorType *PTy = cast(Ty); ElementCount EC = PTy->getElementCount(); OS << "<"; - if (EC.Scalable) + if (EC.isScalable()) OS << "vscale x "; - OS << EC.Min << " x "; + OS << EC.getKnownMinValue() << " x "; print(PTy->getElementType(), OS); OS << '>'; return; diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp index e5c4250..468dce9 100644 --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -931,7 +931,7 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1, Constant *V2, // If the mask is all zeros this is a splat, no need to go through all // elements. if (all_of(Mask, [](int Elt) { return Elt == 0; }) && - !MaskEltCount.Scalable) { + !MaskEltCount.isScalable()) { Type *Ty = IntegerType::get(V1->getContext(), 32); Constant *Elt = ConstantExpr::getExtractElement(V1, ConstantInt::get(Ty, 0)); @@ -942,7 +942,7 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1, Constant *V2, if (isa(V1VTy)) return nullptr; - unsigned SrcNumElts = V1VTy->getElementCount().Min; + unsigned SrcNumElts = V1VTy->getElementCount().getKnownMinValue(); // Loop over the shuffle mask, evaluating each element. SmallVector Result; @@ -2056,11 +2056,12 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, SmallVector ResElts; Type *Ty = IntegerType::get(C1->getContext(), 32); // Compare the elements, producing an i1 result or constant expr. - for (unsigned i = 0, e = C1VTy->getElementCount().Min; i != e; ++i) { + for (unsigned I = 0, E = C1VTy->getElementCount().getKnownMinValue(); + I != E; ++I) { Constant *C1E = - ConstantExpr::getExtractElement(C1, ConstantInt::get(Ty, i)); + ConstantExpr::getExtractElement(C1, ConstantInt::get(Ty, I)); Constant *C2E = - ConstantExpr::getExtractElement(C2, ConstantInt::get(Ty, i)); + ConstantExpr::getExtractElement(C2, ConstantInt::get(Ty, I)); ResElts.push_back(ConstantExpr::getCompare(pred, C1E, C2E)); } diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp index 8d960ea..d84c7bc 100644 --- a/llvm/lib/IR/Constants.cpp +++ b/llvm/lib/IR/Constants.cpp @@ -1300,14 +1300,14 @@ Constant *ConstantVector::getImpl(ArrayRef V) { } Constant *ConstantVector::getSplat(ElementCount EC, Constant *V) { - if (!EC.Scalable) { + if (!EC.isScalable()) { // If this splat is compatible with ConstantDataVector, use it instead of // ConstantVector. if ((isa(V) || isa(V)) && ConstantDataSequential::isElementTypeCompatible(V->getType())) - return ConstantDataVector::getSplat(EC.Min, V); + return ConstantDataVector::getSplat(EC.getKnownMinValue(), V); - SmallVector Elts(EC.Min, V); + SmallVector Elts(EC.getKnownMinValue(), V); return get(Elts); } @@ -1324,7 +1324,7 @@ Constant *ConstantVector::getSplat(ElementCount EC, Constant *V) { Constant *UndefV = UndefValue::get(VTy); V = ConstantExpr::getInsertElement(UndefV, V, ConstantInt::get(I32Ty, 0)); // Build shuffle mask to perform the splat. - SmallVector Zeros(EC.Min, 0); + SmallVector Zeros(EC.getKnownMinValue(), 0); // Splat. return ConstantExpr::getShuffleVector(V, UndefV, Zeros); } @@ -2264,7 +2264,7 @@ Constant *ConstantExpr::getGetElementPtr(Type *Ty, Constant *C, if (VectorType *VecTy = dyn_cast(Idx->getType())) EltCount = VecTy->getElementCount(); - if (EltCount.Min != 0) + if (EltCount.isNonZero()) ReqTy = VectorType::get(ReqTy, EltCount); if (OnlyIfReducedTy == ReqTy) @@ -2284,7 +2284,7 @@ Constant *ConstantExpr::getGetElementPtr(Type *Ty, Constant *C, if (GTI.isStruct() && Idx->getType()->isVectorTy()) { Idx = Idx->getSplatValue(); - } else if (GTI.isSequential() && EltCount.Min != 0 && + } else if (GTI.isSequential() && EltCount.isNonZero() && !Idx->getType()->isVectorTy()) { Idx = ConstantVector::getSplat(EltCount, Idx); } diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp index 71faa50..8598acc 100644 --- a/llvm/lib/IR/Core.cpp +++ b/llvm/lib/IR/Core.cpp @@ -781,7 +781,7 @@ unsigned LLVMGetPointerAddressSpace(LLVMTypeRef PointerTy) { } unsigned LLVMGetVectorSize(LLVMTypeRef VectorTy) { - return unwrap(VectorTy)->getElementCount().Min; + return unwrap(VectorTy)->getElementCount().getKnownMinValue(); } /*--.. Operations on other types ...........................................--*/ diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp index 31b227d..ffb3adc 100644 --- a/llvm/lib/IR/DataLayout.cpp +++ b/llvm/lib/IR/DataLayout.cpp @@ -630,7 +630,7 @@ Align DataLayout::getAlignmentInfo(AlignTypeEnum AlignType, uint32_t BitWidth, // We're only calculating a natural alignment, so it doesn't have to be // based on the full size for scalable vectors. Using the minimum element // count should be enough here. - Alignment *= cast(Ty)->getElementCount().Min; + Alignment *= cast(Ty)->getElementCount().getKnownMinValue(); Alignment = PowerOf2Ceil(Alignment); return Align(Alignment); } diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp index b29a00c..e701fea 100644 --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -714,9 +714,10 @@ static std::string getMangledTypeStr(Type* Ty) { Result += "f"; } else if (VectorType* VTy = dyn_cast(Ty)) { ElementCount EC = VTy->getElementCount(); - if (EC.Scalable) + if (EC.isScalable()) Result += "nx"; - Result += "v" + utostr(EC.Min) + getMangledTypeStr(VTy->getElementType()); + Result += "v" + utostr(EC.getKnownMinValue()) + + getMangledTypeStr(VTy->getElementType()); } else if (Ty) { switch (Ty->getTypeID()) { default: llvm_unreachable("Unhandled type"); diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp index 33a0f5b..d6eeffd 100644 --- a/llvm/lib/IR/IRBuilder.cpp +++ b/llvm/lib/IR/IRBuilder.cpp @@ -1003,7 +1003,7 @@ Value *IRBuilderBase::CreateVectorSplat(unsigned NumElts, Value *V, Value *IRBuilderBase::CreateVectorSplat(ElementCount EC, Value *V, const Twine &Name) { - assert(EC.Min > 0 && "Cannot splat to an empty vector!"); + assert(EC.isNonZero() && "Cannot splat to an empty vector!"); // First insert it into an undef vector so we can shuffle it. Type *I32Ty = getInt32Ty(); diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index 48f4161..445fad8 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -1967,7 +1967,8 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2, return false; // Make sure the mask elements make sense. - int V1Size = cast(V1->getType())->getElementCount().Min; + int V1Size = + cast(V1->getType())->getElementCount().getKnownMinValue(); for (int Elem : Mask) if (Elem != UndefMaskElem && Elem >= V1Size * 2) return false; @@ -2026,22 +2027,22 @@ void ShuffleVectorInst::getShuffleMask(const Constant *Mask, ElementCount EC = cast(Mask->getType())->getElementCount(); if (isa(Mask)) { - Result.resize(EC.Min, 0); + Result.resize(EC.getKnownMinValue(), 0); return; } - Result.reserve(EC.Min); + Result.reserve(EC.getKnownMinValue()); - if (EC.Scalable) { + if (EC.isScalable()) { assert((isa(Mask) || isa(Mask)) && "Scalable vector shuffle mask must be undef or zeroinitializer"); int MaskVal = isa(Mask) ? -1 : 0; - for (unsigned I = 0; I < EC.Min; ++I) + for (unsigned I = 0; I < EC.getKnownMinValue(); ++I) Result.emplace_back(MaskVal); return; } - unsigned NumElts = EC.Min; + unsigned NumElts = EC.getKnownMinValue(); if (auto *CDS = dyn_cast(Mask)) { for (unsigned i = 0; i != NumElts; ++i) diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp index c4e06cd..b6b036d 100644 --- a/llvm/lib/IR/IntrinsicInst.cpp +++ b/llvm/lib/IR/IntrinsicInst.cpp @@ -280,8 +280,8 @@ bool VPIntrinsic::canIgnoreVectorLengthParam() const { // the operation. This function returns true when this is detected statically // in the IR. - // Check whether "W == vscale * EC.Min" - if (EC.Scalable) { + // Check whether "W == vscale * EC.getKnownMinValue()" + if (EC.isScalable()) { // Undig the DL auto ParMod = this->getModule(); if (!ParMod) @@ -291,8 +291,8 @@ bool VPIntrinsic::canIgnoreVectorLengthParam() const { // Compare vscale patterns uint64_t VScaleFactor; if (match(VLParam, m_c_Mul(m_ConstantInt(VScaleFactor), m_VScale(DL)))) - return VScaleFactor >= EC.Min; - return (EC.Min == 1) && match(VLParam, m_VScale(DL)); + return VScaleFactor >= EC.getKnownMinValue(); + return (EC.getKnownMinValue() == 1) && match(VLParam, m_VScale(DL)); } // standard SIMD operation @@ -301,7 +301,7 @@ bool VPIntrinsic::canIgnoreVectorLengthParam() const { return false; uint64_t VLNum = VLConst->getZExtValue(); - if (VLNum >= EC.Min) + if (VLNum >= EC.getKnownMinValue()) return true; return false; diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp index 1cf0d13..ce374fa 100644 --- a/llvm/lib/IR/Type.cpp +++ b/llvm/lib/IR/Type.cpp @@ -128,7 +128,7 @@ TypeSize Type::getPrimitiveSizeInBits() const { ElementCount EC = VTy->getElementCount(); TypeSize ETS = VTy->getElementType()->getPrimitiveSizeInBits(); assert(!ETS.isScalable() && "Vector type should have fixed-width elements"); - return {ETS.getFixedSize() * EC.Min, EC.Scalable}; + return {ETS.getFixedSize() * EC.getKnownMinValue(), EC.isScalable()}; } default: return TypeSize::Fixed(0); } @@ -598,10 +598,10 @@ VectorType::VectorType(Type *ElType, unsigned EQ, Type::TypeID TID) } VectorType *VectorType::get(Type *ElementType, ElementCount EC) { - if (EC.Scalable) - return ScalableVectorType::get(ElementType, EC.Min); + if (EC.isScalable()) + return ScalableVectorType::get(ElementType, EC.getKnownMinValue()); else - return FixedVectorType::get(ElementType, EC.Min); + return FixedVectorType::get(ElementType, EC.getKnownMinValue()); } bool VectorType::isValidElementType(Type *ElemTy) { diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 1844586..fda514b 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -4827,7 +4827,8 @@ static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT, return EVT(); ElementCount EC = PredVT.getVectorElementCount(); - EVT ScalarVT = EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.Min); + EVT ScalarVT = + EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue()); EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec); return MemVT; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index c904fc7..e68183dc 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -3532,8 +3532,9 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op, // 256 bit non-temporal stores can be lowered to STNP. Do this as part of // the custom lowering, as there are no un-paired non-temporal stores and // legalization will break up 256 bit inputs. + ElementCount EC = MemVT.getVectorElementCount(); if (StoreNode->isNonTemporal() && MemVT.getSizeInBits() == 256u && - MemVT.getVectorElementCount().Min % 2u == 0 && + EC.isKnownEven() && ((MemVT.getScalarSizeInBits() == 8u || MemVT.getScalarSizeInBits() == 16u || MemVT.getScalarSizeInBits() == 32u || @@ -3542,11 +3543,11 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op, DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl, MemVT.getHalfNumVectorElementsVT(*DAG.getContext()), StoreNode->getValue(), DAG.getConstant(0, Dl, MVT::i64)); - SDValue Hi = DAG.getNode( - ISD::EXTRACT_SUBVECTOR, Dl, - MemVT.getHalfNumVectorElementsVT(*DAG.getContext()), - StoreNode->getValue(), - DAG.getConstant(MemVT.getVectorElementCount().Min / 2, Dl, MVT::i64)); + SDValue Hi = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl, + MemVT.getHalfNumVectorElementsVT(*DAG.getContext()), + StoreNode->getValue(), + DAG.getConstant(EC.getKnownMinValue() / 2, Dl, MVT::i64)); SDValue Result = DAG.getMemIntrinsicNode( AArch64ISD::STNP, Dl, DAG.getVTList(MVT::Other), {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()}, @@ -10370,7 +10371,7 @@ SDValue AArch64TargetLowering::LowerSVEStructLoad(unsigned Intrinsic, {Intrinsic::aarch64_sve_ld4, {4, AArch64ISD::SVE_LD4_MERGE_ZERO}}}; std::tie(N, Opcode) = IntrinsicMap[Intrinsic]; - assert(VT.getVectorElementCount().Min % N == 0 && + assert(VT.getVectorElementCount().getKnownMinValue() % N == 0 && "invalid tuple vector type!"); EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), @@ -14443,7 +14444,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, uint64_t IdxConst = cast(Idx)->getZExtValue(); EVT ResVT = N->getValueType(0); - uint64_t NumLanes = ResVT.getVectorElementCount().Min; + uint64_t NumLanes = ResVT.getVectorElementCount().getKnownMinValue(); SDValue ExtIdx = DAG.getVectorIdxConstant(IdxConst * NumLanes, DL); SDValue Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, Src1, ExtIdx); @@ -14457,10 +14458,11 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, SDValue Vec = N->getOperand(4); EVT TupleVT = Tuple.getValueType(); - uint64_t TupleLanes = TupleVT.getVectorElementCount().Min; + uint64_t TupleLanes = TupleVT.getVectorElementCount().getKnownMinValue(); uint64_t IdxConst = cast(Idx)->getZExtValue(); - uint64_t NumLanes = Vec.getValueType().getVectorElementCount().Min; + uint64_t NumLanes = + Vec.getValueType().getVectorElementCount().getKnownMinValue(); if ((TupleLanes % NumLanes) != 0) report_fatal_error("invalid tuple vector!"); @@ -14696,7 +14698,7 @@ void AArch64TargetLowering::ReplaceExtractSubVectorResults( ElementCount ResEC = VT.getVectorElementCount(); - if (InVT.getVectorElementCount().Min != (ResEC.Min * 2)) + if (InVT.getVectorElementCount() != (ResEC * 2)) return; auto *CIndex = dyn_cast(N->getOperand(1)); @@ -14704,7 +14706,7 @@ void AArch64TargetLowering::ReplaceExtractSubVectorResults( return; unsigned Index = CIndex->getZExtValue(); - if ((Index != 0) && (Index != ResEC.Min)) + if ((Index != 0) && (Index != ResEC.getKnownMinValue())) return; unsigned Opcode = (Index == 0) ? AArch64ISD::UUNPKLO : AArch64ISD::UUNPKHI; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index f6cd364..5cd1822 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -340,17 +340,17 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) { auto *IndexC = dyn_cast(Index); if (IndexC) { ElementCount EC = EI.getVectorOperandType()->getElementCount(); - unsigned NumElts = EC.Min; + unsigned NumElts = EC.getKnownMinValue(); // InstSimplify should handle cases where the index is invalid. // For fixed-length vector, it's invalid to extract out-of-range element. - if (!EC.Scalable && IndexC->getValue().uge(NumElts)) + if (!EC.isScalable() && IndexC->getValue().uge(NumElts)) return nullptr; // This instruction only demands the single element from the input vector. // Skip for scalable type, the number of elements is unknown at // compile-time. - if (!EC.Scalable && NumElts != 1) { + if (!EC.isScalable() && NumElts != 1) { // If the input vector has a single use, simplify it based on this use // property. if (SrcVec->hasOneUse()) { diff --git a/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/llvm/lib/Transforms/Utils/FunctionComparator.cpp index 101cb23..dfab936 100644 --- a/llvm/lib/Transforms/Utils/FunctionComparator.cpp +++ b/llvm/lib/Transforms/Utils/FunctionComparator.cpp @@ -488,12 +488,13 @@ int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const { case Type::ScalableVectorTyID: { auto *STyL = cast(TyL); auto *STyR = cast(TyR); - if (STyL->getElementCount().Scalable != STyR->getElementCount().Scalable) - return cmpNumbers(STyL->getElementCount().Scalable, - STyR->getElementCount().Scalable); - if (STyL->getElementCount().Min != STyR->getElementCount().Min) - return cmpNumbers(STyL->getElementCount().Min, - STyR->getElementCount().Min); + if (STyL->getElementCount().isScalable() != + STyR->getElementCount().isScalable()) + return cmpNumbers(STyL->getElementCount().isScalable(), + STyR->getElementCount().isScalable()); + if (STyL->getElementCount() != STyR->getElementCount()) + return cmpNumbers(STyL->getElementCount().getKnownMinValue(), + STyR->getElementCount().getKnownMinValue()); return cmpTypes(STyL->getElementType(), STyR->getElementType()); } } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index f999c5a..1f82995 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -342,7 +342,7 @@ static Type *getMemInstValueType(Value *I) { /// type is irregular if its allocated size doesn't equal the store size of an /// element of the corresponding vector type at the given vectorization factor. static bool hasIrregularType(Type *Ty, const DataLayout &DL, ElementCount VF) { - assert(!VF.Scalable && "scalable vectors not yet supported."); + assert(!VF.isScalable() && "scalable vectors not yet supported."); // Determine if an array of VF elements of type Ty is "bitcast compatible" // with a vector. if (VF.isVector()) { @@ -899,8 +899,9 @@ void InnerLoopVectorizer::setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr) const DILocation *DIL = Inst->getDebugLoc(); if (DIL && Inst->getFunction()->isDebugInfoForProfiling() && !isa(Inst)) { - assert(!VF.Scalable && "scalable vectors not yet supported."); - auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(UF * VF.Min); + assert(!VF.isScalable() && "scalable vectors not yet supported."); + auto NewDIL = + DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue()); if (NewDIL) B.SetCurrentDebugLocation(NewDIL.getValue()); else @@ -1216,7 +1217,7 @@ public: /// width \p VF. Return CM_Unknown if this instruction did not pass /// through the cost modeling. InstWidening getWideningDecision(Instruction *I, ElementCount VF) { - assert(!VF.Scalable && "scalable vectors not yet supported."); + assert(!VF.isScalable() && "scalable vectors not yet supported."); assert(VF.isVector() && "Expected VF >=2"); // Cost model is not run in the VPlan-native path - return conservative @@ -1837,7 +1838,8 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI( // Multiply the vectorization factor by the step using integer or // floating-point arithmetic as appropriate. - Value *ConstVF = getSignedIntOrFpConstant(Step->getType(), VF.Min); + Value *ConstVF = + getSignedIntOrFpConstant(Step->getType(), VF.getKnownMinValue()); Value *Mul = addFastMathFlag(Builder.CreateBinOp(MulOp, Step, ConstVF)); // Create a vector splat to use in the induction update. @@ -1845,7 +1847,7 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI( // FIXME: If the step is non-constant, we create the vector splat with // IRBuilder. IRBuilder can constant-fold the multiply, but it doesn't // handle a constant vector splat. - assert(!VF.Scalable && "scalable vectors not yet supported."); + assert(!VF.isScalable() && "scalable vectors not yet supported."); Value *SplatVF = isa(Mul) ? ConstantVector::getSplat(VF, cast(Mul)) : Builder.CreateVectorSplat(VF, Mul); @@ -1982,9 +1984,10 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc) { auto CreateSplatIV = [&](Value *ScalarIV, Value *Step) { Value *Broadcasted = getBroadcastInstrs(ScalarIV); for (unsigned Part = 0; Part < UF; ++Part) { - assert(!VF.Scalable && "scalable vectors not yet supported."); - Value *EntryPart = getStepVector(Broadcasted, VF.Min * Part, Step, - ID.getInductionOpcode()); + assert(!VF.isScalable() && "scalable vectors not yet supported."); + Value *EntryPart = + getStepVector(Broadcasted, VF.getKnownMinValue() * Part, Step, + ID.getInductionOpcode()); VectorLoopValueMap.setVectorValue(EntryVal, Part, EntryPart); if (Trunc) addMetadata(EntryPart, Trunc); @@ -2093,7 +2096,7 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step, const InductionDescriptor &ID) { // We shouldn't have to build scalar steps if we aren't vectorizing. assert(VF.isVector() && "VF should be greater than one"); - assert(!VF.Scalable && + assert(!VF.isScalable() && "the code below assumes a fixed number of elements at compile time"); // Get the value type and ensure it and the step have the same integer type. Type *ScalarIVTy = ScalarIV->getType()->getScalarType(); @@ -2118,12 +2121,12 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step, unsigned Lanes = Cost->isUniformAfterVectorization(cast(EntryVal), VF) ? 1 - : VF.Min; + : VF.getKnownMinValue(); // Compute the scalar steps and save the results in VectorLoopValueMap. for (unsigned Part = 0; Part < UF; ++Part) { for (unsigned Lane = 0; Lane < Lanes; ++Lane) { - auto *StartIdx = - getSignedIntOrFpConstant(ScalarIVTy, VF.Min * Part + Lane); + auto *StartIdx = getSignedIntOrFpConstant( + ScalarIVTy, VF.getKnownMinValue() * Part + Lane); auto *Mul = addFastMathFlag(Builder.CreateBinOp(MulOp, StartIdx, Step)); auto *Add = addFastMathFlag(Builder.CreateBinOp(AddOp, ScalarIV, Mul)); VectorLoopValueMap.setScalarValue(EntryVal, {Part, Lane}, Add); @@ -2166,9 +2169,10 @@ Value *InnerLoopVectorizer::getOrCreateVectorValue(Value *V, unsigned Part) { // is known to be uniform after vectorization, this corresponds to lane zero // of the Part unroll iteration. Otherwise, the last instruction is the one // we created for the last vector lane of the Part unroll iteration. - assert(!VF.Scalable && "scalable vectors not yet supported."); - unsigned LastLane = - Cost->isUniformAfterVectorization(I, VF) ? 0 : VF.Min - 1; + assert(!VF.isScalable() && "scalable vectors not yet supported."); + unsigned LastLane = Cost->isUniformAfterVectorization(I, VF) + ? 0 + : VF.getKnownMinValue() - 1; auto *LastInst = cast( VectorLoopValueMap.getScalarValue(V, {Part, LastLane})); @@ -2190,10 +2194,10 @@ Value *InnerLoopVectorizer::getOrCreateVectorValue(Value *V, unsigned Part) { VectorLoopValueMap.setVectorValue(V, Part, VectorValue); } else { // Initialize packing with insertelements to start from undef. - assert(!VF.Scalable && "VF is assumed to be non scalable."); + assert(!VF.isScalable() && "VF is assumed to be non scalable."); Value *Undef = UndefValue::get(VectorType::get(V->getType(), VF)); VectorLoopValueMap.setVectorValue(V, Part, Undef); - for (unsigned Lane = 0; Lane < VF.Min; ++Lane) + for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane) packScalarIntoVectorValue(V, {Part, Lane}); VectorValue = VectorLoopValueMap.getVectorValue(V, Part); } @@ -2257,10 +2261,10 @@ void InnerLoopVectorizer::packScalarIntoVectorValue( Value *InnerLoopVectorizer::reverseVector(Value *Vec) { assert(Vec->getType()->isVectorTy() && "Invalid type"); - assert(!VF.Scalable && "Cannot reverse scalable vectors"); + assert(!VF.isScalable() && "Cannot reverse scalable vectors"); SmallVector ShuffleMask; - for (unsigned i = 0; i < VF.Min; ++i) - ShuffleMask.push_back(VF.Min - i - 1); + for (unsigned i = 0; i < VF.getKnownMinValue(); ++i) + ShuffleMask.push_back(VF.getKnownMinValue() - i - 1); return Builder.CreateShuffleVector(Vec, UndefValue::get(Vec->getType()), ShuffleMask, "reverse"); @@ -2314,7 +2318,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup( // Prepare for the vector type of the interleaved load/store. Type *ScalarTy = getMemInstValueType(Instr); unsigned InterleaveFactor = Group->getFactor(); - assert(!VF.Scalable && "scalable vectors not yet supported."); + assert(!VF.isScalable() && "scalable vectors not yet supported."); auto *VecTy = VectorType::get(ScalarTy, VF * InterleaveFactor); // Prepare for the new pointers. @@ -2331,10 +2335,10 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup( // pointer operand of the interleaved access is supposed to be uniform. For // uniform instructions, we're only required to generate a value for the // first vector lane in each unroll iteration. - assert(!VF.Scalable && + assert(!VF.isScalable() && "scalable vector reverse operation is not implemented"); if (Group->isReverse()) - Index += (VF.Min - 1) * Group->getFactor(); + Index += (VF.getKnownMinValue() - 1) * Group->getFactor(); for (unsigned Part = 0; Part < UF; Part++) { Value *AddrPart = State.get(Addr, {Part, 0}); @@ -2369,8 +2373,8 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup( Value *MaskForGaps = nullptr; if (Group->requiresScalarEpilogue() && !Cost->isScalarEpilogueAllowed()) { - assert(!VF.Scalable && "scalable vectors not yet supported."); - MaskForGaps = createBitMaskForGaps(Builder, VF.Min, *Group); + assert(!VF.isScalable() && "scalable vectors not yet supported."); + MaskForGaps = createBitMaskForGaps(Builder, VF.getKnownMinValue(), *Group); assert(MaskForGaps && "Mask for Gaps is required but it is null"); } @@ -2387,10 +2391,10 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup( if (BlockInMask) { Value *BlockInMaskPart = State.get(BlockInMask, Part); auto *Undefs = UndefValue::get(BlockInMaskPart->getType()); - assert(!VF.Scalable && "scalable vectors not yet supported."); + assert(!VF.isScalable() && "scalable vectors not yet supported."); Value *ShuffledMask = Builder.CreateShuffleVector( BlockInMaskPart, Undefs, - createReplicatedMask(InterleaveFactor, VF.Min), + createReplicatedMask(InterleaveFactor, VF.getKnownMinValue()), "interleaved.mask"); GroupMask = MaskForGaps ? Builder.CreateBinOp(Instruction::And, ShuffledMask, @@ -2417,15 +2421,16 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup( if (!Member) continue; - assert(!VF.Scalable && "scalable vectors not yet supported."); - auto StrideMask = createStrideMask(I, InterleaveFactor, VF.Min); + assert(!VF.isScalable() && "scalable vectors not yet supported."); + auto StrideMask = + createStrideMask(I, InterleaveFactor, VF.getKnownMinValue()); for (unsigned Part = 0; Part < UF; Part++) { Value *StridedVec = Builder.CreateShuffleVector( NewLoads[Part], UndefVec, StrideMask, "strided.vec"); // If this member has different type, cast the result type. if (Member->getType() != ScalarTy) { - assert(!VF.Scalable && "VF is assumed to be non scalable."); + assert(!VF.isScalable() && "VF is assumed to be non scalable."); VectorType *OtherVTy = VectorType::get(Member->getType(), VF); StridedVec = createBitOrPointerCast(StridedVec, OtherVTy, DL); } @@ -2440,7 +2445,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup( } // The sub vector type for current instruction. - assert(!VF.Scalable && "VF is assumed to be non scalable."); + assert(!VF.isScalable() && "VF is assumed to be non scalable."); auto *SubVT = VectorType::get(ScalarTy, VF); // Vectorize the interleaved store group. @@ -2469,9 +2474,10 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup( Value *WideVec = concatenateVectors(Builder, StoredVecs); // Interleave the elements in the wide vector. - assert(!VF.Scalable && "scalable vectors not yet supported."); + assert(!VF.isScalable() && "scalable vectors not yet supported."); Value *IVec = Builder.CreateShuffleVector( - WideVec, UndefVec, createInterleaveMask(VF.Min, InterleaveFactor), + WideVec, UndefVec, + createInterleaveMask(VF.getKnownMinValue(), InterleaveFactor), "interleaved.vec"); Instruction *NewStoreInstr; @@ -2480,7 +2486,8 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup( auto *Undefs = UndefValue::get(BlockInMaskPart->getType()); Value *ShuffledMask = Builder.CreateShuffleVector( BlockInMaskPart, Undefs, - createReplicatedMask(InterleaveFactor, VF.Min), "interleaved.mask"); + createReplicatedMask(InterleaveFactor, VF.getKnownMinValue()), + "interleaved.mask"); NewStoreInstr = Builder.CreateMaskedStore( IVec, AddrParts[Part], Group->getAlign(), ShuffledMask); } @@ -2514,7 +2521,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr, Type *ScalarDataTy = getMemInstValueType(Instr); - assert(!VF.Scalable && "scalable vectors not yet supported."); + assert(!VF.isScalable() && "scalable vectors not yet supported."); auto *DataTy = VectorType::get(ScalarDataTy, VF); const Align Alignment = getLoadStoreAlignment(Instr); @@ -2550,16 +2557,16 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr, // If the address is consecutive but reversed, then the // wide store needs to start at the last vector element. PartPtr = cast(Builder.CreateGEP( - ScalarDataTy, Ptr, Builder.getInt32(-Part * VF.Min))); + ScalarDataTy, Ptr, Builder.getInt32(-Part * VF.getKnownMinValue()))); PartPtr->setIsInBounds(InBounds); PartPtr = cast(Builder.CreateGEP( - ScalarDataTy, PartPtr, Builder.getInt32(1 - VF.Min))); + ScalarDataTy, PartPtr, Builder.getInt32(1 - VF.getKnownMinValue()))); PartPtr->setIsInBounds(InBounds); if (isMaskRequired) // Reverse of a null all-one mask is a null mask. BlockInMaskParts[Part] = reverseVector(BlockInMaskParts[Part]); } else { PartPtr = cast(Builder.CreateGEP( - ScalarDataTy, Ptr, Builder.getInt32(Part * VF.Min))); + ScalarDataTy, Ptr, Builder.getInt32(Part * VF.getKnownMinValue()))); PartPtr->setIsInBounds(InBounds); } @@ -2756,8 +2763,8 @@ Value *InnerLoopVectorizer::getOrCreateVectorTripCount(Loop *L) { Type *Ty = TC->getType(); // This is where we can make the step a runtime constant. - assert(!VF.Scalable && "scalable vectorization is not supported yet"); - Constant *Step = ConstantInt::get(Ty, VF.Min * UF); + assert(!VF.isScalable() && "scalable vectorization is not supported yet"); + Constant *Step = ConstantInt::get(Ty, VF.getKnownMinValue() * UF); // If the tail is to be folded by masking, round the number of iterations N // up to a multiple of Step instead of rounding down. This is done by first @@ -2766,10 +2773,10 @@ Value *InnerLoopVectorizer::getOrCreateVectorTripCount(Loop *L) { // that it starts at zero and its Step is a power of two; the loop will then // exit, with the last early-exit vector comparison also producing all-true. if (Cost->foldTailByMasking()) { - assert(isPowerOf2_32(VF.Min * UF) && + assert(isPowerOf2_32(VF.getKnownMinValue() * UF) && "VF*UF must be a power of 2 when folding tail by masking"); - TC = Builder.CreateAdd(TC, ConstantInt::get(Ty, VF.Min * UF - 1), - "n.rnd.up"); + TC = Builder.CreateAdd( + TC, ConstantInt::get(Ty, VF.getKnownMinValue() * UF - 1), "n.rnd.up"); } // Now we need to generate the expression for the part of the loop that the @@ -2846,9 +2853,10 @@ void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L, // If tail is to be folded, vector loop takes care of all iterations. Value *CheckMinIters = Builder.getFalse(); if (!Cost->foldTailByMasking()) { - assert(!VF.Scalable && "scalable vectors not yet supported."); + assert(!VF.isScalable() && "scalable vectors not yet supported."); CheckMinIters = Builder.CreateICmp( - P, Count, ConstantInt::get(Count->getType(), VF.Min * UF), + P, Count, + ConstantInt::get(Count->getType(), VF.getKnownMinValue() * UF), "min.iters.check"); } // Create new preheader for vector loop. @@ -3303,8 +3311,8 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() { Value *StartIdx = ConstantInt::get(IdxTy, 0); // The loop step is equal to the vectorization factor (num of SIMD elements) // times the unroll factor (num of SIMD instructions). - assert(!VF.Scalable && "scalable vectors not yet supported."); - Constant *Step = ConstantInt::get(IdxTy, VF.Min * UF); + assert(!VF.isScalable() && "scalable vectors not yet supported."); + Constant *Step = ConstantInt::get(IdxTy, VF.getKnownMinValue() * UF); Value *CountRoundDown = getOrCreateVectorTripCount(Lp); Induction = createInductionVariable(Lp, StartIdx, CountRoundDown, Step, @@ -3438,7 +3446,7 @@ static void cse(BasicBlock *BB) { unsigned LoopVectorizationCostModel::getVectorCallCost(CallInst *CI, ElementCount VF, bool &NeedToScalarize) { - assert(!VF.Scalable && "scalable vectors not yet supported."); + assert(!VF.isScalable() && "scalable vectors not yet supported."); Function *F = CI->getCalledFunction(); Type *ScalarRetTy = CI->getType(); SmallVector Tys, ScalarTys; @@ -3463,7 +3471,7 @@ unsigned LoopVectorizationCostModel::getVectorCallCost(CallInst *CI, // packing the return values to a vector. unsigned ScalarizationCost = getScalarizationOverhead(CI, VF); - unsigned Cost = ScalarCallCost * VF.Min + ScalarizationCost; + unsigned Cost = ScalarCallCost * VF.getKnownMinValue() + ScalarizationCost; // If we can't emit a vector call for this function, then the currently found // cost is the cost we need to return. @@ -3684,11 +3692,11 @@ void InnerLoopVectorizer::fixVectorizedLoop() { // profile is not inherently precise anyway. Note also possible bypass of // vector code caused by legality checks is ignored, assigning all the weight // to the vector loop, optimistically. - assert(!VF.Scalable && + assert(!VF.isScalable() && "cannot use scalable ElementCount to determine unroll factor"); - setProfileInfoAfterUnrolling(LI->getLoopFor(LoopScalarBody), - LI->getLoopFor(LoopVectorBody), - LI->getLoopFor(LoopScalarBody), VF.Min * UF); + setProfileInfoAfterUnrolling( + LI->getLoopFor(LoopScalarBody), LI->getLoopFor(LoopVectorBody), + LI->getLoopFor(LoopScalarBody), VF.getKnownMinValue() * UF); } void InnerLoopVectorizer::fixCrossIterationPHIs() { @@ -3769,10 +3777,10 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) { auto *VectorInit = ScalarInit; if (VF.isVector()) { Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator()); - assert(!VF.Scalable && "VF is assumed to be non scalable."); + assert(!VF.isScalable() && "VF is assumed to be non scalable."); VectorInit = Builder.CreateInsertElement( UndefValue::get(VectorType::get(VectorInit->getType(), VF)), VectorInit, - Builder.getInt32(VF.Min - 1), "vector.recur.init"); + Builder.getInt32(VF.getKnownMinValue() - 1), "vector.recur.init"); } // We constructed a temporary phi node in the first phase of vectorization. @@ -3813,11 +3821,11 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) { // We will construct a vector for the recurrence by combining the values for // the current and previous iterations. This is the required shuffle mask. - assert(!VF.Scalable); - SmallVector ShuffleMask(VF.Min); - ShuffleMask[0] = VF.Min - 1; - for (unsigned I = 1; I < VF.Min; ++I) - ShuffleMask[I] = I + VF.Min - 1; + assert(!VF.isScalable()); + SmallVector ShuffleMask(VF.getKnownMinValue()); + ShuffleMask[0] = VF.getKnownMinValue() - 1; + for (unsigned I = 1; I < VF.getKnownMinValue(); ++I) + ShuffleMask[I] = I + VF.getKnownMinValue() - 1; // The vector from which to take the initial value for the current iteration // (actual or unrolled). Initially, this is the vector phi node. @@ -3846,7 +3854,8 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) { if (VF.isVector()) { Builder.SetInsertPoint(LoopMiddleBlock->getTerminator()); ExtractForScalar = Builder.CreateExtractElement( - ExtractForScalar, Builder.getInt32(VF.Min - 1), "vector.recur.extract"); + ExtractForScalar, Builder.getInt32(VF.getKnownMinValue() - 1), + "vector.recur.extract"); } // Extract the second last element in the middle block if the // Phi is used outside the loop. We need to extract the phi itself @@ -3856,7 +3865,8 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) { Value *ExtractForPhiUsedOutsideLoop = nullptr; if (VF.isVector()) ExtractForPhiUsedOutsideLoop = Builder.CreateExtractElement( - Incoming, Builder.getInt32(VF.Min - 2), "vector.recur.extract.for.phi"); + Incoming, Builder.getInt32(VF.getKnownMinValue() - 2), + "vector.recur.extract.for.phi"); // When loop is unrolled without vectorizing, initialize // ExtractForPhiUsedOutsideLoop with the value just prior to unrolled value of // `Incoming`. This is analogous to the vectorized case above: extracting the @@ -4013,7 +4023,7 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) { // entire expression in the smaller type. if (VF.isVector() && Phi->getType() != RdxDesc.getRecurrenceType()) { assert(!IsInLoopReductionPhi && "Unexpected truncated inloop reduction!"); - assert(!VF.Scalable && "scalable vectors not yet supported."); + assert(!VF.isScalable() && "scalable vectors not yet supported."); Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), VF); Builder.SetInsertPoint( LI->getLoopFor(LoopVectorBody)->getLoopLatch()->getTerminator()); @@ -4145,7 +4155,7 @@ void InnerLoopVectorizer::clearReductionWrapFlags( } void InnerLoopVectorizer::fixLCSSAPHIs() { - assert(!VF.Scalable && "the code below assumes fixed width vectors"); + assert(!VF.isScalable() && "the code below assumes fixed width vectors"); for (PHINode &LCSSAPhi : LoopExitBlock->phis()) { if (LCSSAPhi.getNumIncomingValues() == 1) { auto *IncomingValue = LCSSAPhi.getIncomingValue(0); @@ -4155,7 +4165,7 @@ void InnerLoopVectorizer::fixLCSSAPHIs() { LastLane = Cost->isUniformAfterVectorization( cast(IncomingValue), VF) ? 0 - : VF.Min - 1; + : VF.getKnownMinValue() - 1; // Can be a loop invariant incoming value or the last scalar value to be // extracted from the vectorized loop. Builder.SetInsertPoint(LoopMiddleBlock->getTerminator()); @@ -4338,7 +4348,7 @@ void InnerLoopVectorizer::widenGEP(GetElementPtrInst *GEP, VPUser &Operands, void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF, ElementCount VF) { - assert(!VF.Scalable && "scalable vectors not yet supported."); + assert(!VF.isScalable() && "scalable vectors not yet supported."); PHINode *P = cast(PN); if (EnableVPlanNativePath) { // Currently we enter here in the VPlan-native path for non-induction @@ -4403,11 +4413,12 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF, // Determine the number of scalars we need to generate for each unroll // iteration. If the instruction is uniform, we only need to generate the // first lane. Otherwise, we generate all VF values. - unsigned Lanes = Cost->isUniformAfterVectorization(P, VF) ? 1 : VF.Min; + unsigned Lanes = + Cost->isUniformAfterVectorization(P, VF) ? 1 : VF.getKnownMinValue(); for (unsigned Part = 0; Part < UF; ++Part) { for (unsigned Lane = 0; Lane < Lanes; ++Lane) { - Constant *Idx = - ConstantInt::get(PtrInd->getType(), Lane + Part * VF.Min); + Constant *Idx = ConstantInt::get(PtrInd->getType(), + Lane + Part * VF.getKnownMinValue()); Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx); Value *SclrGep = emitTransformedIndex(Builder, GlobalIdx, PSE.getSE(), DL, II); @@ -4437,8 +4448,9 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF, Exp.expandCodeFor(ScalarStep, PhiType, InductionLoc); Value *InductionGEP = GetElementPtrInst::Create( ScStValueType->getPointerElementType(), NewPointerPhi, - Builder.CreateMul(ScalarStepValue, - ConstantInt::get(PhiType, VF.Min * UF)), + Builder.CreateMul( + ScalarStepValue, + ConstantInt::get(PhiType, VF.getKnownMinValue() * UF)), "ptr.ind", InductionLoc); NewPointerPhi->addIncoming(InductionGEP, LoopLatch); @@ -4448,15 +4460,17 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF, for (unsigned Part = 0; Part < UF; ++Part) { SmallVector Indices; // Create a vector of consecutive numbers from zero to VF. - for (unsigned i = 0; i < VF.Min; ++i) - Indices.push_back(ConstantInt::get(PhiType, i + Part * VF.Min)); + for (unsigned i = 0; i < VF.getKnownMinValue(); ++i) + Indices.push_back( + ConstantInt::get(PhiType, i + Part * VF.getKnownMinValue())); Constant *StartOffset = ConstantVector::get(Indices); Value *GEP = Builder.CreateGEP( ScStValueType->getPointerElementType(), NewPointerPhi, - Builder.CreateMul(StartOffset, - Builder.CreateVectorSplat(VF.Min, ScalarStepValue), - "vector.gep")); + Builder.CreateMul( + StartOffset, + Builder.CreateVectorSplat(VF.getKnownMinValue(), ScalarStepValue), + "vector.gep")); VectorLoopValueMap.setVectorValue(P, Part, GEP); } } @@ -4483,7 +4497,7 @@ static bool mayDivideByZero(Instruction &I) { void InnerLoopVectorizer::widenInstruction(Instruction &I, VPUser &User, VPTransformState &State) { - assert(!VF.Scalable && "scalable vectors not yet supported."); + assert(!VF.isScalable() && "scalable vectors not yet supported."); switch (I.getOpcode()) { case Instruction::Call: case Instruction::Br: @@ -4571,7 +4585,7 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I, VPUser &User, setDebugLocFromInst(Builder, CI); /// Vectorize casts. - assert(!VF.Scalable && "VF is assumed to be non scalable."); + assert(!VF.isScalable() && "VF is assumed to be non scalable."); Type *DestTy = (VF.isScalar()) ? CI->getType() : VectorType::get(CI->getType(), VF); @@ -4601,7 +4615,7 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPUser &ArgOperands, SmallVector Tys; for (Value *ArgOperand : CI->arg_operands()) - Tys.push_back(ToVectorTy(ArgOperand->getType(), VF.Min)); + Tys.push_back(ToVectorTy(ArgOperand->getType(), VF.getKnownMinValue())); Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); @@ -4633,7 +4647,7 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPUser &ArgOperands, // Use vector version of the intrinsic. Type *TysForDecl[] = {CI->getType()}; if (VF.isVector()) { - assert(!VF.Scalable && "VF is assumed to be non scalable."); + assert(!VF.isScalable() && "VF is assumed to be non scalable."); TysForDecl[0] = VectorType::get(CI->getType()->getScalarType(), VF); } VectorF = Intrinsic::getDeclaration(M, ID, TysForDecl); @@ -4872,7 +4886,7 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) { bool LoopVectorizationCostModel::isScalarWithPredication(Instruction *I, ElementCount VF) { - assert(!VF.Scalable && "scalable vectors not yet supported."); + assert(!VF.isScalable() && "scalable vectors not yet supported."); if (!blockNeedsPredication(I->getParent())) return false; switch(I->getOpcode()) { @@ -5357,7 +5371,7 @@ LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount) { Selected = false; } if (Selected) { - MaxVF = VFs[i].Min; + MaxVF = VFs[i].getKnownMinValue(); break; } } @@ -5558,8 +5572,9 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF, } // Clamp the interleave ranges to reasonable counts. - assert(!VF.Scalable && "scalable vectors not yet supported."); - unsigned MaxInterleaveCount = TTI.getMaxInterleaveFactor(VF.Min); + assert(!VF.isScalable() && "scalable vectors not yet supported."); + unsigned MaxInterleaveCount = + TTI.getMaxInterleaveFactor(VF.getKnownMinValue()); // Check if the user has overridden the max. if (VF == 1) { @@ -5573,7 +5588,8 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF, // If trip count is known or estimated compile time constant, limit the // interleave count to be less than the trip count divided by VF. if (BestKnownTC) { - MaxInterleaveCount = std::min(*BestKnownTC / VF.Min, MaxInterleaveCount); + MaxInterleaveCount = + std::min(*BestKnownTC / VF.getKnownMinValue(), MaxInterleaveCount); } // If we did not calculate the cost for VF (because the user selected the VF) @@ -5745,8 +5761,9 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef VFs) { if (Ty->isTokenTy()) return 0U; unsigned TypeSize = DL.getTypeSizeInBits(Ty->getScalarType()); - assert(!VF.Scalable && "scalable vectors not yet supported."); - return std::max(1, VF.Min * TypeSize / WidestRegister); + assert(!VF.isScalable() && "scalable vectors not yet supported."); + return std::max(1, VF.getKnownMinValue() * TypeSize / + WidestRegister); }; for (unsigned int i = 0, s = IdxToInstr.size(); i < s; ++i) { @@ -5973,19 +5990,20 @@ int LoopVectorizationCostModel::computePredInstDiscount( // the instruction as if it wasn't if-converted and instead remained in the // predicated block. We will scale this cost by block probability after // computing the scalarization overhead. - assert(!VF.Scalable && "scalable vectors not yet supported."); + assert(!VF.isScalable() && "scalable vectors not yet supported."); unsigned ScalarCost = - VF.Min * getInstructionCost(I, ElementCount::getFixed(1)).first; + VF.getKnownMinValue() * + getInstructionCost(I, ElementCount::getFixed(1)).first; // Compute the scalarization overhead of needed insertelement instructions // and phi nodes. if (isScalarWithPredication(I) && !I->getType()->isVoidTy()) { ScalarCost += TTI.getScalarizationOverhead( cast(ToVectorTy(I->getType(), VF)), - APInt::getAllOnesValue(VF.Min), true, false); - assert(!VF.Scalable && "scalable vectors not yet supported."); + APInt::getAllOnesValue(VF.getKnownMinValue()), true, false); + assert(!VF.isScalable() && "scalable vectors not yet supported."); ScalarCost += - VF.Min * + VF.getKnownMinValue() * TTI.getCFInstrCost(Instruction::PHI, TTI::TCK_RecipThroughput); } @@ -6000,10 +6018,10 @@ int LoopVectorizationCostModel::computePredInstDiscount( if (canBeScalarized(J)) Worklist.push_back(J); else if (needsExtract(J, VF)) { - assert(!VF.Scalable && "scalable vectors not yet supported."); + assert(!VF.isScalable() && "scalable vectors not yet supported."); ScalarCost += TTI.getScalarizationOverhead( cast(ToVectorTy(J->getType(), VF)), - APInt::getAllOnesValue(VF.Min), false, true); + APInt::getAllOnesValue(VF.getKnownMinValue()), false, true); } } @@ -6021,7 +6039,7 @@ int LoopVectorizationCostModel::computePredInstDiscount( LoopVectorizationCostModel::VectorizationCostTy LoopVectorizationCostModel::expectedCost(ElementCount VF) { - assert(!VF.Scalable && "scalable vectors not yet supported."); + assert(!VF.isScalable() && "scalable vectors not yet supported."); VectorizationCostTy Cost; // For each block. @@ -6104,7 +6122,7 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I, ElementCount VF) { assert(VF.isVector() && "Scalarization cost of instruction implies vectorization."); - assert(!VF.Scalable && "scalable vectors not yet supported."); + assert(!VF.isScalable() && "scalable vectors not yet supported."); Type *ValTy = getMemInstValueType(I); auto SE = PSE.getSE(); @@ -6117,12 +6135,13 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I, const SCEV *PtrSCEV = getAddressAccessSCEV(Ptr, Legal, PSE, TheLoop); // Get the cost of the scalar memory instruction and address computation. - unsigned Cost = VF.Min * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV); + unsigned Cost = + VF.getKnownMinValue() * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV); // Don't pass *I here, since it is scalar but will actually be part of a // vectorized loop where the user of it is a vectorized instruction. const Align Alignment = getLoadStoreAlignment(I); - Cost += VF.Min * + Cost += VF.getKnownMinValue() * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(), Alignment, AS, TTI::TCK_RecipThroughput); @@ -6190,9 +6209,10 @@ unsigned LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I, return TTI.getAddressComputationCost(ValTy) + TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS, CostKind) + - (isLoopInvariantStoreValue ? 0 : TTI.getVectorInstrCost( - Instruction::ExtractElement, - VectorTy, VF.Min - 1)); + (isLoopInvariantStoreValue + ? 0 + : TTI.getVectorInstrCost(Instruction::ExtractElement, VectorTy, + VF.getKnownMinValue() - 1)); } unsigned LoopVectorizationCostModel::getGatherScatterCost(Instruction *I, @@ -6218,7 +6238,7 @@ unsigned LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I, assert(Group && "Fail to get an interleaved access group."); unsigned InterleaveFactor = Group->getFactor(); - assert(!VF.Scalable && "scalable vectors not yet supported."); + assert(!VF.isScalable() && "scalable vectors not yet supported."); auto *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor); // Holds the indices of existing members in an interleaved load group. @@ -6266,7 +6286,7 @@ unsigned LoopVectorizationCostModel::getMemoryInstructionCost(Instruction *I, LoopVectorizationCostModel::VectorizationCostTy LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF) { - assert(!VF.Scalable && + assert(!VF.isScalable() && "the cost model is not yet implemented for scalable vectorization"); // If we know that this instruction will remain uniform, check the cost of // the scalar version. @@ -6282,22 +6302,24 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, auto InstSet = ForcedScalar->second; if (InstSet.count(I)) return VectorizationCostTy( - (getInstructionCost(I, ElementCount::getFixed(1)).first * VF.Min), + (getInstructionCost(I, ElementCount::getFixed(1)).first * + VF.getKnownMinValue()), false); } Type *VectorTy; unsigned C = getInstructionCost(I, VF, VectorTy); - bool TypeNotScalarized = VF.isVector() && VectorTy->isVectorTy() && - TTI.getNumberOfParts(VectorTy) < VF.Min; + bool TypeNotScalarized = + VF.isVector() && VectorTy->isVectorTy() && + TTI.getNumberOfParts(VectorTy) < VF.getKnownMinValue(); return VectorizationCostTy(C, TypeNotScalarized); } unsigned LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I, ElementCount VF) { - assert(!VF.Scalable && + assert(!VF.isScalable() && "cannot compute scalarization overhead for scalable vectorization"); if (VF.isScalar()) return 0; @@ -6307,7 +6329,8 @@ unsigned LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I, if (!RetTy->isVoidTy() && (!isa(I) || !TTI.supportsEfficientVectorElementLoadStore())) Cost += TTI.getScalarizationOverhead( - cast(RetTy), APInt::getAllOnesValue(VF.Min), true, false); + cast(RetTy), APInt::getAllOnesValue(VF.getKnownMinValue()), + true, false); // Some targets keep addresses scalar. if (isa(I) && !TTI.prefersVectorizedAddressing()) @@ -6323,13 +6346,12 @@ unsigned LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I, // Skip operands that do not require extraction/scalarization and do not incur // any overhead. - return Cost + - TTI.getOperandsScalarizationOverhead(filterExtractingOperands(Ops, VF), - VF.Min); + return Cost + TTI.getOperandsScalarizationOverhead( + filterExtractingOperands(Ops, VF), VF.getKnownMinValue()); } void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) { - assert(!VF.Scalable && "scalable vectors not yet supported."); + assert(!VF.isScalable() && "scalable vectors not yet supported."); if (VF.isScalar()) return; NumPredStores = 0; @@ -6466,14 +6488,15 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) { // Scalarize a widened load of address. setWideningDecision( I, VF, CM_Scalarize, - (VF.Min * getMemoryInstructionCost(I, ElementCount::getFixed(1)))); + (VF.getKnownMinValue() * + getMemoryInstructionCost(I, ElementCount::getFixed(1)))); else if (auto Group = getInterleavedAccessGroup(I)) { // Scalarize an interleave group of address loads. for (unsigned I = 0; I < Group->getFactor(); ++I) { if (Instruction *Member = Group->getMember(I)) setWideningDecision( Member, VF, CM_Scalarize, - (VF.Min * + (VF.getKnownMinValue() * getMemoryInstructionCost(Member, ElementCount::getFixed(1)))); } } @@ -6515,12 +6538,14 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, if (ScalarPredicatedBB) { // Return cost for branches around scalarized and predicated blocks. - assert(!VF.Scalable && "scalable vectors not yet supported."); + assert(!VF.isScalable() && "scalable vectors not yet supported."); auto *Vec_i1Ty = VectorType::get(IntegerType::getInt1Ty(RetTy->getContext()), VF); return (TTI.getScalarizationOverhead( - Vec_i1Ty, APInt::getAllOnesValue(VF.Min), false, true) + - (TTI.getCFInstrCost(Instruction::Br, CostKind) * VF.Min)); + Vec_i1Ty, APInt::getAllOnesValue(VF.getKnownMinValue()), + false, true) + + (TTI.getCFInstrCost(Instruction::Br, CostKind) * + VF.getKnownMinValue())); } else if (I->getParent() == TheLoop->getLoopLatch() || VF.isScalar()) // The back-edge branch will remain, as will all scalar branches. return TTI.getCFInstrCost(Instruction::Br, CostKind); @@ -6537,9 +6562,9 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, // First-order recurrences are replaced by vector shuffles inside the loop. // NOTE: Don't use ToVectorTy as SK_ExtractSubvector expects a vector type. if (VF.isVector() && Legal->isFirstOrderRecurrence(Phi)) - return TTI.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector, - cast(VectorTy), VF.Min - 1, - FixedVectorType::get(RetTy, 1)); + return TTI.getShuffleCost( + TargetTransformInfo::SK_ExtractSubvector, cast(VectorTy), + VF.getKnownMinValue() - 1, FixedVectorType::get(RetTy, 1)); // Phi nodes in non-header blocks (not inductions, reductions, etc.) are // converted into select instructions. We require N - 1 selects per phi @@ -6568,11 +6593,12 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, // that we will create. This cost is likely to be zero. The phi node // cost, if any, should be scaled by the block probability because it // models a copy at the end of each predicated block. - Cost += VF.Min * TTI.getCFInstrCost(Instruction::PHI, CostKind); + Cost += VF.getKnownMinValue() * + TTI.getCFInstrCost(Instruction::PHI, CostKind); // The cost of the non-predicated instruction. - Cost += - VF.Min * TTI.getArithmeticInstrCost(I->getOpcode(), RetTy, CostKind); + Cost += VF.getKnownMinValue() * + TTI.getArithmeticInstrCost(I->getOpcode(), RetTy, CostKind); // The cost of insertelement and extractelement instructions needed for // scalarization. @@ -6611,15 +6637,15 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, Op2VK = TargetTransformInfo::OK_UniformValue; SmallVector Operands(I->operand_values()); - unsigned N = isScalarAfterVectorization(I, VF) ? VF.Min : 1; + unsigned N = isScalarAfterVectorization(I, VF) ? VF.getKnownMinValue() : 1; return N * TTI.getArithmeticInstrCost( I->getOpcode(), VectorTy, CostKind, TargetTransformInfo::OK_AnyValue, Op2VK, TargetTransformInfo::OP_None, Op2VP, Operands, I); } case Instruction::FNeg: { - assert(!VF.Scalable && "VF is assumed to be non scalable."); - unsigned N = isScalarAfterVectorization(I, VF) ? VF.Min : 1; + assert(!VF.isScalable() && "VF is assumed to be non scalable."); + unsigned N = isScalarAfterVectorization(I, VF) ? VF.getKnownMinValue() : 1; return N * TTI.getArithmeticInstrCost( I->getOpcode(), VectorTy, CostKind, TargetTransformInfo::OK_AnyValue, @@ -6633,7 +6659,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop)); Type *CondTy = SI->getCondition()->getType(); if (!ScalarCond) { - assert(!VF.Scalable && "VF is assumed to be non scalable."); + assert(!VF.isScalable() && "VF is assumed to be non scalable."); CondTy = VectorType::get(CondTy, VF); } return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy, @@ -6745,8 +6771,8 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, } } - assert(!VF.Scalable && "VF is assumed to be non scalable"); - unsigned N = isScalarAfterVectorization(I, VF) ? VF.Min : 1; + assert(!VF.isScalable() && "VF is assumed to be non scalable"); + unsigned N = isScalarAfterVectorization(I, VF) ? VF.getKnownMinValue() : 1; return N * TTI.getCastInstrCost(Opcode, VectorTy, SrcVecTy, CCH, CostKind, I); } @@ -6761,9 +6787,8 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, default: // The cost of executing VF copies of the scalar instruction. This opcode // is unknown. Assume that it is the same as 'mul'. - return VF.Min * - TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy, - CostKind) + + return VF.getKnownMinValue() * TTI.getArithmeticInstrCost( + Instruction::Mul, VectorTy, CostKind) + getScalarizationOverhead(I, VF); } // end of switch. } @@ -6870,7 +6895,7 @@ static unsigned determineVPlanVF(const unsigned WidestVectorRegBits, VectorizationFactor LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) { - assert(!UserVF.Scalable && "scalable vectors not yet supported"); + assert(!UserVF.isScalable() && "scalable vectors not yet supported"); ElementCount VF = UserVF; // Outer loop handling: They may require CFG and instruction level // transformations before even evaluating whether vectorization is profitable. @@ -6892,10 +6917,11 @@ LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) { } } assert(EnableVPlanNativePath && "VPlan-native path is not enabled."); - assert(isPowerOf2_32(VF.Min) && "VF needs to be a power of two"); + assert(isPowerOf2_32(VF.getKnownMinValue()) && + "VF needs to be a power of two"); LLVM_DEBUG(dbgs() << "LV: Using " << (!UserVF.isZero() ? "user " : "") << "VF " << VF << " to build VPlans.\n"); - buildVPlans(VF.Min, VF.Min); + buildVPlans(VF.getKnownMinValue(), VF.getKnownMinValue()); // For VPlan build stress testing, we bail out after VPlan construction. if (VPlanBuildStressTest) @@ -6912,9 +6938,10 @@ LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) { Optional LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) { - assert(!UserVF.Scalable && "scalable vectorization not yet handled"); + assert(!UserVF.isScalable() && "scalable vectorization not yet handled"); assert(OrigLoop->empty() && "Inner loop expected."); - Optional MaybeMaxVF = CM.computeMaxVF(UserVF.Min, UserIC); + Optional MaybeMaxVF = + CM.computeMaxVF(UserVF.getKnownMinValue(), UserIC); if (!MaybeMaxVF) // Cases that should not to be vectorized nor interleaved. return None; @@ -6934,12 +6961,14 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) { if (!UserVF.isZero()) { LLVM_DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n"); - assert(isPowerOf2_32(UserVF.Min) && "VF needs to be a power of two"); + assert(isPowerOf2_32(UserVF.getKnownMinValue()) && + "VF needs to be a power of two"); // Collect the instructions (and their associated costs) that will be more // profitable to scalarize. CM.selectUserVectorizationFactor(UserVF); CM.collectInLoopReductions(); - buildVPlansWithVPRecipes(UserVF.Min, UserVF.Min); + buildVPlansWithVPRecipes(UserVF.getKnownMinValue(), + UserVF.getKnownMinValue()); LLVM_DEBUG(printPlans(dbgs())); return {{UserVF, 0}}; } @@ -7228,7 +7257,7 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range, "Must be called with either a load or store"); auto willWiden = [&](ElementCount VF) -> bool { - assert(!VF.Scalable && "unexpected scalable ElementCount"); + assert(!VF.isScalable() && "unexpected scalable ElementCount"); if (VF.isScalar()) return false; LoopVectorizationCostModel::InstWidening Decision = @@ -7762,7 +7791,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( ElementCount VF = ElementCount::getFixed(Range.Start); Plan->addVF(VF); RSO << "Initial VPlan for VF={" << VF; - for (VF.Min *= 2; VF.Min < Range.End; VF.Min *= 2) { + for (VF *= 2; VF.getKnownMinValue() < Range.End; VF *= 2) { Plan->addVF(VF); RSO << "," << VF; } @@ -7986,7 +8015,7 @@ void VPReplicateRecipe::execute(VPTransformState &State) { if (AlsoPack && State.VF.isVector()) { // If we're constructing lane 0, initialize to start from undef. if (State.Instance->Lane == 0) { - assert(!State.VF.Scalable && "VF is assumed to be non scalable."); + assert(!State.VF.isScalable() && "VF is assumed to be non scalable."); Value *Undef = UndefValue::get(VectorType::get(Ingredient->getType(), State.VF)); State.ValueMap.setVectorValue(Ingredient, State.Instance->Part, Undef); @@ -7999,7 +8028,7 @@ void VPReplicateRecipe::execute(VPTransformState &State) { // Generate scalar instances for all VF lanes of all UF parts, unless the // instruction is uniform inwhich case generate only the first lane for each // of the UF parts. - unsigned EndLane = IsUniform ? 1 : State.VF.Min; + unsigned EndLane = IsUniform ? 1 : State.VF.getKnownMinValue(); for (unsigned Part = 0; Part < State.UF; ++Part) for (unsigned Lane = 0; Lane < EndLane; ++Lane) State.ILV->scalarizeInstruction(Ingredient, User, {Part, Lane}, diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index a616de6..6794454 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -300,8 +300,9 @@ void VPRegionBlock::execute(VPTransformState *State) { for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) { State->Instance->Part = Part; - assert(!State->VF.Scalable && "VF is assumed to be non scalable."); - for (unsigned Lane = 0, VF = State->VF.Min; Lane < VF; ++Lane) { + assert(!State->VF.isScalable() && "VF is assumed to be non scalable."); + for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF; + ++Lane) { State->Instance->Lane = Lane; // Visit the VPBlocks connected to \p this, starting from it. for (VPBlockBase *Block : RPOT) { @@ -388,7 +389,7 @@ void VPInstruction::generateInstruction(VPTransformState &State, Value *ScalarTC = State.TripCount; auto *Int1Ty = Type::getInt1Ty(Builder.getContext()); - auto *PredTy = FixedVectorType::get(Int1Ty, State.VF.Min); + auto *PredTy = FixedVectorType::get(Int1Ty, State.VF.getKnownMinValue()); Instruction *Call = Builder.CreateIntrinsic( Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()}, {VIVElem0, ScalarTC}, nullptr, "active.lane.mask"); @@ -840,14 +841,16 @@ void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) { Type *STy = CanonicalIV->getType(); IRBuilder<> Builder(State.CFG.PrevBB->getTerminator()); ElementCount VF = State.VF; - assert(!VF.Scalable && "the code following assumes non scalables ECs"); - Value *VStart = VF.isScalar() ? CanonicalIV - : Builder.CreateVectorSplat(VF.Min, CanonicalIV, - "broadcast"); + assert(!VF.isScalable() && "the code following assumes non scalables ECs"); + Value *VStart = VF.isScalar() + ? CanonicalIV + : Builder.CreateVectorSplat(VF.getKnownMinValue(), + CanonicalIV, "broadcast"); for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) { SmallVector Indices; - for (unsigned Lane = 0; Lane < VF.Min; ++Lane) - Indices.push_back(ConstantInt::get(STy, Part * VF.Min + Lane)); + for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane) + Indices.push_back( + ConstantInt::get(STy, Part * VF.getKnownMinValue() + Lane)); // If VF == 1, there is only one iteration in the loop above, thus the // element pushed back into Indices is ConstantInt::get(STy, Part) Constant *VStep = VF == 1 ? Indices.back() : ConstantVector::get(Indices); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 6eed236..078b2ba 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -151,14 +151,15 @@ public: /// \return True if the map has a scalar entry for \p Key and \p Instance. bool hasScalarValue(Value *Key, const VPIteration &Instance) const { assert(Instance.Part < UF && "Queried Scalar Part is too large."); - assert(Instance.Lane < VF.Min && "Queried Scalar Lane is too large."); - assert(!VF.Scalable && "VF is assumed to be non scalable."); + assert(Instance.Lane < VF.getKnownMinValue() && + "Queried Scalar Lane is too large."); + assert(!VF.isScalable() && "VF is assumed to be non scalable."); if (!hasAnyScalarValue(Key)) return false; const ScalarParts &Entry = ScalarMapStorage.find(Key)->second; assert(Entry.size() == UF && "ScalarParts has wrong dimensions."); - assert(Entry[Instance.Part].size() == VF.Min && + assert(Entry[Instance.Part].size() == VF.getKnownMinValue() && "ScalarParts has wrong dimensions."); return Entry[Instance.Part][Instance.Lane] != nullptr; } @@ -197,7 +198,7 @@ public: // TODO: Consider storing uniform values only per-part, as they occupy // lane 0 only, keeping the other VF-1 redundant entries null. for (unsigned Part = 0; Part < UF; ++Part) - Entry[Part].resize(VF.Min, nullptr); + Entry[Part].resize(VF.getKnownMinValue(), nullptr); ScalarMapStorage[Key] = Entry; } ScalarMapStorage[Key][Instance.Part][Instance.Lane] = Scalar; diff --git a/llvm/unittests/CodeGen/ScalableVectorMVTsTest.cpp b/llvm/unittests/CodeGen/ScalableVectorMVTsTest.cpp index f1c4a8b..f76b8db 100644 --- a/llvm/unittests/CodeGen/ScalableVectorMVTsTest.cpp +++ b/llvm/unittests/CodeGen/ScalableVectorMVTsTest.cpp @@ -71,8 +71,8 @@ TEST(ScalableVectorMVTsTest, HelperFuncs) { // Check fields inside llvm::ElementCount EltCnt = Vnx4i32.getVectorElementCount(); - EXPECT_EQ(EltCnt.Min, 4U); - ASSERT_TRUE(EltCnt.Scalable); + EXPECT_EQ(EltCnt.getKnownMinValue(), 4U); + ASSERT_TRUE(EltCnt.isScalable()); // Check that fixed-length vector types aren't scalable. EVT V8i32 = EVT::getVectorVT(Ctx, MVT::i32, 8); @@ -82,8 +82,8 @@ TEST(ScalableVectorMVTsTest, HelperFuncs) { // Check that llvm::ElementCount works for fixed-length types. EltCnt = V8i32.getVectorElementCount(); - EXPECT_EQ(EltCnt.Min, 8U); - ASSERT_FALSE(EltCnt.Scalable); + EXPECT_EQ(EltCnt.getKnownMinValue(), 8U); + ASSERT_FALSE(EltCnt.isScalable()); } TEST(ScalableVectorMVTsTest, IRToVTTranslation) { diff --git a/llvm/unittests/IR/VectorTypesTest.cpp b/llvm/unittests/IR/VectorTypesTest.cpp index b28e445..7525ee9 100644 --- a/llvm/unittests/IR/VectorTypesTest.cpp +++ b/llvm/unittests/IR/VectorTypesTest.cpp @@ -119,8 +119,8 @@ TEST(VectorTypesTest, FixedLength) { EXPECT_EQ(ConvTy->getElementType()->getScalarSizeInBits(), 64U); EltCnt = V8Int64Ty->getElementCount(); - EXPECT_EQ(EltCnt.Min, 8U); - ASSERT_FALSE(EltCnt.Scalable); + EXPECT_EQ(EltCnt.getKnownMinValue(), 8U); + ASSERT_FALSE(EltCnt.isScalable()); } TEST(VectorTypesTest, Scalable) { @@ -215,8 +215,8 @@ TEST(VectorTypesTest, Scalable) { EXPECT_EQ(ConvTy->getElementType()->getScalarSizeInBits(), 64U); EltCnt = ScV8Int64Ty->getElementCount(); - EXPECT_EQ(EltCnt.Min, 8U); - ASSERT_TRUE(EltCnt.Scalable); + EXPECT_EQ(EltCnt.getKnownMinValue(), 8U); + ASSERT_TRUE(EltCnt.isScalable()); } TEST(VectorTypesTest, BaseVectorType) { @@ -250,7 +250,7 @@ TEST(VectorTypesTest, BaseVectorType) { // test I == J VectorType *VI = VTys[I]; ElementCount ECI = VI->getElementCount(); - EXPECT_EQ(isa(VI), ECI.Scalable); + EXPECT_EQ(isa(VI), ECI.isScalable()); for (size_t J = I + 1, JEnd = VTys.size(); J < JEnd; ++J) { // test I < J -- 2.7.4