/// extracted from vectors.
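/// For example (illustrative, with a hypothetical 4-element VecTy): pricing
/// the extraction of lanes 0 and 2 for throughput:
/// \code
///   APInt Demanded = APInt::getZero(4);
///   Demanded.setBit(0);
///   Demanded.setBit(2);
///   InstructionCost C = TTI.getScalarizationOverhead(
///       VecTy, Demanded, /*Insert=*/false, /*Extract=*/true,
///       TTI::TCK_RecipThroughput);
/// \endcode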
InstructionCost getScalarizationOverhead(VectorType *Ty,
const APInt &DemandedElts,
- bool Insert, bool Extract) const;
+ bool Insert, bool Extract,
+ TTI::TargetCostKind CostKind) const;
/// Estimate the overhead of scalarizing an instruction's unique
/// non-constant operands. The (potentially vector) types to use for each
/// argument are passed via Tys.
- InstructionCost getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
- ArrayRef<Type *> Tys) const;
+ InstructionCost
+ getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+ ArrayRef<Type *> Tys,
+ TTI::TargetCostKind CostKind) const;
/// If target has efficient vector element load/store instructions, it can
/// return true here so that insertion/extraction costs are not added to
/// the cost of scalarized loads and stores.
bool supportsEfficientVectorElementLoadStore() const;
/// \return The expected cost of vector Insert and Extract.
/// Use -1 to indicate that there is no information on the index value;
/// a typical use case is to provision the cost of vectorization/scalarization
/// in vectorizer passes.
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
unsigned Index = -1, Value *Op0 = nullptr,
Value *Op1 = nullptr) const;
/// A typical use case is cost estimation when the vector instruction
/// exists (e.g., from basic blocks during transformation).
InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
+ TTI::TargetCostKind CostKind,
unsigned Index = -1) const;
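// A typical call site updated by this change looks like (illustrative):
//   InstructionCost C = TTI.getVectorInstrCost(
//       Instruction::ExtractElement, VecTy, TTI::TCK_RecipThroughput,
//       /*Index=*/0, /*Op0=*/nullptr, /*Op1=*/nullptr);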
/// \return The cost of replication shuffle of \p VF elements typed \p EltTy
virtual bool useColdCCForColdCall(Function &F) = 0;
virtual InstructionCost getScalarizationOverhead(VectorType *Ty,
const APInt &DemandedElts,
- bool Insert,
- bool Extract) = 0;
+ bool Insert, bool Extract,
+ TargetCostKind CostKind) = 0;
virtual InstructionCost
getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
- ArrayRef<Type *> Tys) = 0;
+ ArrayRef<Type *> Tys,
+ TargetCostKind CostKind) = 0;
virtual bool supportsEfficientVectorElementLoadStore() = 0;
virtual bool supportsTailCalls() = 0;
virtual bool supportsTailCallFor(const CallBase *CB) = 0;
TTI::TargetCostKind CostKind,
const Instruction *I) = 0;
virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
unsigned Index, Value *Op0,
Value *Op1) = 0;
virtual InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
+ TTI::TargetCostKind CostKind,
unsigned Index) = 0;
virtual InstructionCost
InstructionCost getScalarizationOverhead(VectorType *Ty,
const APInt &DemandedElts,
- bool Insert, bool Extract) override {
- return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
+ bool Insert, bool Extract,
+ TargetCostKind CostKind) override {
+ return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
+ CostKind);
}
InstructionCost
getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
- ArrayRef<Type *> Tys) override {
- return Impl.getOperandsScalarizationOverhead(Args, Tys);
+ ArrayRef<Type *> Tys,
+ TargetCostKind CostKind) override {
+ return Impl.getOperandsScalarizationOverhead(Args, Tys, CostKind);
}
bool supportsEfficientVectorElementLoadStore() override {
const Instruction *I) override {
return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
}
- InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index,
- Value *Op0, Value *Op1) override {
- return Impl.getVectorInstrCost(Opcode, Val, Index, Op0, Op1);
+ InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
+ unsigned Index, Value *Op0,
+ Value *Op1) override {
+ return Impl.getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
}
InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
+ TTI::TargetCostKind CostKind,
unsigned Index) override {
- return Impl.getVectorInstrCost(I, Val, Index);
+ return Impl.getVectorInstrCost(I, Val, CostKind, Index);
}
InstructionCost
getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
InstructionCost getScalarizationOverhead(VectorType *Ty,
const APInt &DemandedElts,
- bool Insert, bool Extract) const {
+ bool Insert, bool Extract,
+ TTI::TargetCostKind CostKind) const {
return 0;
}
- InstructionCost getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
- ArrayRef<Type *> Tys) const {
+ InstructionCost
+ getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+ ArrayRef<Type *> Tys,
+ TTI::TargetCostKind CostKind) const {
return 0;
}
return 1;
}
- InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index,
- Value *Op0, Value *Op1) const {
+ InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
+ unsigned Index, Value *Op0,
+ Value *Op1) const {
return 1;
}
InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
+ TTI::TargetCostKind CostKind,
unsigned Index) const {
return 1;
}
if (auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2)))
if (CI->getValue().getActiveBits() <= 32)
Idx = CI->getZExtValue();
- return TargetTTI->getVectorInstrCost(*IE, Ty, Idx);
+ return TargetTTI->getVectorInstrCost(*IE, Ty, CostKind, Idx);
}
case Instruction::ShuffleVector: {
auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
if (CI->getValue().getActiveBits() <= 32)
Idx = CI->getZExtValue();
Type *DstTy = U->getOperand(0)->getType();
- return TargetTTI->getVectorInstrCost(*EEI, DstTy, Idx);
+ return TargetTTI->getVectorInstrCost(*EEI, DstTy, CostKind, Idx);
}
}
/// Estimate a cost of Broadcast as an extract and sequence of insert
/// operations.
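/// E.g. a broadcast of a <4 x i32> is modeled below as one extract of lane 0
/// plus four inserts, all priced with the caller's CostKind.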
- InstructionCost getBroadcastShuffleOverhead(FixedVectorType *VTy) {
+ InstructionCost getBroadcastShuffleOverhead(FixedVectorType *VTy,
+ TTI::TargetCostKind CostKind) {
InstructionCost Cost = 0;
// Broadcast cost is equal to the cost of extracting the zero'th element
// plus the cost of inserting it into every element of the result vector.
- Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, 0,
- nullptr, nullptr);
+ Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy,
+ CostKind, 0, nullptr, nullptr);
for (int i = 0, e = VTy->getNumElements(); i < e; ++i) {
- Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i,
- nullptr, nullptr);
+ Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy,
+ CostKind, i, nullptr, nullptr);
}
return Cost;
}
/// Estimate a cost of shuffle as a sequence of extract and insert
/// operations.
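/// E.g. a permute of a <4 x i32> is modeled below as four extract/insert
/// pairs, each priced with the caller's CostKind.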
- InstructionCost getPermuteShuffleOverhead(FixedVectorType *VTy) {
+ InstructionCost getPermuteShuffleOverhead(FixedVectorType *VTy,
+ TTI::TargetCostKind CostKind) {
InstructionCost Cost = 0;
// Shuffle cost is equal to the cost of extracting elements from its
// argument plus the cost of inserting them into the result vector.
// vector and finally index 3 of second vector and insert them at index
// <0,1,2,3> of result vector.
for (int i = 0, e = VTy->getNumElements(); i < e; ++i) {
- Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i,
- nullptr, nullptr);
- Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, i,
- nullptr, nullptr);
+ Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy,
+ CostKind, i, nullptr, nullptr);
+ Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy,
+ CostKind, i, nullptr, nullptr);
}
return Cost;
}
/// Estimate a cost of subvector extraction as a sequence of extract and
/// insert operations.
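/// E.g. extracting a <2 x i32> subvector at Index 2 of a <4 x i32> source is
/// modeled below as extracts of source lanes 2 and 3 plus inserts into
/// result lanes 0 and 1.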
- InstructionCost getExtractSubvectorOverhead(VectorType *VTy, int Index,
- FixedVectorType *SubVTy) {
+ InstructionCost getExtractSubvectorOverhead(VectorType *VTy,
+ TTI::TargetCostKind CostKind,
+ int Index,
+ FixedVectorType *SubVTy) {
assert(VTy && SubVTy &&
"Can only extract subvectors from vectors");
int NumSubElts = SubVTy->getNumElements();
// the source type plus the cost of inserting them into the result vector
// type.
for (int i = 0; i != NumSubElts; ++i) {
- Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy,
- i + Index, nullptr, nullptr);
- Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, SubVTy, i,
- nullptr, nullptr);
+ Cost +=
+ thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy,
+ CostKind, i + Index, nullptr, nullptr);
+ Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, SubVTy,
+ CostKind, i, nullptr, nullptr);
}
return Cost;
}
/// Estimate a cost of subvector insertion as a sequence of extract and
/// insert operations.
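/// E.g. inserting a <2 x i32> subvector at Index 2 of a <4 x i32> destination
/// is modeled below as extracts of subvector lanes 0 and 1 plus inserts into
/// destination lanes 2 and 3.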
- InstructionCost getInsertSubvectorOverhead(VectorType *VTy, int Index,
- FixedVectorType *SubVTy) {
+ InstructionCost getInsertSubvectorOverhead(VectorType *VTy,
+ TTI::TargetCostKind CostKind,
+ int Index,
+ FixedVectorType *SubVTy) {
assert(VTy && SubVTy &&
"Can only insert subvectors into vectors");
int NumSubElts = SubVTy->getNumElements();
// type.
for (int i = 0; i != NumSubElts; ++i) {
Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVTy,
- i, nullptr, nullptr);
- Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy,
- i + Index, nullptr, nullptr);
+ CostKind, i, nullptr, nullptr);
+ Cost +=
+ thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, CostKind,
+ i + Index, nullptr, nullptr);
}
return Cost;
}
FixedVectorType::get(
PointerType::get(VT->getElementType(), 0),
VT->getNumElements()),
- -1, nullptr, nullptr)
+ CostKind, -1, nullptr, nullptr)
: 0;
InstructionCost LoadCost =
VT->getNumElements() *
getMemoryOpCost(Opcode, VT->getElementType(), Alignment, 0, CostKind));
// Next, compute the cost of packing the result in a vector.
- InstructionCost PackingCost = getScalarizationOverhead(
- VT, Opcode != Instruction::Store, Opcode == Instruction::Store);
+ InstructionCost PackingCost =
+ getScalarizationOverhead(VT, Opcode != Instruction::Store,
+ Opcode == Instruction::Store, CostKind);
InstructionCost ConditionalCost = 0;
if (VariableMask) {
Instruction::ExtractElement,
FixedVectorType::get(Type::getInt1Ty(DataTy->getContext()),
VT->getNumElements()),
- -1, nullptr, nullptr) +
+ CostKind, -1, nullptr, nullptr) +
getCFInstrCost(Instruction::Br, CostKind) +
getCFInstrCost(Instruction::PHI, CostKind));
}
/// extracted from vectors.
InstructionCost getScalarizationOverhead(VectorType *InTy,
const APInt &DemandedElts,
- bool Insert, bool Extract) {
+ bool Insert, bool Extract,
+ TTI::TargetCostKind CostKind) {
/// FIXME: a bitfield is not a reasonable abstraction for talking about
/// which elements are needed from a scalable vector
if (isa<ScalableVectorType>(InTy))
if (!DemandedElts[i])
continue;
if (Insert)
- Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, Ty, i,
- nullptr, nullptr);
+ Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, Ty,
+ CostKind, i, nullptr, nullptr);
if (Extract)
- Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, i,
- nullptr, nullptr);
+ Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty,
+ CostKind, i, nullptr, nullptr);
}
return Cost;
/// Helper wrapper for the DemandedElts variant of getScalarizationOverhead.
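/// This simply demands every lane: for a <4 x float> it forwards an all-ones
/// 4-bit mask together with the caller's CostKind.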
InstructionCost getScalarizationOverhead(VectorType *InTy, bool Insert,
- bool Extract) {
+ bool Extract,
+ TTI::TargetCostKind CostKind) {
if (isa<ScalableVectorType>(InTy))
return InstructionCost::getInvalid();
auto *Ty = cast<FixedVectorType>(InTy);
APInt DemandedElts = APInt::getAllOnes(Ty->getNumElements());
- return thisT()->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
+ return thisT()->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
+ CostKind);
}
/// Estimate the overhead of scalarizing an instruction's unique
/// non-constant operands. The (potentially vector) types to use for each
/// argument are passed via Tys.
- InstructionCost getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
- ArrayRef<Type *> Tys) {
+ InstructionCost
+ getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+ ArrayRef<Type *> Tys,
+ TTI::TargetCostKind CostKind) {
assert(Args.size() == Tys.size() && "Expected matching Args and Tys");
InstructionCost Cost = 0;
if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
if (auto *VecTy = dyn_cast<VectorType>(Ty))
- Cost += getScalarizationOverhead(VecTy, false, true);
+ Cost += getScalarizationOverhead(VecTy, /*Insert*/ false,
+ /*Extract*/ true, CostKind);
}
}
/// added as a heuristic.
InstructionCost getScalarizationOverhead(VectorType *RetTy,
ArrayRef<const Value *> Args,
- ArrayRef<Type *> Tys) {
- InstructionCost Cost = getScalarizationOverhead(RetTy, true, false);
+ ArrayRef<Type *> Tys,
+ TTI::TargetCostKind CostKind) {
+ InstructionCost Cost = getScalarizationOverhead(
+ RetTy, /*Insert*/ true, /*Extract*/ false, CostKind);
if (!Args.empty())
- Cost += getOperandsScalarizationOverhead(Args, Tys);
+ Cost += getOperandsScalarizationOverhead(Args, Tys, CostKind);
else
// When no information on arguments is provided, we add the cost
// associated with one argument as a heuristic.
- Cost += getScalarizationOverhead(RetTy, false, true);
+ Cost += getScalarizationOverhead(RetTy, /*Insert*/ false,
+ /*Extract*/ true, CostKind);
return Cost;
}
// Return the cost of multiple scalar invocation plus the cost of
// inserting and extracting the values.
SmallVector<Type *> Tys(Args.size(), Ty);
- return getScalarizationOverhead(VTy, Args, Tys) +
+ return getScalarizationOverhead(VTy, Args, Tys, CostKind) +
VTy->getNumElements() * Cost;
}
switch (improveShuffleKindFromMask(Kind, Mask)) {
case TTI::SK_Broadcast:
if (auto *FVT = dyn_cast<FixedVectorType>(Tp))
- return getBroadcastShuffleOverhead(FVT);
+ return getBroadcastShuffleOverhead(FVT, CostKind);
return InstructionCost::getInvalid();
case TTI::SK_Select:
case TTI::SK_Splice:
case TTI::SK_PermuteSingleSrc:
case TTI::SK_PermuteTwoSrc:
if (auto *FVT = dyn_cast<FixedVectorType>(Tp))
- return getPermuteShuffleOverhead(FVT);
+ return getPermuteShuffleOverhead(FVT, CostKind);
return InstructionCost::getInvalid();
case TTI::SK_ExtractSubvector:
- return getExtractSubvectorOverhead(Tp, Index,
+ return getExtractSubvectorOverhead(Tp, CostKind, Index,
cast<FixedVectorType>(SubTp));
case TTI::SK_InsertSubvector:
- return getInsertSubvectorOverhead(Tp, Index,
+ return getInsertSubvectorOverhead(Tp, CostKind, Index,
cast<FixedVectorType>(SubTp));
}
llvm_unreachable("Unknown TTI::ShuffleKind");
// Return the cost of multiple scalar invocation plus the cost of
// inserting and extracting the values.
- return getScalarizationOverhead(DstVTy, true, true) + Num * Cost;
+ return getScalarizationOverhead(DstVTy, /*Insert*/ true, /*Extract*/ true,
+ CostKind) +
+ Num * Cost;
}
// We already handled vector-to-vector and scalar-to-scalar conversions.
// that the conversion is scalarized in one way or another.
if (Opcode == Instruction::BitCast) {
// Illegal bitcasts are done by storing and loading from a stack slot.
- return (SrcVTy ? getScalarizationOverhead(SrcVTy, false, true) : 0) +
- (DstVTy ? getScalarizationOverhead(DstVTy, true, false) : 0);
+ return (SrcVTy ? getScalarizationOverhead(SrcVTy, /*Insert*/ false,
+ /*Extract*/ true, CostKind)
+ : 0) +
+ (DstVTy ? getScalarizationOverhead(DstVTy, /*Insert*/ true,
+ /*Extract*/ false, CostKind)
+ : 0);
}
llvm_unreachable("Unhandled cast");
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
VectorType *VecTy, unsigned Index) {
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
return thisT()->getVectorInstrCost(Instruction::ExtractElement, VecTy,
- Index, nullptr, nullptr) +
+ CostKind, Index, nullptr, nullptr) +
thisT()->getCastInstrCost(Opcode, Dst, VecTy->getElementType(),
- TTI::CastContextHint::None,
- TTI::TCK_RecipThroughput);
+ TTI::CastContextHint::None, CostKind);
}
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
// Return the cost of multiple scalar invocation plus the cost of
// inserting and extracting the values.
- return getScalarizationOverhead(ValVTy, true, false) + Num * Cost;
+ return getScalarizationOverhead(ValVTy, /*Insert*/ true,
+ /*Extract*/ false, CostKind) +
+ Num * Cost;
}
// Unknown scalar opcode.
return 1;
}
- InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index,
- Value *Op0, Value *Op1) {
+ InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
+ unsigned Index, Value *Op0, Value *Op1) {
return getRegUsageForType(Val->getScalarType());
}
InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
+ TTI::TargetCostKind CostKind,
unsigned Index) {
Value *Op0 = nullptr;
Value *Op1 = nullptr;
Op0 = IE->getOperand(0);
Op1 = IE->getOperand(1);
}
- return thisT()->getVectorInstrCost(I.getOpcode(), Val, Index, Op0, Op1);
+ return thisT()->getVectorInstrCost(I.getOpcode(), Val, CostKind, Index, Op0,
+ Op1);
}
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedDstElts, VF);
Cost += thisT()->getScalarizationOverhead(SrcVT, DemandedSrcElts,
/*Insert*/ false,
- /*Extract*/ true);
- Cost +=
- thisT()->getScalarizationOverhead(ReplicatedVT, DemandedDstElts,
- /*Insert*/ true, /*Extract*/ false);
+ /*Extract*/ true, CostKind);
+ Cost += thisT()->getScalarizationOverhead(ReplicatedVT, DemandedDstElts,
+ /*Insert*/ true,
+ /*Extract*/ false, CostKind);
return Cost;
}
if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
// This is a vector load/store for some illegal type that is scalarized.
// We must account for the cost of building or decomposing the vector.
- Cost += getScalarizationOverhead(cast<VectorType>(Src),
- Opcode != Instruction::Store,
- Opcode == Instruction::Store);
+ Cost += getScalarizationOverhead(
+ cast<VectorType>(Src), Opcode != Instruction::Store,
+ Opcode == Instruction::Store, CostKind);
}
}
// %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0
// The cost is estimated as extract elements at 0, 2, 4, 6 from the
// <8 x i32> vector and insert them into a <4 x i32> vector.
- InstructionCost InsSubCost =
- thisT()->getScalarizationOverhead(SubVT, DemandedAllSubElts,
- /*Insert*/ true, /*Extract*/ false);
+ InstructionCost InsSubCost = thisT()->getScalarizationOverhead(
+ SubVT, DemandedAllSubElts,
+ /*Insert*/ true, /*Extract*/ false, CostKind);
Cost += Indices.size() * InsSubCost;
- Cost +=
- thisT()->getScalarizationOverhead(VT, DemandedLoadStoreElts,
- /*Insert*/ false, /*Extract*/ true);
+ Cost += thisT()->getScalarizationOverhead(VT, DemandedLoadStoreElts,
+ /*Insert*/ false,
+ /*Extract*/ true, CostKind);
} else {
// The interleave cost is extract elements from sub vectors, and
// insert them into the wide vector.
// The cost is estimated as extract all elements (of actual members,
// excluding gaps) from both <4 x i32> vectors and insert into the <12 x
// i32> vector.
- InstructionCost ExtSubCost =
- thisT()->getScalarizationOverhead(SubVT, DemandedAllSubElts,
- /*Insert*/ false, /*Extract*/ true);
+ InstructionCost ExtSubCost = thisT()->getScalarizationOverhead(
+ SubVT, DemandedAllSubElts,
+ /*Insert*/ false, /*Extract*/ true, CostKind);
Cost += ExtSubCost * Indices.size();
Cost += thisT()->getScalarizationOverhead(VT, DemandedLoadStoreElts,
/*Insert*/ true,
- /*Extract*/ false);
+ /*Extract*/ false, CostKind);
}
if (!UseMaskForCond)
if (RetVF.isVector() && !RetVF.isScalable()) {
ScalarizationCost = 0;
if (!RetTy->isVoidTy())
- ScalarizationCost +=
- getScalarizationOverhead(cast<VectorType>(RetTy), true, false);
+ ScalarizationCost += getScalarizationOverhead(
+ cast<VectorType>(RetTy),
+ /*Insert*/ true, /*Extract*/ false, CostKind);
ScalarizationCost +=
- getOperandsScalarizationOverhead(Args, ICA.getArgTypes());
+ getOperandsScalarizationOverhead(Args, ICA.getArgTypes(), CostKind);
}
IntrinsicCostAttributes Attrs(IID, RetTy, ICA.getArgTypes(), FMF, I,
Type *ScalarRetTy = RetTy;
if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
if (!SkipScalarizationCost)
- ScalarizationCost = getScalarizationOverhead(RetVTy, true, false);
+ ScalarizationCost = getScalarizationOverhead(
+ RetVTy, /*Insert*/ true, /*Extract*/ false, CostKind);
ScalarCalls = std::max(ScalarCalls,
cast<FixedVectorType>(RetVTy)->getNumElements());
ScalarRetTy = RetTy->getScalarType();
Type *Ty = Tys[i];
if (auto *VTy = dyn_cast<VectorType>(Ty)) {
if (!SkipScalarizationCost)
- ScalarizationCost += getScalarizationOverhead(VTy, false, true);
+ ScalarizationCost += getScalarizationOverhead(
+ VTy, /*Insert*/ false, /*Extract*/ true, CostKind);
ScalarCalls = std::max(ScalarCalls,
cast<FixedVectorType>(VTy)->getNumElements());
Ty = Ty->getScalarType();
return InstructionCost::getInvalid();
InstructionCost ScalarizationCost =
- SkipScalarizationCost ? ScalarizationCostPassed
- : getScalarizationOverhead(RetVTy, true, false);
+ SkipScalarizationCost
+ ? ScalarizationCostPassed
+ : getScalarizationOverhead(RetVTy, /*Insert*/ true,
+ /*Extract*/ false, CostKind);
unsigned ScalarCalls = cast<FixedVectorType>(RetVTy)->getNumElements();
SmallVector<Type *, 4> ScalarTys;
for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
if (auto *VTy = dyn_cast<VectorType>(Tys[i])) {
if (!ICA.skipScalarizationCost())
- ScalarizationCost += getScalarizationOverhead(VTy, false, true);
+ ScalarizationCost += getScalarizationOverhead(
+ VTy, /*Insert*/ false, /*Extract*/ true, CostKind);
ScalarCalls = std::max(ScalarCalls,
cast<FixedVectorType>(VTy)->getNumElements());
}
ArithCost +=
NumReduxLevels * thisT()->getArithmeticInstrCost(Opcode, Ty, CostKind);
return ShuffleCost + ArithCost +
- thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0,
- nullptr, nullptr);
+ thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty,
+ CostKind, 0, nullptr, nullptr);
}
/// Try to calculate the cost of performing strict (in-order) reductions,
return InstructionCost::getInvalid();
auto *VTy = cast<FixedVectorType>(Ty);
- InstructionCost ExtractCost =
- getScalarizationOverhead(VTy, /*Insert=*/false, /*Extract=*/true);
+ InstructionCost ExtractCost = getScalarizationOverhead(
+ VTy, /*Insert=*/false, /*Extract=*/true, CostKind);
InstructionCost ArithCost = thisT()->getArithmeticInstrCost(
Opcode, VTy->getElementType(), CostKind);
ArithCost *= VTy->getNumElements();
// The last min/max should be in vector registers and we counted it above.
// So just need a single extractelement.
return ShuffleCost + MinMaxCost +
- thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0,
- nullptr, nullptr);
+ thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty,
+ CostKind, 0, nullptr, nullptr);
}
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
return TTIImpl->useColdCCForColdCall(F);
}
-InstructionCost
-TargetTransformInfo::getScalarizationOverhead(VectorType *Ty,
- const APInt &DemandedElts,
- bool Insert, bool Extract) const {
- return TTIImpl->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
+InstructionCost TargetTransformInfo::getScalarizationOverhead(
+ VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,
+ TTI::TargetCostKind CostKind) const {
+ return TTIImpl->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
+ CostKind);
}
InstructionCost TargetTransformInfo::getOperandsScalarizationOverhead(
- ArrayRef<const Value *> Args, ArrayRef<Type *> Tys) const {
- return TTIImpl->getOperandsScalarizationOverhead(Args, Tys);
+ ArrayRef<const Value *> Args, ArrayRef<Type *> Tys,
+ TTI::TargetCostKind CostKind) const {
+ return TTIImpl->getOperandsScalarizationOverhead(Args, Tys, CostKind);
}
bool TargetTransformInfo::supportsEfficientVectorElementLoadStore() const {
}
InstructionCost TargetTransformInfo::getVectorInstrCost(
- unsigned Opcode, Type *Val, unsigned Index, Value *Op0, Value *Op1) const {
+ unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
+ Value *Op0, Value *Op1) const {
// FIXME: Assert that Opcode is either InsertElement or ExtractElement.
// This is mentioned in the interface description and respected by all
// callers, but never asserted upon.
InstructionCost Cost =
- TTIImpl->getVectorInstrCost(Opcode, Val, Index, Op0, Op1);
+ TTIImpl->getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
-InstructionCost TargetTransformInfo::getVectorInstrCost(const Instruction &I,
- Type *Val,
- unsigned Index) const {
+InstructionCost
+TargetTransformInfo::getVectorInstrCost(const Instruction &I, Type *Val,
+ TTI::TargetCostKind CostKind,
+ unsigned Index) const {
// FIXME: Assert that Opcode is either InsertElement or ExtractElement.
// This is mentioned in the interface description and respected by all
// callers, but never asserted upon.
- InstructionCost Cost = TTIImpl->getVectorInstrCost(I, Val, Index);
+ InstructionCost Cost = TTIImpl->getVectorInstrCost(I, Val, CostKind, Index);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
// The scalar chain of computation has to pay for the transition
// scalar to vector.
// The vector chain has to account for the combining cost.
- InstructionCost ScalarCost =
- TTI.getVectorInstrCost(*Transition, PromotedType, Index);
- InstructionCost VectorCost = StoreExtractCombineCost;
enum TargetTransformInfo::TargetCostKind CostKind =
TargetTransformInfo::TCK_RecipThroughput;
+ InstructionCost ScalarCost =
+ TTI.getVectorInstrCost(*Transition, PromotedType, CostKind, Index);
+ InstructionCost VectorCost = StoreExtractCombineCost;
for (const auto &Inst : InstsToBePromoted) {
// Compute the cost.
// By construction, all instructions being promoted are arithmetic ones.
// Get the cost for the extract. We compute the cost (if any) for the extend
// below.
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
InstructionCost Cost = getVectorInstrCost(Instruction::ExtractElement, VecTy,
- Index, nullptr, nullptr);
+ CostKind, Index, nullptr, nullptr);
// Legalize the types.
auto VecLT = getTypeLegalizationCost(VecTy);
auto DstVT = TLI->getValueType(DL, Dst);
auto SrcVT = TLI->getValueType(DL, Src);
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
// If the resulting type is still a vector and the destination type is legal,
// we may get the extension for free. If not, get the default cost for the
}
InstructionCost AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
unsigned Index, Value *Op0,
Value *Op1) {
return getVectorInstrCostHelper(Val, Index, false /* HasRealUse */);
}
InstructionCost AArch64TTIImpl::getVectorInstrCost(const Instruction &I,
- Type *Val, unsigned Index) {
+ Type *Val,
+ TTI::TargetCostKind CostKind,
+ unsigned Index) {
return getVectorInstrCostHelper(Val, Index, true /* HasRealUse */);
}
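// Note: in this patch the shared getVectorInstrCostHelper does not consume
// CostKind yet; both AArch64 overloads accept it only to match the updated
// interface.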
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
- InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index,
- Value *Op0, Value *Op1);
+ InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
+ unsigned Index, Value *Op0, Value *Op1);
InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
+ TTI::TargetCostKind CostKind,
unsigned Index);
InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
}
InstructionCost GCNTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
+ TTI::TargetCostKind CostKind,
unsigned Index, Value *Op0,
Value *Op1) {
switch (Opcode) {
if (EltSize < 32) {
if (EltSize == 16 && Index == 0 && ST->has16BitInsts())
return 0;
- return BaseT::getVectorInstrCost(Opcode, ValTy, Index, Op0, Op1);
+ return BaseT::getVectorInstrCost(Opcode, ValTy, CostKind, Index, Op0,
+ Op1);
}
// Extracts are just reads of a subregister, so are free. Inserts are
// likewise free; only dynamic indexing (an unknown Index) pays a cost.
return Index == ~0u ? 2 : 0;
}
default:
- return BaseT::getVectorInstrCost(Opcode, ValTy, Index, Op0, Op1);
+ return BaseT::getVectorInstrCost(Opcode, ValTy, CostKind, Index, Op0, Op1);
}
}
using BaseT::getVectorInstrCost;
InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy,
+ TTI::TargetCostKind CostKind,
unsigned Index, Value *Op0, Value *Op1);
bool isReadRegisterSourceOfDivergence(const IntrinsicInst *ReadReg) const;
}
InstructionCost R600TTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
+ TTI::TargetCostKind CostKind,
unsigned Index, Value *Op0,
Value *Op1) {
switch (Opcode) {
unsigned EltSize =
DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType());
if (EltSize < 32) {
- return BaseT::getVectorInstrCost(Opcode, ValTy, Index, Op0, Op1);
+ return BaseT::getVectorInstrCost(Opcode, ValTy, CostKind, Index, Op0,
+ Op1);
}
// Extracts are just reads of a subregister, so are free. Inserts are
// likewise free; only dynamic indexing (an unknown Index) pays a cost.
return Index == ~0u ? 2 : 0;
}
default:
- return BaseT::getVectorInstrCost(Opcode, ValTy, Index, Op0, Op1);
+ return BaseT::getVectorInstrCost(Opcode, ValTy, CostKind, Index, Op0, Op1);
}
}
const Instruction *I = nullptr);
using BaseT::getVectorInstrCost;
InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy,
+ TTI::TargetCostKind CostKind,
unsigned Index, Value *Op0, Value *Op1);
};
}
InstructionCost ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
+ TTI::TargetCostKind CostKind,
unsigned Index, Value *Op0,
Value *Op1) {
// Penalize inserting into a D-subregister. We end up with a three times
// lower estimated throughput on swift.
if (ValTy->isVectorTy() &&
ValTy->getScalarSizeInBits() <= 32)
return std::max<InstructionCost>(
- BaseT::getVectorInstrCost(Opcode, ValTy, Index, Op0, Op1), 2U);
+ BaseT::getVectorInstrCost(Opcode, ValTy, CostKind, Index, Op0, Op1),
+ 2U);
}
if (ST->hasMVEIntegerOps() && (Opcode == Instruction::InsertElement ||
return LT.first * (ValTy->getScalarType()->isIntegerTy() ? 4 : 1);
}
- return BaseT::getVectorInstrCost(Opcode, ValTy, Index, Op0, Op1);
+ return BaseT::getVectorInstrCost(Opcode, ValTy, CostKind, Index, Op0, Op1);
}
InstructionCost ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
if (Opcode == Instruction::FCmp && !ST->hasMVEFloatOps()) {
// One scalarization insert, one scalarization extract and the cost of the
// fcmps.
- return BaseT::getScalarizationOverhead(VecValTy, false, true) +
- BaseT::getScalarizationOverhead(VecCondTy, true, false) +
+ return BaseT::getScalarizationOverhead(VecValTy, /*Insert*/ false,
+ /*Extract*/ true, CostKind) +
+ BaseT::getScalarizationOverhead(VecCondTy, /*Insert*/ true,
+ /*Extract*/ false, CostKind) +
VecValTy->getNumElements() *
getCmpSelInstrCost(Opcode, ValTy->getScalarType(),
- VecCondTy->getScalarType(), VecPred, CostKind,
- I);
+ VecCondTy->getScalarType(), VecPred,
+ CostKind, I);
}
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
if (LT.second.isVector() && LT.second.getVectorNumElements() > 2) {
if (LT.first > 1)
return LT.first * BaseCost +
- BaseT::getScalarizationOverhead(VecCondTy, true, false);
+ BaseT::getScalarizationOverhead(VecCondTy, /*Insert*/ true,
+ /*Extract*/ false, CostKind);
return BaseCost;
}
}
// Return the cost of multiple scalar invocation plus the cost of
// inserting and extracting the values.
SmallVector<Type *> Tys(Args.size(), Ty);
- return BaseT::getScalarizationOverhead(VTy, Args, Tys) + Num * Cost;
+ return BaseT::getScalarizationOverhead(VTy, Args, Tys, CostKind) +
+ Num * Cost;
}
return BaseCost;
// The scalarization cost should be a lot higher. We use the number of vector
// elements plus the scalarization overhead.
InstructionCost ScalarCost =
- NumElems * LT.first + BaseT::getScalarizationOverhead(VTy, true, false) +
- BaseT::getScalarizationOverhead(VTy, false, true);
+ NumElems * LT.first +
+ BaseT::getScalarizationOverhead(VTy, /*Insert*/ true, /*Extract*/ false,
+ CostKind) +
+ BaseT::getScalarizationOverhead(VTy, /*Insert*/ false, /*Extract*/ true,
+ CostKind);
if (EltSize < 8 || Alignment < EltSize / 8)
return ScalarCost;
const Instruction *I = nullptr);
using BaseT::getVectorInstrCost;
- InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index,
- Value *Op0, Value *Op1);
+ InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
+ unsigned Index, Value *Op0, Value *Op1);
InstructionCost getAddressComputationCost(Type *Val, ScalarEvolution *SE,
const SCEV *Ptr);
}
InstructionCost HexagonTTIImpl::getScalarizationOverhead(
- VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract) {
- return BaseT::getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
+ VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,
+ TTI::TargetCostKind CostKind) {
+ return BaseT::getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
+ CostKind);
}
InstructionCost
HexagonTTIImpl::getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
- ArrayRef<Type *> Tys) {
- return BaseT::getOperandsScalarizationOverhead(Args, Tys);
+ ArrayRef<Type *> Tys,
+ TTI::TargetCostKind CostKind) {
+ return BaseT::getOperandsScalarizationOverhead(Args, Tys, CostKind);
}
InstructionCost HexagonTTIImpl::getCallInstrCost(Function *F, Type *RetTy,
}
InstructionCost HexagonTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
unsigned Index, Value *Op0,
Value *Op1) {
Type *ElemTy = Val->isVectorTy() ? cast<VectorType>(Val)->getElementType()
if (ElemTy->isIntegerTy(32))
return Cost;
// If it's not a 32-bit value, there will need to be an extract.
- return Cost + getVectorInstrCost(Instruction::ExtractElement, Val, Index,
- Op0, Op1);
+ return Cost + getVectorInstrCost(Instruction::ExtractElement, Val, CostKind,
+ Index, Op0, Op1);
}
if (Opcode == Instruction::ExtractElement)
InstructionCost getScalarizationOverhead(VectorType *Ty,
const APInt &DemandedElts,
- bool Insert, bool Extract);
- InstructionCost getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
- ArrayRef<Type *> Tys);
+ bool Insert, bool Extract,
+ TTI::TargetCostKind CostKind);
+ InstructionCost
+ getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+ ArrayRef<Type *> Tys,
+ TTI::TargetCostKind CostKind);
InstructionCost getCallInstrCost(Function *F, Type *RetTy,
ArrayRef<Type *> Tys,
TTI::TargetCostKind CostKind);
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
using BaseT::getVectorInstrCost;
- InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index,
- Value *Op0, Value *Op1);
+ InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
+ unsigned Index, Value *Op0, Value *Op1);
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
const Instruction *I = nullptr) {
}
InstructionCost PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
unsigned Index, Value *Op0,
Value *Op1) {
assert(Val->isVectorTy() && "This must be a vector type");
return InstructionCost::getMax();
InstructionCost Cost =
- BaseT::getVectorInstrCost(Opcode, Val, Index, Op0, Op1);
+ BaseT::getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
Cost *= CostFactor;
if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) {
if (Src->isVectorTy() && Opcode == Instruction::Store)
for (int i = 0, e = cast<FixedVectorType>(Src)->getNumElements(); i < e;
++i)
- Cost += getVectorInstrCost(Instruction::ExtractElement, Src, i, nullptr,
- nullptr);
+ Cost += getVectorInstrCost(Instruction::ExtractElement, Src, CostKind, i,
+ nullptr, nullptr);
return Cost;
}
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
using BaseT::getVectorInstrCost;
- InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index,
- Value *Op0, Value *Op1);
+ InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
+ unsigned Index, Value *Op0, Value *Op1);
InstructionCost
getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace, TTI::TargetCostKind CostKind,
}
InstructionCost RISCVTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
unsigned Index, Value *Op0,
Value *Op1) {
assert(Val->isVectorTy() && "This must be a vector type");
if (Opcode != Instruction::ExtractElement &&
Opcode != Instruction::InsertElement)
- return BaseT::getVectorInstrCost(Opcode, Val, Index, Op0, Op1);
+ return BaseT::getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
// Legalize the type.
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Val);
return LT.first;
if (!isTypeLegal(Val))
- return BaseT::getVectorInstrCost(Opcode, Val, Index, Op0, Op1);
+ return BaseT::getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
// In RVV, we could use vslidedown + vmv.x.s to extract element from vector
// and vslideup + vmv.s.x to insert element to vector.
const Instruction *I = nullptr);
using BaseT::getVectorInstrCost;
- InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index,
- Value *Op0, Value *Op1);
+ InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
+ unsigned Index, Value *Op0, Value *Op1);
InstructionCost getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
return (NumVectors * (SignedDivRem ? SDivPow2Cost : 1));
if (DivRemConst) {
SmallVector<Type *> Tys(Args.size(), Ty);
- return VF * DivMulSeqCost + getScalarizationOverhead(VTy, Args, Tys);
+ return VF * DivMulSeqCost +
+ getScalarizationOverhead(VTy, Args, Tys, CostKind);
}
if ((SignedDivRem || UnsignedDivRem) && VF > 4)
// Temporary hack: disable high vectorization factors with integer
getArithmeticInstrCost(Opcode, Ty->getScalarType(), CostKind);
SmallVector<Type *> Tys(Args.size(), Ty);
InstructionCost Cost =
- (VF * ScalarCost) + getScalarizationOverhead(VTy, Args, Tys);
+ (VF * ScalarCost) +
+ getScalarizationOverhead(VTy, Args, Tys, CostKind);
// FIXME: VF 2 for these FP operations are currently just as
// expensive as for VF 4.
if (VF == 2)
// There is no native support for FRem.
if (Opcode == Instruction::FRem) {
SmallVector<Type *> Tys(Args.size(), Ty);
- InstructionCost Cost =
- (VF * LIBCALL_COST) + getScalarizationOverhead(VTy, Args, Tys);
+ InstructionCost Cost = (VF * LIBCALL_COST) +
+ getScalarizationOverhead(VTy, Args, Tys, CostKind);
// FIXME: VF 2 for float is currently just as expensive as for VF 4.
if (VF == 2 && ScalarBits == 32)
Cost *= 2;
(Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI))
NeedsExtracts = false;
- TotCost += getScalarizationOverhead(SrcVecTy, false, NeedsExtracts);
- TotCost += getScalarizationOverhead(DstVecTy, NeedsInserts, false);
+ TotCost += getScalarizationOverhead(SrcVecTy, /*Insert*/ false,
+ NeedsExtracts, CostKind);
+ TotCost += getScalarizationOverhead(DstVecTy, NeedsInserts,
+ /*Extract*/ false, CostKind);
// FIXME: VF 2 for float<->i32 is currently just as expensive as for VF 4.
if (VF == 2 && SrcScalarBits == 32 && DstScalarBits == 32)
if (Opcode == Instruction::FPTrunc) {
if (SrcScalarBits == 128) // fp128 -> double/float + inserts of elements.
return VF /*ldxbr/lexbr*/ +
- getScalarizationOverhead(DstVecTy, true, false);
+ getScalarizationOverhead(DstVecTy, /*Insert*/ true,
+ /*Extract*/ false, CostKind);
else // double -> float
return VF / 2 /*vledb*/ + std::max(1U, VF / 4 /*vperm*/);
}
return VF * 2;
}
// -> fp128. VF * lxdb/lxeb + extraction of elements.
- return VF + getScalarizationOverhead(SrcVecTy, false, true);
+ return VF + getScalarizationOverhead(SrcVecTy, /*Insert*/ false,
+ /*Extract*/ true, CostKind);
}
}
}
InstructionCost SystemZTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
unsigned Index, Value *Op0,
Value *Op1) {
// vlvgp will insert two grs into a vector register, so only count half the
return Cost;
}
- return BaseT::getVectorInstrCost(Opcode, Val, Index, Op0, Op1);
+ return BaseT::getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
}
// Check if a load may be folded as a memory operand in its user.
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
using BaseT::getVectorInstrCost;
- InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index,
- Value *Op0, Value *Op1);
+ InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
+ unsigned Index, Value *Op0, Value *Op1);
bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue);
InstructionCost
getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
return Cost;
}
-InstructionCost WebAssemblyTTIImpl::getVectorInstrCost(unsigned Opcode,
- Type *Val,
- unsigned Index,
- Value *Op0, Value *Op1) {
- InstructionCost Cost =
- BasicTTIImplBase::getVectorInstrCost(Opcode, Val, Index, Op0, Op1);
+InstructionCost
+WebAssemblyTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
+ unsigned Index, Value *Op0, Value *Op1) {
+ InstructionCost Cost = BasicTTIImplBase::getVectorInstrCost(
+ Opcode, Val, CostKind, Index, Op0, Op1);
// SIMD128's insert/extract currently only take constant indices.
if (Index == -1u)
ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
const Instruction *CxtI = nullptr);
using BaseT::getVectorInstrCost;
- InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index,
- Value *Op0, Value *Op1);
+ InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
+ unsigned Index, Value *Op0, Value *Op1);
/// @}
}
InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
unsigned Index, Value *Op0,
Value *Op1) {
static const CostTblEntry SLMCostTbl[] = {
assert(Val->isVectorTy() && "This must be a vector type");
Type *ScalarType = Val->getScalarType();
InstructionCost RegisterFileMoveCost = 0;
- TTI::TargetCostKind CostKind = TTI::TargetCostKind::TCK_RecipThroughput;
// Non-immediate extraction/insertion can be handled as a sequence of
// aliased loads+stores via the stack.
if (Opcode == Instruction::ExtractElement && ScalarType->isPointerTy())
RegisterFileMoveCost += 1;
- return BaseT::getVectorInstrCost(Opcode, Val, Index, Op0, Op1) +
+ return BaseT::getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1) +
RegisterFileMoveCost;
}
-InstructionCost X86TTIImpl::getScalarizationOverhead(VectorType *Ty,
- const APInt &DemandedElts,
- bool Insert,
- bool Extract) {
+InstructionCost
+X86TTIImpl::getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts,
+ bool Insert, bool Extract,
+ TTI::TargetCostKind CostKind) {
assert(DemandedElts.getBitWidth() ==
cast<FixedVectorType>(Ty)->getNumElements() &&
"Vector size mismatch");
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
MVT MScalarTy = LT.second.getScalarType();
unsigned LegalVectorBitWidth = LT.second.getSizeInBits();
- TTI::TargetCostKind CostKind = TTI::TargetCostKind::TCK_RecipThroughput;
InstructionCost Cost = 0;
constexpr unsigned LaneBitWidth = 128;
// For types we can insert directly, insertion into 128-bit sub vectors is
// cheap, followed by a cheap chain of concatenations.
if (LegalVectorBitWidth <= LaneBitWidth) {
- Cost +=
- BaseT::getScalarizationOverhead(Ty, DemandedElts, Insert, false);
+ Cost += BaseT::getScalarizationOverhead(Ty, DemandedElts, Insert,
+ /*Extract*/ false, CostKind);
} else {
// In each 128-lane, if at least one index is demanded but not all
// indices are demanded and this 128-lane is not the first 128-lane of
Cost += getShuffleCost(TTI::SK_ExtractSubvector, Ty, std::nullopt,
CostKind, I * NumEltsPerLane, LaneTy);
Cost += BaseT::getScalarizationOverhead(LaneTy, LaneEltMask, Insert,
- false);
+ /*Extract*/ false, CostKind);
}
APInt AffectedLanes =
continue;
Cost += getShuffleCost(TTI::SK_ExtractSubvector, Ty, std::nullopt,
CostKind, I * NumEltsPerLane, LaneTy);
- Cost += BaseT::getScalarizationOverhead(LaneTy, LaneEltMask, false,
- Extract);
+ Cost += BaseT::getScalarizationOverhead(
+ LaneTy, LaneEltMask, /*Insert*/ false, Extract, CostKind);
}
return Cost;
}
// Fallback to default extraction.
- Cost += BaseT::getScalarizationOverhead(Ty, DemandedElts, false, Extract);
+ Cost += BaseT::getScalarizationOverhead(Ty, DemandedElts, /*Insert*/ false,
+ Extract, CostKind);
}
return Cost;
CoalescedVecEltIdx, CoalescedVecEltIdx + 1);
assert(DemandedElts.countPopulation() == 1 && "Inserting single value");
Cost += getScalarizationOverhead(CoalescedVecTy, DemandedElts, IsLoad,
- !IsLoad);
+ !IsLoad, CostKind);
}
// This isn't exactly right. We're using slow unaligned 32-byte accesses
(IsStore && !isLegalMaskedStore(SrcVTy, Alignment))) {
// Scalarization
APInt DemandedElts = APInt::getAllOnes(NumElem);
- InstructionCost MaskSplitCost =
- getScalarizationOverhead(MaskTy, DemandedElts, false, true);
+ InstructionCost MaskSplitCost = getScalarizationOverhead(
+ MaskTy, DemandedElts, /*Insert*/ false, /*Extract*/ true, CostKind);
InstructionCost ScalarCompareCost = getCmpSelInstrCost(
Instruction::ICmp, Type::getInt8Ty(SrcVTy->getContext()), nullptr,
CmpInst::BAD_ICMP_PREDICATE, CostKind);
InstructionCost BranchCost = getCFInstrCost(Instruction::Br, CostKind);
InstructionCost MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
- InstructionCost ValueSplitCost =
- getScalarizationOverhead(SrcVTy, DemandedElts, IsLoad, IsStore);
+ InstructionCost ValueSplitCost = getScalarizationOverhead(
+ SrcVTy, DemandedElts, IsLoad, IsStore, CostKind);
InstructionCost MemopCost =
NumElem * BaseT::getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
Alignment, AddressSpace, CostKind);
}
// Add the final extract element to the cost.
- return ReductionCost + getVectorInstrCost(Instruction::ExtractElement, Ty, 0,
- nullptr, nullptr);
+ return ReductionCost + getVectorInstrCost(Instruction::ExtractElement, Ty,
+ CostKind, 0, nullptr, nullptr);
}
InstructionCost X86TTIImpl::getMinMaxCost(Type *Ty, Type *CondTy,
}
// Add the final extract element to the cost.
- return MinMaxCost + getVectorInstrCost(Instruction::ExtractElement, Ty, 0,
- nullptr, nullptr);
+ return MinMaxCost + getVectorInstrCost(Instruction::ExtractElement, Ty,
+ CostKind, 0, nullptr, nullptr);
}
/// Calculate the cost of materializing a 64-bit value. This helper
auto *MaskTy =
FixedVectorType::get(Type::getInt1Ty(SrcVTy->getContext()), VF);
MaskUnpackCost = getScalarizationOverhead(
- MaskTy, DemandedElts, /*Insert=*/false, /*Extract=*/true);
+ MaskTy, DemandedElts, /*Insert=*/false, /*Extract=*/true, CostKind);
InstructionCost ScalarCompareCost = getCmpSelInstrCost(
Instruction::ICmp, Type::getInt1Ty(SrcVTy->getContext()), nullptr,
CmpInst::BAD_ICMP_PREDICATE, CostKind);
InstructionCost AddressUnpackCost = getScalarizationOverhead(
FixedVectorType::get(ScalarTy->getPointerTo(), VF), DemandedElts,
- /*Insert=*/false, /*Extract=*/true);
+ /*Insert=*/false, /*Extract=*/true, CostKind);
// The cost of the scalar loads/stores.
InstructionCost MemoryOpCost =
// The cost of forming the vector from loaded scalars/
// scalarizing the vector to perform scalar stores.
- InstructionCost InsertExtractCost =
- getScalarizationOverhead(cast<FixedVectorType>(SrcVTy), DemandedElts,
- /*Insert=*/Opcode == Instruction::Load,
- /*Extract=*/Opcode == Instruction::Store);
+ InstructionCost InsertExtractCost = getScalarizationOverhead(
+ cast<FixedVectorType>(SrcVTy), DemandedElts,
+ /*Insert=*/Opcode == Instruction::Load,
+ /*Extract=*/Opcode == Instruction::Store, CostKind);
return AddressUnpackCost + MemoryOpCost + MaskUnpackCost + InsertExtractCost;
}
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
using BaseT::getVectorInstrCost;
- InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index,
- Value *Op0, Value *Op1);
+ InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
+ TTI::TargetCostKind CostKind,
+ unsigned Index, Value *Op0, Value *Op1);
InstructionCost getScalarizationOverhead(VectorType *Ty,
const APInt &DemandedElts,
- bool Insert, bool Extract);
+ bool Insert, bool Extract,
+ TTI::TargetCostKind CostKind);
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
int VF,
const APInt &DemandedDstElts,
/// Estimate the overhead of scalarizing an instruction. This is a
/// convenience wrapper for the type-based getScalarizationOverhead API.
- InstructionCost getScalarizationOverhead(Instruction *I,
- ElementCount VF) const;
+ InstructionCost getScalarizationOverhead(Instruction *I, ElementCount VF,
+ TTI::TargetCostKind CostKind) const;
/// Returns true if an artificially high cost for emulated masked memrefs
/// should be used.
// to be vectors, so we need to extract individual elements from there,
// execute VF scalar calls, and then gather the result into the vector return
// value.
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
InstructionCost ScalarCallCost =
- TTI.getCallInstrCost(F, ScalarRetTy, ScalarTys, TTI::TCK_RecipThroughput);
+ TTI.getCallInstrCost(F, ScalarRetTy, ScalarTys, CostKind);
if (VF.isScalar())
return ScalarCallCost;
// Compute costs of unpacking argument values for the scalar calls and
// packing the return values to a vector.
- InstructionCost ScalarizationCost = getScalarizationOverhead(CI, VF);
+ InstructionCost ScalarizationCost =
+ getScalarizationOverhead(CI, VF, CostKind);
InstructionCost Cost =
ScalarCallCost * VF.getKnownMinValue() + ScalarizationCost;
// If the corresponding vector cost is cheaper, return its cost.
InstructionCost VectorCallCost =
- TTI.getCallInstrCost(nullptr, RetTy, Tys, TTI::TCK_RecipThroughput);
+ TTI.getCallInstrCost(nullptr, RetTy, Tys, CostKind);
if (VectorCallCost < Cost) {
NeedToScalarize = false;
Cost = VectorCallCost;
// The cost of insertelement and extractelement instructions needed for
// scalarization.
- ScalarizationCost += getScalarizationOverhead(I, VF);
+ ScalarizationCost += getScalarizationOverhead(I, VF, CostKind);
// Scale the cost by the probability of executing the predicated blocks.
// This assumes the predicated block for each vector lane is equally
// Compute the scalarization overhead of needed insertelement instructions
// and phi nodes.
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
if (isScalarWithPredication(I, VF) && !I->getType()->isVoidTy()) {
ScalarCost += TTI.getScalarizationOverhead(
cast<VectorType>(ToVectorTy(I->getType(), VF)),
- APInt::getAllOnes(VF.getFixedValue()), true, false);
+ APInt::getAllOnes(VF.getFixedValue()), /*Insert*/ true,
+ /*Extract*/ false, CostKind);
ScalarCost +=
- VF.getFixedValue() *
- TTI.getCFInstrCost(Instruction::PHI, TTI::TCK_RecipThroughput);
+ VF.getFixedValue() * TTI.getCFInstrCost(Instruction::PHI, CostKind);
}
// Compute the scalarization overhead of needed extractelement
else if (needsExtract(J, VF)) {
ScalarCost += TTI.getScalarizationOverhead(
cast<VectorType>(ToVectorTy(J->getType(), VF)),
- APInt::getAllOnes(VF.getFixedValue()), false, true);
+ APInt::getAllOnes(VF.getFixedValue()), /*Insert*/ false,
+ /*Extract*/ true, CostKind);
}
}
// Don't pass *I here, since it is scalar but will actually be part of a
// vectorized loop where the user of it is a vectorized instruction.
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
const Align Alignment = getLoadStoreAlignment(I);
- Cost += VF.getKnownMinValue() *
- TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(), Alignment,
- AS, TTI::TCK_RecipThroughput);
+ Cost += VF.getKnownMinValue() * TTI.getMemoryOpCost(I->getOpcode(),
+ ValTy->getScalarType(),
+ Alignment, AS, CostKind);
// Get the overhead of the extractelement and insertelement instructions
// we might create due to scalarization.
- Cost += getScalarizationOverhead(I, VF);
+ Cost += getScalarizationOverhead(I, VF, CostKind);
// If we have a predicated load/store, it will need extra i1 extracts and
// conditional branches, but may not be executed for each vector lane. Scale
VectorType::get(IntegerType::getInt1Ty(ValTy->getContext()), VF);
Cost += TTI.getScalarizationOverhead(
Vec_i1Ty, APInt::getAllOnes(VF.getKnownMinValue()),
- /*Insert=*/false, /*Extract=*/true);
- Cost += TTI.getCFInstrCost(Instruction::Br, TTI::TCK_RecipThroughput);
+ /*Insert=*/false, /*Extract=*/true, CostKind);
+ Cost += TTI.getCFInstrCost(Instruction::Br, CostKind);
if (useEmulatedMaskMemRefHack(I, VF))
// Artificially setting to a high enough value to practically disable
(isLoopInvariantStoreValue
? 0
: TTI.getVectorInstrCost(Instruction::ExtractElement, VectorTy,
- VF.getKnownMinValue() - 1));
+ CostKind, VF.getKnownMinValue() - 1));
}
InstructionCost
return VectorizationCostTy(C, TypeNotScalarized);
}
-InstructionCost
-LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I,
- ElementCount VF) const {
+InstructionCost LoopVectorizationCostModel::getScalarizationOverhead(
+ Instruction *I, ElementCount VF, TTI::TargetCostKind CostKind) const {
// There is no mechanism yet to create a scalable scalarization loop,
// so this is currently Invalid.
if (!RetTy->isVoidTy() &&
(!isa<LoadInst>(I) || !TTI.supportsEfficientVectorElementLoadStore()))
Cost += TTI.getScalarizationOverhead(
- cast<VectorType>(RetTy), APInt::getAllOnes(VF.getKnownMinValue()), true,
- false);
+ cast<VectorType>(RetTy), APInt::getAllOnes(VF.getKnownMinValue()),
+ /*Insert*/ true,
+ /*Extract*/ false, CostKind);
// Some targets keep addresses scalar.
if (isa<LoadInst>(I) && !TTI.prefersVectorizedAddressing())
for (auto *V : filterExtractingOperands(Ops, VF))
Tys.push_back(MaybeVectorizeType(V->getType(), VF));
return Cost + TTI.getOperandsScalarizationOverhead(
- filterExtractingOperands(Ops, VF), Tys);
+ filterExtractingOperands(Ops, VF), Tys, CostKind);
}
void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
VectorType::get(IntegerType::getInt1Ty(RetTy->getContext()), VF);
return (
TTI.getScalarizationOverhead(
- Vec_i1Ty, APInt::getAllOnes(VF.getFixedValue()), false, true) +
+ Vec_i1Ty, APInt::getAllOnes(VF.getFixedValue()),
+ /*Insert*/ false, /*Extract*/ true, CostKind) +
(TTI.getCFInstrCost(Instruction::Br, CostKind) * VF.getFixedValue()));
} else if (I->getParent() == TheLoop->getLoopLatch() || VF.isScalar())
// The back-edge branch will remain, as will all scalar branches.
continue;
}
}
- Cost -= TTI->getVectorInstrCost(*EE, EE->getVectorOperandType(), Idx);
+ Cost -= TTI->getVectorInstrCost(*EE, EE->getVectorOperandType(), CostKind,
+ Idx);
}
// Add a cost for subvector extracts/inserts if required.
for (const auto &Data : ExtractVectorsTys) {
bool NeedShuffle =
VL.front() != *It || !all_of(VL.drop_front(), UndefValue::classof);
InstructionCost InsertCost =
- TTI->getVectorInstrCost(Instruction::InsertElement, VecTy,
+ TTI->getVectorInstrCost(Instruction::InsertElement, VecTy, CostKind,
/*Index=*/0, PoisonValue::get(VecTy), *It);
return InsertCost + (NeedShuffle
? TTI->getShuffleCost(
}
}
return TTI->getVectorInstrCost(Instruction::ExtractElement, SrcVecTy,
- *getExtractIndex(I));
+ CostKind, *getExtractIndex(I));
};
auto GetVectorCost = [](InstructionCost CommonCost) { return CommonCost; };
return GetCostDiff(GetScalarCost, GetVectorCost);
InstructionCost Cost = 0;
Cost -= TTI->getScalarizationOverhead(SrcVecTy, DemandedElts,
- /*Insert*/ true, /*Extract*/ false);
+ /*Insert*/ true, /*Extract*/ false,
+ CostKind);
// First cost - resize to actual vector size if not identity shuffle or
// need to shift the vector.
// extend the extracted value back to the original type. Here, we account
// for the extract and the added cost of the sign extend if needed.
auto *VecTy = FixedVectorType::get(EU.Scalar->getType(), BundleWidth);
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
auto *ScalarRoot = VectorizableTree[0]->Scalars[0];
if (MinBWs.count(ScalarRoot)) {
auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot].first);
ExtractCost += TTI->getExtractWithExtendCost(Extend, EU.Scalar->getType(),
VecTy, EU.Lane);
} else {
- ExtractCost +=
- TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, EU.Lane);
+ ExtractCost += TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy,
+ CostKind, EU.Lane);
}
}
EstimateShufflesCost);
InstructionCost InsertCost = TTI->getScalarizationOverhead(
cast<FixedVectorType>(FirstUsers[I].first->getType()), DemandedElts[I],
- /*Insert*/ true, /*Extract*/ false);
+ /*Insert*/ true, /*Extract*/ false, TTI::TCK_RecipThroughput);
Cost -= InsertCost;
}
InstructionCost BoUpSLP::getGatherCost(FixedVectorType *Ty,
const APInt &ShuffledIndices,
bool NeedToShuffle) const {
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
InstructionCost Cost =
TTI->getScalarizationOverhead(Ty, ~ShuffledIndices, /*Insert*/ true,
- /*Extract*/ false);
+ /*Extract*/ false, CostKind);
if (NeedToShuffle)
Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, Ty);
return Cost;
InstructionCost OldCost =
TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS);
APInt DemandedElts = APInt::getOneBitSet(MinVecNumElts, 0);
- OldCost += TTI.getScalarizationOverhead(MinVecTy, DemandedElts,
- /* Insert */ true, HasExtract);
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+ OldCost +=
+ TTI.getScalarizationOverhead(MinVecTy, DemandedElts,
+ /* Insert */ true, HasExtract, CostKind);
// New pattern: load VecPtr
InstructionCost NewCost =
return nullptr;
Type *VecTy = Ext0->getVectorOperand()->getType();
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
assert(VecTy == Ext1->getVectorOperand()->getType() && "Need matching types");
- InstructionCost Cost0 = TTI.getVectorInstrCost(*Ext0, VecTy, Index0);
- InstructionCost Cost1 = TTI.getVectorInstrCost(*Ext1, VecTy, Index1);
+ InstructionCost Cost0 =
+ TTI.getVectorInstrCost(*Ext0, VecTy, CostKind, Index0);
+ InstructionCost Cost1 =
+ TTI.getVectorInstrCost(*Ext1, VecTy, CostKind, Index1);
// If both costs are invalid no shuffle is needed
if (!Cost0.isValid() && !Cost1.isValid())
// both sequences.
unsigned Ext0Index = Ext0IndexC->getZExtValue();
unsigned Ext1Index = Ext1IndexC->getZExtValue();
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
InstructionCost Extract0Cost =
- TTI.getVectorInstrCost(*Ext0, VecTy, Ext0Index);
+ TTI.getVectorInstrCost(*Ext0, VecTy, CostKind, Ext0Index);
InstructionCost Extract1Cost =
- TTI.getVectorInstrCost(*Ext1, VecTy, Ext1Index);
+ TTI.getVectorInstrCost(*Ext1, VecTy, CostKind, Ext1Index);
// A more expensive extract will always be replaced by a splat shuffle.
// For example, if Ext0 is more expensive:
Mask[Index] = Index + NumElts;
Type *ScalarTy = VecTy->getScalarType();
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
InstructionCost OldCost =
TTI.getArithmeticInstrCost(Instruction::FNeg, ScalarTy) +
- TTI.getVectorInstrCost(I, VecTy, Index);
+ TTI.getVectorInstrCost(I, VecTy, CostKind, Index);
// If the extract has one use, it will be eliminated, so count it in the
// original cost. If it has more than one use, ignore the cost because it will
// be the same before/after.
if (Extract->hasOneUse())
- OldCost += TTI.getVectorInstrCost(*Extract, VecTy, Index);
+ OldCost += TTI.getVectorInstrCost(*Extract, VecTy, CostKind, Index);
InstructionCost NewCost =
TTI.getArithmeticInstrCost(Instruction::FNeg, VecTy) +
// Get cost estimate for the insert element. This cost will factor into
// both sequences.
- InstructionCost InsertCost =
- TTI.getVectorInstrCost(Instruction::InsertElement, VecTy, Index);
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+ InstructionCost InsertCost = TTI.getVectorInstrCost(
+ Instruction::InsertElement, VecTy, CostKind, Index);
InstructionCost OldCost =
(IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + VectorOpCost;
InstructionCost NewCost = ScalarOpCost + InsertCost +
if (!VecTy)
return false;
- InstructionCost OldCost = TTI.getVectorInstrCost(*Ext0, VecTy, Index0);
- OldCost += TTI.getVectorInstrCost(*Ext1, VecTy, Index1);
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+ InstructionCost OldCost =
+ TTI.getVectorInstrCost(*Ext0, VecTy, CostKind, Index0);
+ OldCost += TTI.getVectorInstrCost(*Ext1, VecTy, CostKind, Index1);
OldCost +=
TTI.getCmpSelInstrCost(CmpOpcode, I0->getType(),
CmpInst::makeCmpResultType(I0->getType()), Pred) *
NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, CmpTy,
ShufMask);
NewCost += TTI.getArithmeticInstrCost(I.getOpcode(), CmpTy);
- NewCost += TTI.getVectorInstrCost(*Ext0, CmpTy, CheapIndex);
+ NewCost += TTI.getVectorInstrCost(*Ext0, CmpTy, CostKind, CheapIndex);
// Aggressively form vector ops if the cost is equal because the transform
// may enable further optimization.
}
auto *Index = dyn_cast<ConstantInt>(UI->getOperand(1));
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
OriginalCost +=
- TTI.getVectorInstrCost(Instruction::ExtractElement, FixedVT,
+ TTI.getVectorInstrCost(Instruction::ExtractElement, FixedVT, CostKind,
Index ? Index->getZExtValue() : -1);
ScalarizedCost +=
TTI.getMemoryOpCost(Instruction::Load, FixedVT->getElementType(),
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> <i32 4, i32 5>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> <i32 6, i32 7>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 5, i32 6, i32 7, i32 undef>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void