[LV][NFC] Use ElementCount for getMaxInterleaveFactor

author Luke Lau <luke@igalia.com>

Tue, 21 Feb 2023 13:00:18 +0000 (13:00 +0000)

committer Luke Lau <luke@igalia.com>

Wed, 22 Feb 2023 10:15:05 +0000 (10:15 +0000)
author Luke Lau <luke@igalia.com>
Tue, 21 Feb 2023 13:00:18 +0000 (13:00 +0000)
committer Luke Lau <luke@igalia.com>
Wed, 22 Feb 2023 10:15:05 +0000 (10:15 +0000)
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h

index c81ac7e..34ef2ea 100644 (file)
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1096,7 +1096,7 @@ public:
    /// \return The maximum interleave factor that any transform should try to
    /// perform for this target. This number depends on the level of parallelism
    /// and the number of execution units in the CPU.
-  unsigned getMaxInterleaveFactor(unsigned VF) const;
+  unsigned getMaxInterleaveFactor(ElementCount VF) const;
  
    /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
    static OperandValueInfo getOperandInfo(const Value *V);
@@ -1796,7 +1796,7 @@ public:
    /// \return if target want to issue a prefetch in address space \p AS.
    virtual bool shouldPrefetchAddressSpace(unsigned AS) const = 0;
  
-  virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
+  virtual unsigned getMaxInterleaveFactor(ElementCount VF) = 0;
    virtual InstructionCost getArithmeticInstrCost(
        unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
        OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
@@ -2356,7 +2356,7 @@ public:
      return Impl.shouldPrefetchAddressSpace(AS);
    }
  
-  unsigned getMaxInterleaveFactor(unsigned VF) override {
+  unsigned getMaxInterleaveFactor(ElementCount VF) override {
      return Impl.getMaxInterleaveFactor(VF);
    }
    unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

index 78d66c2..f7afd63 100644 (file)
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -491,7 +491,7 @@ public:
    bool enableWritePrefetching() const { return false; }
    bool shouldPrefetchAddressSpace(unsigned AS) const { return !AS; }
  
-  unsigned getMaxInterleaveFactor(unsigned VF) const { return 1; }
+  unsigned getMaxInterleaveFactor(ElementCount VF) const { return 1; }
  
    InstructionCost getArithmeticInstrCost(
        unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h

index 02eff6a..e1d3a36 100644 (file)
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -844,7 +844,7 @@ public:
      }
    }
  
-  unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
+  unsigned getMaxInterleaveFactor(ElementCount VF) { return 1; }
  
    InstructionCost getArithmeticInstrCost(
        unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp

index 4ad5d2d..f72c593 100644 (file)
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -735,7 +735,7 @@ bool TargetTransformInfo::shouldPrefetchAddressSpace(unsigned AS) const {
    return TTIImpl->shouldPrefetchAddressSpace(AS);
  }
  
-unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {
+unsigned TargetTransformInfo::getMaxInterleaveFactor(ElementCount VF) const {
    return TTIImpl->getMaxInterleaveFactor(VF);
  }
  
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

index 06e5589..5cd0058 100644 (file)
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2673,7 +2673,7 @@ AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
    return Cost;
  }
  
-unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
+unsigned AArch64TTIImpl::getMaxInterleaveFactor(ElementCount VF) {
    return ST->getMaxInterleaveFactor();
  }
  
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h

index fa51860..03873e5 100644 (file)
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -144,7 +144,7 @@ public:
      return VF.getKnownMinValue() * ST->getVScaleForTuning();
    }
  
-  unsigned getMaxInterleaveFactor(unsigned VF);
+  unsigned getMaxInterleaveFactor(ElementCount VF);
  
    bool prefersVectorizedAddressing() const;
  
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

index e2c647f..d93c4b6 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -474,10 +474,10 @@ void GCNTTIImpl::getMemcpyLoopResidualLoweringType(
    }
  }
  
-unsigned GCNTTIImpl::getMaxInterleaveFactor(unsigned VF) {
+unsigned GCNTTIImpl::getMaxInterleaveFactor(ElementCount VF) {
    // Disable unrolling if the loop is not vectorized.
    // TODO: Enable this again.
-  if (VF == 1)
+  if (VF.isScalar())
      return 1;
  
    return 8;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h

index 7862f21..8a7bf3e 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -143,7 +143,7 @@ public:
        unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
        unsigned SrcAlign, unsigned DestAlign,
        std::optional<uint32_t> AtomicCpySize) const;
-  unsigned getMaxInterleaveFactor(unsigned VF);
+  unsigned getMaxInterleaveFactor(ElementCount VF);
  
    bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
  
diff --git a/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp

index c01f9c4..1a1be4a 100644 (file)
--- a/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp
@@ -82,10 +82,10 @@ bool R600TTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
    return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
  }
  
-unsigned R600TTIImpl::getMaxInterleaveFactor(unsigned VF) {
+unsigned R600TTIImpl::getMaxInterleaveFactor(ElementCount VF) {
    // Disable unrolling if the loop is not vectorized.
    // TODO: Enable this again.
-  if (VF == 1)
+  if (VF.isScalar())
      return 1;
  
    return 8;
diff --git a/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h

index 8dacae0..2934b01 100644 (file)
--- a/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h
@@ -57,7 +57,7 @@ public:
                                     unsigned AddrSpace) const;
    bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                      unsigned AddrSpace) const;
-  unsigned getMaxInterleaveFactor(unsigned VF);
+  unsigned getMaxInterleaveFactor(ElementCount VF);
    InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                   const Instruction *I = nullptr);
    using BaseT::getVectorInstrCost;
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h

index d75879b..c6f10ed 100644 (file)
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -178,7 +178,7 @@ public:
      llvm_unreachable("Unsupported register kind");
    }
  
-  unsigned getMaxInterleaveFactor(unsigned VF) {
+  unsigned getMaxInterleaveFactor(ElementCount VF) {
      return ST->getMaxInterleaveFactor();
    }
  
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp

index 979a436..cf4b66f 100644 (file)
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -110,7 +110,7 @@ unsigned HexagonTTIImpl::getNumberOfRegisters(bool Vector) const {
    return 32;
  }
  
-unsigned HexagonTTIImpl::getMaxInterleaveFactor(unsigned VF) {
+unsigned HexagonTTIImpl::getMaxInterleaveFactor(ElementCount VF) {
    return useHVX() ? 2 : 1;
  }
  
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h

index 3d1e51a..b1dc49c 100644 (file)
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -82,7 +82,7 @@ public:
    /// @{
  
    unsigned getNumberOfRegisters(bool vector) const;
-  unsigned getMaxInterleaveFactor(unsigned VF);
+  unsigned getMaxInterleaveFactor(ElementCount VF);
    TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;
    unsigned getMinVectorRegisterBitWidth() const;
    ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const;
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp

index 594ba18..69f2bbf 100644 (file)
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -517,7 +517,7 @@ unsigned PPCTTIImpl::getPrefetchDistance() const {
    return 300;
  }
  
-unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) {
+unsigned PPCTTIImpl::getMaxInterleaveFactor(ElementCount VF) {
    unsigned Directive = ST->getCPUDirective();
    // The 440 has no SIMD support, but floating-point instructions
    // have a 5-cycle latency, so unroll by 5x for latency hiding.
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h

index 97377cb..5a6b4c9 100644 (file)
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -101,7 +101,7 @@ public:
    TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;
    unsigned getCacheLineSize() const override;
    unsigned getPrefetchDistance() const override;
-  unsigned getMaxInterleaveFactor(unsigned VF);
+  unsigned getMaxInterleaveFactor(ElementCount VF);
    InstructionCost vectorCostAdjustmentFactor(unsigned Opcode, Type *Ty1,
                                               Type *Ty2);
    InstructionCost getArithmeticInstrCost(
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h

index 143079c..813c6a0 100644 (file)
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -270,10 +270,10 @@ public:
      }
    }
  
-  unsigned getMaxInterleaveFactor(unsigned VF) {
+  unsigned getMaxInterleaveFactor(ElementCount VF) {
      // If the loop will not be vectorized, don't interleave the loop.
      // Let regular unroll to unroll the loop.
-    return VF == 1 ? 1 : ST->getMaxInterleaveFactor();
+    return VF.getKnownMinValue() == 1 ? 1 : ST->getMaxInterleaveFactor();
    }
  
    enum RISCVRegisterClass { GPRRC, FPRRC, VRRC };
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp

index c49b4a8..4e6acf4 100644 (file)
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -199,11 +199,11 @@ unsigned X86TTIImpl::getLoadStoreVecRegBitWidth(unsigned) const {
        .getFixedValue();
  }
  
-unsigned X86TTIImpl::getMaxInterleaveFactor(unsigned VF) {
+unsigned X86TTIImpl::getMaxInterleaveFactor(ElementCount VF) {
    // If the loop will not be vectorized, don't interleave the loop.
    // Let regular unroll to unroll the loop, which saves the overflow
    // check and memory check cost.
-  if (VF == 1)
+  if (VF.isScalar())
      return 1;
  
    if (ST->isAtom())
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h

index ef8c4a1..2034cf8 100644 (file)
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -127,7 +127,7 @@ public:
    unsigned getNumberOfRegisters(unsigned ClassID) const;
    TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;
    unsigned getLoadStoreVecRegBitWidth(unsigned AS) const;
-  unsigned getMaxInterleaveFactor(unsigned VF);
+  unsigned getMaxInterleaveFactor(ElementCount VF);
    InstructionCost getArithmeticInstrCost(
        unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
        TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

index 8c9cfff..8db1e14 100644 (file)
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5548,7 +5548,7 @@ bool LoopVectorizationCostModel::isEpilogueVectorizationProfitable(
  
    // We also consider epilogue vectorization unprofitable for targets that don't
    // consider interleaving beneficial (eg. MVE).
-  if (TTI.getMaxInterleaveFactor(VF.getKnownMinValue()) <= 1)
+  if (TTI.getMaxInterleaveFactor(VF) <= 1)
      return false;
    // FIXME: We should consider changing the threshold for scalable
    // vectors to take VScaleForTuning into account.
@@ -5803,8 +5803,7 @@ LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
    }
  
    // Clamp the interleave ranges to reasonable counts.
-  unsigned MaxInterleaveCount =
-      TTI.getMaxInterleaveFactor(VF.getKnownMinValue());
+  unsigned MaxInterleaveCount = TTI.getMaxInterleaveFactor(VF);
  
    // Check if the user has overridden the max.
    if (VF.isScalar()) {
@@ -10539,7 +10538,7 @@ LoopVectorizeResult LoopVectorizePass::runImpl(
    // vector registers, loop vectorization may still enable scalar
    // interleaving.
    if (!TTI->getNumberOfRegisters(TTI->getRegisterClassForType(true)) &&
-      TTI->getMaxInterleaveFactor(1) < 2)
+      TTI->getMaxInterleaveFactor(ElementCount::getFixed(1)) < 2)
      return LoopVectorizeResult(false, false);
  
    bool Changed = false, CFGChanged = false;
author	Luke Lau <luke@igalia.com>
	Tue, 21 Feb 2023 13:00:18 +0000 (13:00 +0000)
committer	Luke Lau <luke@igalia.com>
	Wed, 22 Feb 2023 10:15:05 +0000 (10:15 +0000)
llvm/include/llvm/Analysis/TargetTransformInfo.h		patch | blob | history
llvm/include/llvm/Analysis/TargetTransformInfoImpl.h		patch | blob | history
llvm/include/llvm/CodeGen/BasicTTIImpl.h		patch | blob | history
llvm/lib/Analysis/TargetTransformInfo.cpp		patch | blob | history
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp		patch | blob | history
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h		patch | blob | history
llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp		patch | blob | history
llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h		patch | blob | history
llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp		patch | blob | history
llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h		patch | blob | history
llvm/lib/Target/ARM/ARMTargetTransformInfo.h		patch | blob | history
llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp		patch | blob | history
llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h		patch | blob | history
llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp		patch | blob | history
llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h		patch | blob | history
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h		patch | blob | history
llvm/lib/Target/X86/X86TargetTransformInfo.cpp		patch | blob | history
llvm/lib/Target/X86/X86TargetTransformInfo.h		patch | blob | history
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp		patch | blob | history