[SLP][RISCV] Account for offset folding in getPointersChainCost

author Luke Lau <luke@igalia.com>

Tue, 2 May 2023 13:46:22 +0000 (14:46 +0100)

committer Luke Lau <luke@igalia.com>

Mon, 22 May 2023 12:55:30 +0000 (13:55 +0100)
author Luke Lau <luke@igalia.com>
Tue, 2 May 2023 13:46:22 +0000 (14:46 +0100)
committer Luke Lau <luke@igalia.com>
Mon, 22 May 2023 12:55:30 +0000 (13:55 +0100)
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h

index 4fc56fc..df3b24a 100644 (file)
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -320,9 +320,11 @@ public:
  
    /// Estimate the cost of a chain of pointers (typically pointer operands of a
    /// chain of loads or stores within same block) operations set when lowered.
+  /// \p AccessTy is the type of the loads/stores that will ultimately use the
+  /// \p Ptrs.
    InstructionCost
    getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base,
-                       const PointersChainInfo &Info,
+                       const PointersChainInfo &Info, Type *AccessTy,
                         TargetCostKind CostKind = TTI::TCK_RecipThroughput
  
    ) const;
@@ -1663,7 +1665,7 @@ public:
                                       TTI::TargetCostKind CostKind) = 0;
    virtual InstructionCost
    getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base,
-                       const TTI::PointersChainInfo &Info,
+                       const TTI::PointersChainInfo &Info, Type *AccessTy,
                         TTI::TargetCostKind CostKind) = 0;
    virtual unsigned getInliningThresholdMultiplier() = 0;
    virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0;
@@ -2024,8 +2026,9 @@ public:
    InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
                                         const Value *Base,
                                         const PointersChainInfo &Info,
+                                       Type *AccessTy,
                                         TargetCostKind CostKind) override {
-    return Impl.getPointersChainCost(Ptrs, Base, Info, CostKind);
+    return Impl.getPointersChainCost(Ptrs, Base, Info, AccessTy, CostKind);
    }
    unsigned getInliningThresholdMultiplier() override {
      return Impl.getInliningThresholdMultiplier();
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

index 131e1e3..d7b1538 100644 (file)
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1041,6 +1041,7 @@ public:
    InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
                                         const Value *Base,
                                         const TTI::PointersChainInfo &Info,
+                                       Type *AccessTy,
                                         TTI::TargetCostKind CostKind) {
      InstructionCost Cost = TTI::TCC_Free;
      // In the basic model we take into account GEP instructions only
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp

index f7da461..a7b89b2 100644 (file)
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -230,10 +230,11 @@ TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr,
  
  InstructionCost TargetTransformInfo::getPointersChainCost(
      ArrayRef<const Value *> Ptrs, const Value *Base,
-    const TTI::PointersChainInfo &Info, TTI::TargetCostKind CostKind) const {
+    const TTI::PointersChainInfo &Info, Type *AccessTy,
+    TTI::TargetCostKind CostKind) const {
    assert((Base || !Info.isSameBase()) &&
           "If pointers have same base address it has to be provided.");
-  return TTIImpl->getPointersChainCost(Ptrs, Base, Info, CostKind);
+  return TTIImpl->getPointersChainCost(Ptrs, Base, Info, AccessTy, CostKind);
  }
  
  unsigned TargetTransformInfo::getEstimatedNumberOfCaseClusters(
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

index 528c207..880d7ad 100644 (file)
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1592,6 +1592,55 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost(
    }
  }
  
+// TODO: Deduplicate from TargetTransformInfoImplCRTPBase.
+InstructionCost RISCVTTIImpl::getPointersChainCost(
+    ArrayRef<const Value *> Ptrs, const Value *Base,
+    const TTI::PointersChainInfo &Info, Type *AccessTy,
+    TTI::TargetCostKind CostKind) {
+  InstructionCost Cost = TTI::TCC_Free;
+  // In the basic model we take into account GEP instructions only
+  // (although here can come alloca instruction, a value, constants and/or
+  // constant expressions, PHIs, bitcasts ... whatever allowed to be used as a
+  // pointer). Typically, if Base is a not a GEP-instruction and all the
+  // pointers are relative to the same base address, all the rest are
+  // either GEP instructions, PHIs, bitcasts or constants. When we have same
+  // base, we just calculate cost of each non-Base GEP as an ADD operation if
+  // any their index is a non-const.
+  // If no known dependecies between the pointers cost is calculated as a sum
+  // of costs of GEP instructions.
+  for (auto [I, V] : enumerate(Ptrs)) {
+    const auto *GEP = dyn_cast<GetElementPtrInst>(V);
+    if (!GEP)
+      continue;
+    if (Info.isSameBase() && V != Base) {
+      if (GEP->hasAllConstantIndices())
+        continue;
+      // If the chain is unit-stride and BaseReg + stride*i is a legal
+      // addressing mode, then presume the base GEP is sitting around in a
+      // register somewhere and check if we can fold the offset relative to
+      // it.
+      unsigned Stride = DL.getTypeStoreSize(AccessTy);
+      if (Info.isUnitStride() &&
+          isLegalAddressingMode(AccessTy,
+                                /* BaseGV */ nullptr,
+                                /* BaseOffset */ Stride * I,
+                                /* HasBaseReg */ true,
+                                /* Scale */ 0,
+                                GEP->getType()->getPointerAddressSpace()))
+        continue;
+      Cost += getArithmeticInstrCost(Instruction::Add, GEP->getType(), CostKind,
+                                     {TTI::OK_AnyValue, TTI::OP_None},
+                                     {TTI::OK_AnyValue, TTI::OP_None},
+                                     std::nullopt);
+    } else {
+      SmallVector<const Value *> Indices(GEP->indices());
+      Cost += getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(),
+                         Indices, CostKind);
+    }
+  }
+  return Cost;
+}
+
  void RISCVTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                             TTI::UnrollingPreferences &UP,
                                             OptimizationRemarkEmitter *ORE) {
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h

index 5319bff..e96f93c 100644 (file)
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -106,6 +106,12 @@ public:
                                          Align Alignment, unsigned AddressSpace,
                                          TTI::TargetCostKind CostKind);
  
+  InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
+                                       const Value *Base,
+                                       const TTI::PointersChainInfo &Info,
+                                       Type *AccessTy,
+                                       TTI::TargetCostKind CostKind);
+
    void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                 TTI::UnrollingPreferences &UP,
                                 OptimizationRemarkEmitter *ORE);
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp

index e2de1db..be9f264 100644 (file)
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -4943,9 +4943,11 @@ X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, Align Alignment,
    return Cost + LT.first;
  }
  
-InstructionCost X86TTIImpl::getPointersChainCost(
-    ArrayRef<const Value *> Ptrs, const Value *Base,
-    const TTI::PointersChainInfo &Info, TTI::TargetCostKind CostKind) {
+InstructionCost
+X86TTIImpl::getPointersChainCost(ArrayRef<const Value *> Ptrs,
+                                 const Value *Base,
+                                 const TTI::PointersChainInfo &Info,
+                                 Type *AccessTy, TTI::TargetCostKind CostKind) {
    if (Info.isSameBase() && Info.isKnownStride()) {
      // If all the pointers have known stride all the differences are translated
      // into constants. X86 memory addressing allows encoding it into
@@ -4957,7 +4959,7 @@ InstructionCost X86TTIImpl::getPointersChainCost(
      }
      return TTI::TCC_Free;
    }
-  return BaseT::getPointersChainCost(Ptrs, Base, Info, CostKind);
+  return BaseT::getPointersChainCost(Ptrs, Base, Info, AccessTy, CostKind);
  }
  
  InstructionCost X86TTIImpl::getAddressComputationCost(Type *Ty,
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h

index f182aa7..2c5105d 100644 (file)
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -180,6 +180,7 @@ public:
    InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
                                         const Value *Base,
                                         const TTI::PointersChainInfo &Info,
+                                       Type *AccessTy,
                                         TTI::TargetCostKind CostKind);
    InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE,
                                              const SCEV *Ptr);
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

index a1079d4..75780ca 100644 (file)
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7394,7 +7394,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
        // stay in vectorized code due to uses outside of these scalar
        // loads/stores.
        ScalarCost = TTI->getPointersChainCost(
-          Ptrs, BasePtr, TTI::PointersChainInfo::getUnitStride(), CostKind);
+          Ptrs, BasePtr, TTI::PointersChainInfo::getUnitStride(), ScalarTy,
+          CostKind);
  
        SmallVector<const Value *> PtrsRetainedInVecCode;
        for (Value *V : Ptrs) {
@@ -7420,7 +7421,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
        }
        VecCost = TTI->getPointersChainCost(
            PtrsRetainedInVecCode, BasePtr,
-          TTI::PointersChainInfo::getKnownStride(), CostKind);
+          TTI::PointersChainInfo::getKnownStride(), VecTy, CostKind);
      } else {
        // Case 1: Ptrs are the arguments of loads that we are going to transform
        // into masked gather load intrinsic.
@@ -7436,7 +7437,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
                ? TTI::PointersChainInfo::getUnknownStride()
                : TTI::PointersChainInfo::getKnownStride();
  
-      ScalarCost = TTI->getPointersChainCost(Ptrs, BasePtr, PtrsInfo, CostKind);
+      ScalarCost = TTI->getPointersChainCost(Ptrs, BasePtr, PtrsInfo, ScalarTy,
+                                             CostKind);
  
        // Remark: it not quite correct to use scalar GEP cost for a vector GEP,
        // but it's not clear how to do that without having vector GEP arguments
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/getpointerschaincost.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/getpointerschaincost.ll

index 6a21760..3872cb0 100644 (file)
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/getpointerschaincost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/getpointerschaincost.ll
@@ -69,7 +69,7 @@ entry:
    ret void
  }
  
-; FIXME: When computing the scalar pointers chain cost here, there is a cost of
+; When computing the scalar pointers chain cost here, there is a cost of
  ; 1 for the base pointer, and the rest can be folded in, so the scalar cost
  ; should be 1.
  define void @h(ptr %dest, i32 %i) {
@@ -86,7 +86,7 @@ entry:
  ; YAML-NEXT: Function:        h
  ; YAML-NEXT: Args:
  ; YAML-NEXT:   - String:          'Stores SLP vectorized with cost '
-; YAML-NEXT:   - Cost:            '-5'
+; YAML-NEXT:   - Cost:            '-2'
  ; YAML-NEXT:   - String:          ' and with tree size '
  ; YAML-NEXT:   - TreeSize:        '2'
    %p1 = getelementptr [4 x i32], ptr %dest, i32 %i, i32 0
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/struct-gep.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/struct-gep.ll

index 1f6a28e..8f12501 100644 (file)
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/struct-gep.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/struct-gep.ll
@@ -2,7 +2,9 @@
  ; RUN: opt < %s -passes=slp-vectorizer -mtriple=riscv64 -mattr=+v \
  ; RUN: -riscv-v-slp-max-vf=0 -S | FileCheck %s
  
-; FIXME: This should not be vectorized
+; This shouldn't be vectorized as the extra address computation required for the
+; vector store make it unprofitable (vle/vse don't have an offset in their
+; addressing modes)
  
  %struct.2i32 = type { i32, i32 }
  
@@ -10,7 +12,9 @@ define void @splat_store_v2i32(ptr %dest, i64 %i) {
  ; CHECK-LABEL: @splat_store_v2i32(
  ; CHECK-NEXT:  entry:
  ; CHECK-NEXT:    [[P1:%.*]] = getelementptr [[STRUCT_2I32:%.*]], ptr [[DEST:%.*]], i64 [[I:%.*]], i32 0
-; CHECK-NEXT:    store <2 x i32> <i32 1, i32 1>, ptr [[P1]], align 4
+; CHECK-NEXT:    store i32 1, ptr [[P1]], align 4
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr [[STRUCT_2I32]], ptr [[DEST]], i64 [[I]], i32 1
+; CHECK-NEXT:    store i32 1, ptr [[P2]], align 4
  ; CHECK-NEXT:    ret void
  ;
  entry:
author	Luke Lau <luke@igalia.com>
	Tue, 2 May 2023 13:46:22 +0000 (14:46 +0100)
committer	Luke Lau <luke@igalia.com>
	Mon, 22 May 2023 12:55:30 +0000 (13:55 +0100)
llvm/include/llvm/Analysis/TargetTransformInfo.h		patch \| blob \| history
llvm/include/llvm/Analysis/TargetTransformInfoImpl.h		patch \| blob \| history
llvm/lib/Analysis/TargetTransformInfo.cpp		patch \| blob \| history
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp		patch \| blob \| history
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h		patch \| blob \| history
llvm/lib/Target/X86/X86TargetTransformInfo.cpp		patch \| blob \| history
llvm/lib/Target/X86/X86TargetTransformInfo.h		patch \| blob \| history
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp		patch \| blob \| history
llvm/test/Transforms/SLPVectorizer/RISCV/getpointerschaincost.ll		patch \| blob \| history
llvm/test/Transforms/SLPVectorizer/RISCV/struct-gep.ll		patch \| blob \| history