[LoopDataPrefetch] Add TTI hook to limit the number of iterations to prefetch ahead

author Adam Nemet <anemet@apple.com>

Fri, 18 Mar 2016 00:27:43 +0000 (00:27 +0000)

committer Adam Nemet <anemet@apple.com>

Fri, 18 Mar 2016 00:27:43 +0000 (00:27 +0000)
author Adam Nemet <anemet@apple.com>
Fri, 18 Mar 2016 00:27:43 +0000 (00:27 +0000)
committer Adam Nemet <anemet@apple.com>
Fri, 18 Mar 2016 00:27:43 +0000 (00:27 +0000)
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h

index 76a4315..57d0cf4 100644 (file)
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -428,6 +428,11 @@ public:
    /// adding SW prefetches.  The default is 1, i.e. prefetch with any stride.
    unsigned getMinPrefetchStride() const;
  
+  /// \return The maximum number of iterations to prefetch ahead.  If the
+  /// required number of iterations is more than this number, no prefetching is
+  /// performed.
+  unsigned getMaxPrefetchIterationsAhead() const;
+
    /// \return The maximum interleave factor that any transform should try to
    /// perform for this target. This number depends on the level of parallelism
    /// and the number of execution units in the CPU.
@@ -624,6 +629,7 @@ public:
    virtual unsigned getCacheLineSize() = 0;
    virtual unsigned getPrefetchDistance() = 0;
    virtual unsigned getMinPrefetchStride() = 0;
+  virtual unsigned getMaxPrefetchIterationsAhead() = 0;
    virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
    virtual unsigned
    getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
@@ -797,6 +803,9 @@ public:
    unsigned getMinPrefetchStride() override {
      return Impl.getMinPrefetchStride();
    }
+  unsigned getMaxPrefetchIterationsAhead() override {
+    return Impl.getMaxPrefetchIterationsAhead();
+  }
    unsigned getMaxInterleaveFactor(unsigned VF) override {
      return Impl.getMaxInterleaveFactor(VF);
    }
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

index 96dc944..39eba4d 100644 (file)
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -270,6 +270,8 @@ public:
  
    unsigned getMinPrefetchStride() { return 1; }
  
+  unsigned getMaxPrefetchIterationsAhead() { return UINT_MAX; }
+
    unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
  
    unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp

index 8fd9fbf..b64d413 100644 (file)
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -227,6 +227,10 @@ unsigned TargetTransformInfo::getMinPrefetchStride() const {
    return TTIImpl->getMinPrefetchStride();
  }
  
+unsigned TargetTransformInfo::getMaxPrefetchIterationsAhead() const {
+  return TTIImpl->getMaxPrefetchIterationsAhead();
+}
+
  unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {
    return TTIImpl->getMaxInterleaveFactor(VF);
  }
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

index aee2989..2b3fae9 100644 (file)
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -31,6 +31,13 @@ static cl::opt<unsigned> CycloneMinPrefetchStride(
      cl::desc("Min stride to add prefetches for Cyclone"),
      cl::init(2048), cl::Hidden);
  
+// Be conservative for now and don't prefetch ahead too much since the loop
+// may terminate early.
+static cl::opt<unsigned> CycloneMaxPrefetchIterationsAhead(
+    "cyclone-max-prefetch-iters-ahead",
+    cl::desc("Max number of iterations to prefetch ahead on Cyclone"),
+    cl::init(3), cl::Hidden);
+
  /// \brief Calculate the cost of materializing a 64-bit value. This helper
  /// method might only calculate a fraction of a larger immediate. Therefore it
  /// is valid to return a cost of ZERO.
@@ -602,3 +609,9 @@ unsigned AArch64TTIImpl::getMinPrefetchStride() {
      return CycloneMinPrefetchStride;
    return BaseT::getMinPrefetchStride();
  }
+
+unsigned AArch64TTIImpl::getMaxPrefetchIterationsAhead() {
+  if (ST->isCyclone())
+    return CycloneMaxPrefetchIterationsAhead;
+  return BaseT::getMaxPrefetchIterationsAhead();
+}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h

index a54db00..93a84b7 100644 (file)
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -133,6 +133,8 @@ public:
    unsigned getPrefetchDistance();
  
    unsigned getMinPrefetchStride();
+
+  unsigned getMaxPrefetchIterationsAhead();
    /// @}
  };
  
diff --git a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp

index 8e03221..f55f319 100644 (file)
--- a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
@@ -171,6 +171,9 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) {
    if (!ItersAhead)
      ItersAhead = 1;
  
+  if (ItersAhead > TTI->getMaxPrefetchIterationsAhead())
+    return MadeChange;
+
    DEBUG(dbgs() << "Prefetching " << ItersAhead
                 << " iterations ahead (loop size: " << LoopSize << ") in "
                 << L->getHeader()->getParent()->getName() << ": " << *L);
diff --git a/llvm/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll b/llvm/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll

index 4e0b9c0..437d941 100644 (file)
--- a/llvm/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll
+++ b/llvm/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll
@@ -1,4 +1,5 @@
-; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -loop-data-prefetch -S < %s | FileCheck %s --check-prefix=LARGE_PREFETCH --check-prefix=ALL
+; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -loop-data-prefetch -cyclone-max-prefetch-iters-ahead=100 -S < %s | FileCheck %s --check-prefix=LARGE_PREFETCH --check-prefix=ALL
+; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -loop-data-prefetch -S < %s | FileCheck %s --check-prefix=NO_LARGE_PREFETCH --check-prefix=ALL
  ; RUN: opt -mcpu=generic -mtriple=arm64-apple-ios -loop-data-prefetch -S < %s | FileCheck %s --check-prefix=NO_LARGE_PREFETCH --check-prefix=ALL
  
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
author	Adam Nemet <anemet@apple.com>
	Fri, 18 Mar 2016 00:27:43 +0000 (00:27 +0000)
committer	Adam Nemet <anemet@apple.com>
	Fri, 18 Mar 2016 00:27:43 +0000 (00:27 +0000)
llvm/include/llvm/Analysis/TargetTransformInfo.h		patch \| blob \| history
llvm/include/llvm/Analysis/TargetTransformInfoImpl.h		patch \| blob \| history
llvm/lib/Analysis/TargetTransformInfo.cpp		patch \| blob \| history
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp		patch \| blob \| history
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h		patch \| blob \| history
llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp		patch \| blob \| history
llvm/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll		patch \| blob \| history