/// adding SW prefetches. The default is 1, i.e. prefetch with any stride.
unsigned getMinPrefetchStride() const;
+ /// \return The maximum number of iterations to prefetch ahead. If the
+ /// required number of iterations is more than this number, no prefetching is
+ /// performed.
+ unsigned getMaxPrefetchIterationsAhead() const;
+
/// \return The maximum interleave factor that any transform should try to
/// perform for this target. This number depends on the level of parallelism
/// and the number of execution units in the CPU.
virtual unsigned getCacheLineSize() = 0;
virtual unsigned getPrefetchDistance() = 0;
virtual unsigned getMinPrefetchStride() = 0;
+ virtual unsigned getMaxPrefetchIterationsAhead() = 0;
virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
virtual unsigned
getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
unsigned getMinPrefetchStride() override {
return Impl.getMinPrefetchStride();
}
+ unsigned getMaxPrefetchIterationsAhead() override {
+ return Impl.getMaxPrefetchIterationsAhead();
+ }
unsigned getMaxInterleaveFactor(unsigned VF) override {
return Impl.getMaxInterleaveFactor(VF);
}
unsigned getMinPrefetchStride() { return 1; }
+ unsigned getMaxPrefetchIterationsAhead() { return UINT_MAX; }
+
unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
return TTIImpl->getMinPrefetchStride();
}
+unsigned TargetTransformInfo::getMaxPrefetchIterationsAhead() const {
+ return TTIImpl->getMaxPrefetchIterationsAhead();
+}
+
unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {
return TTIImpl->getMaxInterleaveFactor(VF);
}
cl::desc("Min stride to add prefetches for Cyclone"),
cl::init(2048), cl::Hidden);
+// Be conservative for now and don't prefetch ahead too much since the loop
+// may terminate early.
+static cl::opt<unsigned> CycloneMaxPrefetchIterationsAhead(
+ "cyclone-max-prefetch-iters-ahead",
+ cl::desc("Max number of iterations to prefetch ahead on Cyclone"),
+ cl::init(3), cl::Hidden);
+
/// \brief Calculate the cost of materializing a 64-bit value. This helper
/// method might only calculate a fraction of a larger immediate. Therefore it
/// is valid to return a cost of ZERO.
return CycloneMinPrefetchStride;
return BaseT::getMinPrefetchStride();
}
+
+unsigned AArch64TTIImpl::getMaxPrefetchIterationsAhead() {
+ if (ST->isCyclone())
+ return CycloneMaxPrefetchIterationsAhead;
+ return BaseT::getMaxPrefetchIterationsAhead();
+}
unsigned getPrefetchDistance();
unsigned getMinPrefetchStride();
+
+ unsigned getMaxPrefetchIterationsAhead();
/// @}
};
if (!ItersAhead)
ItersAhead = 1;
+ if (ItersAhead > TTI->getMaxPrefetchIterationsAhead())
+ return MadeChange;
+
DEBUG(dbgs() << "Prefetching " << ItersAhead
<< " iterations ahead (loop size: " << LoopSize << ") in "
<< L->getHeader()->getParent()->getName() << ": " << *L);
-; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -loop-data-prefetch -S < %s | FileCheck %s --check-prefix=LARGE_PREFETCH --check-prefix=ALL
+; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -loop-data-prefetch -cyclone-max-prefetch-iters-ahead=100 -S < %s | FileCheck %s --check-prefix=LARGE_PREFETCH --check-prefix=ALL
+; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -loop-data-prefetch -S < %s | FileCheck %s --check-prefix=NO_LARGE_PREFETCH --check-prefix=ALL
; RUN: opt -mcpu=generic -mtriple=arm64-apple-ios -loop-data-prefetch -S < %s | FileCheck %s --check-prefix=NO_LARGE_PREFETCH --check-prefix=ALL
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"