EnableGlobalMerge("aarch64-global-merge", cl::Hidden,
cl::desc("Enable the global merge pass"));
+static cl::opt<bool>
+ EnableLoopDataPrefetch("aarch64-loop-data-prefetch", cl::Hidden,
+ cl::desc("Enable the loop data prefetch pass"),
+ cl::init(false));
+
extern "C" void LLVMInitializeAArch64Target() {
  // Register the target.
  RegisterTargetMachine<AArch64leTargetMachine> X(TheAArch64leTarget);
  if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
    addPass(createCFGSimplificationPass());
+  // Run LoopDataPrefetch for Cyclone (the only subtarget that defines a
+  // non-zero getPrefetchDistance).
+  //
+  // Run this before LSR to remove the multiplies involved in computing the
+  // pointer values N iterations ahead.
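+  //
+  // For example, prefetching A[i + k] in a loop that reads A[i] introduces an
+  // address computation of roughly A + (i + k) * sizeof(*A); running before
+  // LSR lets it fold that multiply into the existing induction variable.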
+  if (TM->getOptLevel() != CodeGenOpt::None && EnableLoopDataPrefetch)
+    addPass(createLoopDataPrefetchPass());
+
  TargetPassConfig::addIRPasses();
  // Match interleaved memory accesses to ldN/stN intrinsics.
#define DEBUG_TYPE "aarch64tti"
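+// The distance below is a count of IR instructions rather than bytes; the
+// LoopDataPrefetch pass divides it by the instruction count of the loop body
+// to decide how many iterations ahead to issue prefetches.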
+static cl::opt<unsigned> CyclonePrefetchDistance(
+ "cyclone-prefetch-distance",
+ cl::desc("Number of instructions to prefetch ahead for Cyclone"),
+ cl::init(280), cl::Hidden);
+
/// \brief Calculate the cost of materializing a 64-bit value. This helper
/// method might only calculate a fraction of a larger immediate. Therefore it
/// is valid to return a cost of ZERO.
  }
  return true;
}
+
+unsigned AArch64TTIImpl::getCacheLineSize() {
+  // Cyclone uses 64-byte cache lines; other subtargets fall back to the
+  // generic TTI default.
+  if (ST->isCyclone())
+    return 64;
+  return BaseT::getCacheLineSize();
+}
+
+unsigned AArch64TTIImpl::getPrefetchDistance() {
+  // Cyclone is the only subtarget that reports a non-zero distance, so in
+  // practice LoopDataPrefetch only fires there; the value is tunable via
+  // -cyclone-prefetch-distance.
+  if (ST->isCyclone())
+    return CyclonePrefetchDistance;
+  return BaseT::getPrefetchDistance();
+}
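+
+// Note: both hooks are consumed through TargetTransformInfo by the
+// LoopDataPrefetch pass: the distance controls how far ahead addresses are
+// computed, and the cache line size lets the pass skip prefetches that would
+// fall into a line it is already prefetching.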
  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
                                 ArrayRef<unsigned> Indices, unsigned Alignment,
                                 unsigned AddressSpace);
+
+  unsigned getCacheLineSize();
+
+  unsigned getPrefetchDistance();
  /// @}
};
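
Note: as committed here the pass is off by default (cl::init(false)). A sketch
of how one might exercise it on a Cyclone target, assuming a stock llc build
where these hidden cl::opts are registered:

  llc -mcpu=cyclone -aarch64-loop-data-prefetch \
      -cyclone-prefetch-distance=512 loop.ll -o loop.s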