From: Krzysztof Parzyszek Date: Fri, 22 Jul 2016 14:22:43 +0000 (+0000) Subject: [Hexagon] Use loop data prefetch on Hexagon X-Git-Tag: llvmorg-4.0.0-rc1~14497 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=d3d0a4bda36133bdf233c75dfd9e32d5d7cff3ac;p=platform%2Fupstream%2Fllvm.git [Hexagon] Use loop data prefetch on Hexagon llvm-svn: 276422 --- diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp index fb315a7..73d3bd1 100644 --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -367,3 +367,11 @@ void HexagonSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst, updateLatency(SrcInst, DstInst, Dep); } +unsigned HexagonSubtarget::getL1CacheLineSize() const { + return 32; +} + +unsigned HexagonSubtarget::getL1PrefetchDistance() const { + return 32; +} + diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/llvm/lib/Target/Hexagon/HexagonSubtarget.h index 9b40c13..77d0966 100644 --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.h +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.h @@ -132,6 +132,9 @@ public: /// dependency. void adjustSchedDependency(SUnit *def, SUnit *use, SDep& dep) const override; + unsigned getL1CacheLineSize() const; + unsigned getL1PrefetchDistance() const; + private: // Helper function responsible for increasing the latency only. 
void updateLatency(MachineInstr *SrcInst, MachineInstr *DstInst, SDep &Dep) diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp index f964a66..2c971b1 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -68,6 +68,10 @@ static cl::opt<bool> EnableGenPred("hexagon-gen-pred", cl::init(true), cl::Hidden, cl::desc("Enable conversion of arithmetic operations to " "predicate instructions")); +static cl::opt<bool> EnableLoopPrefetch("hexagon-loop-prefetch", + cl::init(false), cl::Hidden, cl::ZeroOrMore, + cl::desc("Enable loop data prefetch on Hexagon")); + static cl::opt<bool> DisableHSDR("disable-hsdr", cl::init(false), cl::Hidden, cl::desc("Disable splitting double registers")); @@ -225,6 +229,8 @@ void HexagonPassConfig::addIRPasses() { addPass(createAtomicExpandPass(TM)); if (!NoOpt) { + if (EnableLoopPrefetch) + addPass(createLoopDataPrefetchPass()); if (EnableCommGEP) addPass(createHexagonCommonGEP()); // Replace certain combinations of shifts and ands with extracts. diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp index a05443e..de3c20f 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -36,3 +36,11 @@ void HexagonTTIImpl::getUnrollingPreferences(Loop *L, unsigned HexagonTTIImpl::getNumberOfRegisters(bool vector) const { return vector ? 
0 : 32; } + +unsigned HexagonTTIImpl::getPrefetchDistance() const { + return getST()->getL1PrefetchDistance(); +} + +unsigned HexagonTTIImpl::getCacheLineSize() const { + return getST()->getL1CacheLineSize(); +} diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h index 71ae17a..9c4d3a7 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -55,6 +55,10 @@ public: // The Hexagon target can unroll loops with run-time trip counts. void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP); + // L1 cache prefetch. + unsigned getPrefetchDistance() const; + unsigned getCacheLineSize() const; + /// @} /// \name Vector TTI Implementations diff --git a/llvm/test/CodeGen/Hexagon/loop-prefetch.ll b/llvm/test/CodeGen/Hexagon/loop-prefetch.ll new file mode 100644 index 0000000..0c6e458 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/loop-prefetch.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=hexagon -hexagon-loop-prefetch < %s | FileCheck %s +; CHECK: dcfetch + +target triple = "hexagon" + +define void @copy(i32* nocapture %d, i32* nocapture readonly %s, i32 %n) local_unnamed_addr #0 { +entry: + %tobool2 = icmp eq i32 %n, 0 + br i1 %tobool2, label %while.end, label %while.body + +while.body: ; preds = %entry, %while.body + %n.addr.05 = phi i32 [ %dec, %while.body ], [ %n, %entry ] + %s.addr.04 = phi i32* [ %incdec.ptr, %while.body ], [ %s, %entry ] + %d.addr.03 = phi i32* [ %incdec.ptr1, %while.body ], [ %d, %entry ] + %dec = add i32 %n.addr.05, -1 + %incdec.ptr = getelementptr inbounds i32, i32* %s.addr.04, i32 1 + %0 = load i32, i32* %s.addr.04, align 4 + %incdec.ptr1 = getelementptr inbounds i32, i32* %d.addr.03, i32 1 + store i32 %0, i32* %d.addr.03, align 4 + %tobool = icmp eq i32 %dec, 0 + br i1 %tobool, label %while.end, label %while.body + +while.end: ; preds = %while.body, %entry + ret void +} + +attributes #0 = { norecurse 
nounwind "target-cpu"="hexagonv60" "target-features"="-hvx,-hvx-double" }