From c246b7bd4a5191d48f68ce12b50e03bfadd2a0b5 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Thu, 31 Mar 2022 13:39:02 +0100 Subject: [PATCH] [AMDGPU] Only count global-to-global as indirect accesses Previously any load (global, local or constant) feeding into a global load or store would be counted as an indirect access. This patch only counts global loads feeding into a global load or store. The rationale is that the latency for global loads is generally much larger than the other kinds. As a side effect this makes it easier to write small kernel test cases that are not counted as having indirect accesses, despite the fact that arguments to the kernel are accessed with an SMEM load. Differential Revision: https://reviews.llvm.org/D122804 --- llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp | 2 +- llvm/test/CodeGen/AMDGPU/perfhint.ll | 3 +-- llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit2.ll | 4 ++-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp index de97b76..b994b53 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp @@ -153,7 +153,7 @@ bool AMDGPUPerfHint::isIndirectAccess(const Instruction *Inst) const { if (auto LD = dyn_cast<LoadInst>(V)) { auto M = LD->getPointerOperand(); - if (isGlobalAddr(M) || isLocalAddr(M) || isConstantAddr(M)) { + if (isGlobalAddr(M)) { LLVM_DEBUG(dbgs() << " is IA\n"); return true; } diff --git a/llvm/test/CodeGen/AMDGPU/perfhint.ll b/llvm/test/CodeGen/AMDGPU/perfhint.ll index 2fe01e8..296eeab 100644 --- a/llvm/test/CodeGen/AMDGPU/perfhint.ll +++ b/llvm/test/CodeGen/AMDGPU/perfhint.ll @@ -75,10 +75,9 @@ bb: ret void } -; FIXME: This test was intended to be WaveLimiterHint : 0 ; GCN-LABEL: {{^}}test_indirect_through_phi: ; GCN: MemoryBound: 0 -; GCN: WaveLimiterHint : 1 +; GCN: WaveLimiterHint : 0 define amdgpu_kernel void @test_indirect_through_phi(float 
addrspace(1)* %arg) { bb: %load = load float, float addrspace(1)* %arg, align 8 diff --git a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit2.ll b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit2.ll index d8dac0b..e209f9e 100644 --- a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit2.ll +++ b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit2.ll @@ -6,8 +6,8 @@ ; SI-MINREG: NumSgprs: {{[1-9]$}} ; SI-MINREG: NumVgprs: {{[1-9]$}} -; SI-MAXOCC: NumSgprs: {{[0-4][0-9]$}} -; SI-MAXOCC: NumVgprs: {{[0-4][0-9]$}} +; SI-MAXOCC: NumSgprs: {{[1-4]?[0-9]$}} +; SI-MAXOCC: NumVgprs: {{[1-4]?[0-9]$}} ; stores may alias loads ; VI: NumSgprs: {{[0-9]$}} -- 2.7.4