From: Mark Searles Date: Thu, 26 Apr 2018 16:11:19 +0000 (+0000) Subject: [AMDGPU][Waitcnt] As of gfx7, VMEM operations do not increment the export counter... X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=2a19af6e17545234f96c7d10a80c5b5992731ced;p=platform%2Fupstream%2Fllvm.git [AMDGPU][Waitcnt] As of gfx7, VMEM operations do not increment the export counter and the input registers are available in the next instruction; update the waitcnt pass to take this into account. Differential Revision: https://reviews.llvm.org/D46067 llvm-svn: 330954 --- diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index cd08026..3bcb701 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -530,6 +530,10 @@ public: return HasSDWAOutModsVOPC; } + bool vmemWriteNeedsExpWaitcnt() const { + return getGeneration() < SEA_ISLANDS; + } + /// \brief Returns the offset in bytes from the start of the input buffer /// of the first explicit kernel argument. unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const { diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 49e6afa..543d073 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -1328,7 +1328,7 @@ void SIInsertWaitcnts::updateEventWaitcntAfter( Inst.getOpcode() != AMDGPU::BUFFER_WBINVL1_SC && Inst.getOpcode() != AMDGPU::BUFFER_WBINVL1_VOL) { ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_ACCESS, Inst); - if ( // TODO: assumed yes -- target_info->MemWriteNeedsExpWait() && + if (ST->vmemWriteNeedsExpWaitcnt() && (Inst.mayStore() || AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1)) { ScoreBrackets->updateByEvent(TII, TRI, MRI, VMW_GPR_LOCK, Inst); } diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll index 79e1943..13b6c1a 100644 --- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll +++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll @@ -217,7 +217,7 @@ define amdgpu_kernel void @dynamic_insertelement_v3i16(<3 x i16> addrspace(1)* % ; GCN-DAG: buffer_store_short v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:8 ; GCN: buffer_store_short v{{[0-9]+}}, [[IDX]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}} -; GCN: s_waitcnt +; GCN-NO-TONGA: s_waitcnt expcnt ; GCN: buffer_load_dwordx2 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll index 7fc7acd..78c5281 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=VERDE %s ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s ;CHECK-LABEL: {{^}}buffer_store: @@ -65,7 +65,7 @@ main_body: ;CHECK-LABEL: {{^}}buffer_store_wait: ;CHECK-NOT: s_waitcnt ;CHECK: buffer_store_format_xyzw v[0:3], v4, s[0:3], 0 idxen -;CHECK: s_waitcnt expcnt(0) +;VERDE: s_waitcnt expcnt(0) ;CHECK: buffer_load_format_xyzw v[0:3], v5, s[0:3], 0 idxen ;CHECK: s_waitcnt vmcnt(0) ;CHECK: buffer_store_format_xyzw v[0:3], v6, s[0:3], 0 idxen diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.ll index c12c713..48a30c7 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=VERDE %s ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s ;CHECK-LABEL: {{^}}buffer_store: @@ -65,7 +65,7 @@ main_body: ;CHECK-LABEL: {{^}}buffer_store_wait: ;CHECK-NOT: s_waitcnt ;CHECK: buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 idxen -;CHECK: s_waitcnt expcnt(0) +;VERDE: s_waitcnt expcnt(0) ;CHECK: buffer_load_dwordx4 v[0:3], v5, s[0:3], 0 idxen ;CHECK: s_waitcnt vmcnt(0) ;CHECK: buffer_store_dwordx4 v[0:3], v6, s[0:3], 0 idxen diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll index d9be4a4..5afe4c3 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VERDE %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s ; GCN-LABEL: {{^}}image_load_v4i32: @@ -159,7 +159,7 @@ main_body: ; ; GCN-LABEL: {{^}}image_store_wait: ; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm -; GCN: s_waitcnt expcnt(0) +; VERDE: s_waitcnt expcnt(0) ; GCN: image_load v[0:3], v4, s[8:15] dmask:0xf unorm ; GCN: s_waitcnt vmcnt(0) ; GCN: image_store v[0:3], v4, s[16:23] dmask:0xf unorm diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.store.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.store.ll index 997d7f5..abd0ed8 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.store.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=GCN %s +;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefixes=GCN,VERDE %s ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}tbuffer_store: @@ -67,7 +67,7 @@ main_body: ; ; GCN-LABEL: {{^}}buffer_store_wait: ; GCN: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], dfmt:15, nfmt:3, 0 idxen -; GCN: s_waitcnt expcnt(0) +; VERDE: s_waitcnt expcnt(0) ; GCN: buffer_load_format_xyzw v[0:3], v5, s[0:3], 0 idxen ; GCN: s_waitcnt vmcnt(0) ; GCN: tbuffer_store_format_xyzw v[0:3], v6, s[0:3], dfmt:16, nfmt:2, 0 idxen