From a73f76750bf0580a2bd4a42ffc427fba4e8a0ccb Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Fri, 22 Sep 2023 20:01:55 +0100
Subject: [PATCH] aco: fix LdsDirectVMEMHazard WaW with the wrong waitcnt
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Seems we missed this case.

fossil-db (navi31):
Totals from 24 (0.03% of 79332) affected shaders:
Instrs: 3562 -> 3538 (-0.67%)
CodeSize: 18740 -> 18644 (-0.51%)

Signed-off-by: Rhys Perry
Reviewed-by: Daniel Schürmann
Fixes: 2cdb3e4b6b6d ("aco: add VMEMtoScalarWriteHazard tests")
Part-of:
---
 src/amd/compiler/aco_insert_NOPs.cpp        |  2 +-
 src/amd/compiler/tests/test_insert_nops.cpp | 10 ++++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp
index 8cebae8..fd4f5ac 100644
--- a/src/amd/compiler/aco_insert_NOPs.cpp
+++ b/src/amd/compiler/aco_insert_NOPs.cpp
@@ -1396,7 +1396,7 @@ handle_instruction_gfx11(State& state, NOP_ctx_gfx11& ctx, aco_ptr&
     */
    if (instr->isVMEM() || instr->isFlatLike()) {
       for (Definition& def : instr->definitions)
-         fill_vgpr_bitset(ctx.vgpr_used_by_vmem_store, def.physReg(), def.bytes());
+         fill_vgpr_bitset(ctx.vgpr_used_by_vmem_load, def.physReg(), def.bytes());
       if (instr->definitions.empty()) {
          for (Operand& op : instr->operands)
             fill_vgpr_bitset(ctx.vgpr_used_by_vmem_store, op.physReg(), op.bytes());
diff --git a/src/amd/compiler/tests/test_insert_nops.cpp b/src/amd/compiler/tests/test_insert_nops.cpp
index 1658e8d..ffa0cf9 100644
--- a/src/amd/compiler/tests/test_insert_nops.cpp
+++ b/src/amd/compiler/tests/test_insert_nops.cpp
@@ -577,6 +577,16 @@ BEGIN_TEST(insert_nops.lds_direct_vmem)
    bld.sopp(aco_opcode::s_waitcnt, -1, 0x3ff);
    bld.ldsdir(aco_opcode::lds_direct_load, Definition(PhysReg(256), v1), Operand(m0, s1));
 
+   //! p_unit_test 14
+   //! v1: %0:v[0] = buffer_load_dword %0:s[0-3], %0:v[1], 0 offen
+   //! s1: %0:null = s_waitcnt_vscnt imm:0
+   //! s_waitcnt_depctr vm_vsrc(0)
+   //! v1: %0:v[0] = lds_direct_load %0:m0
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(14));
+   create_mubuf(0, PhysReg(256), PhysReg(257));
+   bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), 0);
+   bld.ldsdir(aco_opcode::lds_direct_load, Definition(PhysReg(256), v1), Operand(m0, s1));
+
    finish_insert_nops_test();
 END_TEST
 
-- 
2.7.4