aco: fix LdsDirectVMEMHazard WaW with the wrong waitcnt
author Rhys Perry <pendingchaos02@gmail.com>
Fri, 22 Sep 2023 19:01:55 +0000 (20:01 +0100)
committer Marge Bot <emma+marge@anholt.net>
Wed, 11 Oct 2023 15:14:04 +0000 (15:14 +0000)
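It seems we missed this case: VGPRs written by VMEM/FLAT instructions (their
definitions) were recorded in vgpr_used_by_vmem_store instead of
vgpr_used_by_vmem_load.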

fossil-db (navi31):
Totals from 24 (0.03% of 79332) affected shaders:
Instrs: 3562 -> 3538 (-0.67%)
CodeSize: 18740 -> 18644 (-0.51%)

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Fixes: 2cdb3e4b6b6d ("aco: add VMEMtoScalarWriteHazard tests")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25374>

src/amd/compiler/aco_insert_NOPs.cpp
src/amd/compiler/tests/test_insert_nops.cpp

index 8cebae8..fd4f5ac 100644 (file)
@@ -1396,7 +1396,7 @@ handle_instruction_gfx11(State& state, NOP_ctx_gfx11& ctx, aco_ptr<Instruction>&
     */
    if (instr->isVMEM() || instr->isFlatLike()) {
       for (Definition& def : instr->definitions)
-         fill_vgpr_bitset(ctx.vgpr_used_by_vmem_store, def.physReg(), def.bytes());
+         fill_vgpr_bitset(ctx.vgpr_used_by_vmem_load, def.physReg(), def.bytes());
       if (instr->definitions.empty()) {
          for (Operand& op : instr->operands)
             fill_vgpr_bitset(ctx.vgpr_used_by_vmem_store, op.physReg(), op.bytes());
index 1658e8d..ffa0cf9 100644 (file)
@@ -577,6 +577,16 @@ BEGIN_TEST(insert_nops.lds_direct_vmem)
    bld.sopp(aco_opcode::s_waitcnt, -1, 0x3ff);
    bld.ldsdir(aco_opcode::lds_direct_load, Definition(PhysReg(256), v1), Operand(m0, s1));
 
+   //! p_unit_test 14
+   //! v1: %0:v[0] = buffer_load_dword %0:s[0-3], %0:v[1], 0 offen
+   //! s1: %0:null = s_waitcnt_vscnt imm:0
+   //! s_waitcnt_depctr vm_vsrc(0)
+   //! v1: %0:v[0] = lds_direct_load %0:m0
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(14));
+   create_mubuf(0, PhysReg(256), PhysReg(257));
+   bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), 0);
+   bld.ldsdir(aco_opcode::lds_direct_load, Definition(PhysReg(256), v1), Operand(m0, s1));
+
    finish_insert_nops_test();
 END_TEST