From c037ba1bb7aba99bd15e063bbdbc6d4c68cf2384 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Timur=20Krist=C3=B3f?= Date: Thu, 24 Oct 2019 18:55:07 +0200 Subject: [PATCH] aco/gfx10: Mitigate LdsBranchVmemWARHazard. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit There is a hazard caused by there is a branch between a VMEM/GLOBAL/SCRATCH instruction and a DS instruction. This commit adds a workaround that avoids the problem. Signed-off-by: Timur Kristóf Reviewed-by: Daniel Schürmann --- src/amd/compiler/README | 9 ++++++ src/amd/compiler/aco_insert_NOPs.cpp | 57 ++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) diff --git a/src/amd/compiler/README b/src/amd/compiler/README index 620b4bc..822ecd7 100644 --- a/src/amd/compiler/README +++ b/src/amd/compiler/README @@ -200,3 +200,12 @@ Mitigated by: A VALU instruction that writes an SGPR (or has a valid SDST operand), or `s_waitcnt_depctr 0xfffe`. Note: `s_waitcnt_depctr` is an internal instruction, so there is no further information about what it does or what its operand means. + +### LdsBranchVmemWARHazard + +Triggered by: +VMEM/GLOBAL/SCRATCH instruction, then a branch, then a DS instruction, +or vice versa: DS instruction, then a branch, then a VMEM/GLOBAL/SCRATCH instruction. + +Mitigated by: +Only `s_waitcnt_vscnt null, 0`. Needed even if the first instruction is a load. diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp index 6924ea1..2442f60 100644 --- a/src/amd/compiler/aco_insert_NOPs.cpp +++ b/src/amd/compiler/aco_insert_NOPs.cpp @@ -43,6 +43,10 @@ struct NOP_ctx { int last_VMEM_since_scalar_write = -1; bool has_VOPC = false; bool has_nonVALU_exec_read = false; + bool has_VMEM = false; + bool has_branch_after_VMEM = false; + bool has_DS = false; + bool has_branch_after_DS = false; std::bitset<128> sgprs_read_by_SMEM; NOP_ctx(Program* program) : chip_class(program->chip_class) { @@ -107,6 +111,27 @@ bool instr_writes_sgpr(const aco_ptr& instr) }); } +inline bool instr_is_branch(const aco_ptr& instr) +{ + return instr->opcode == aco_opcode::s_branch || + instr->opcode == aco_opcode::s_cbranch_scc0 || + instr->opcode == aco_opcode::s_cbranch_scc1 || + instr->opcode == aco_opcode::s_cbranch_vccz || + instr->opcode == aco_opcode::s_cbranch_vccnz || + instr->opcode == aco_opcode::s_cbranch_execz || + instr->opcode == aco_opcode::s_cbranch_execnz || + instr->opcode == aco_opcode::s_cbranch_cdbgsys || + instr->opcode == aco_opcode::s_cbranch_cdbguser || + instr->opcode == aco_opcode::s_cbranch_cdbgsys_or_user || + instr->opcode == aco_opcode::s_cbranch_cdbgsys_and_user || + instr->opcode == aco_opcode::s_subvector_loop_begin || + instr->opcode == aco_opcode::s_subvector_loop_end || + instr->opcode == aco_opcode::s_setpc_b64 || + instr->opcode == aco_opcode::s_swappc_b64 || + instr->opcode == aco_opcode::s_getpc_b64 || + instr->opcode == aco_opcode::s_call_b64; +} + bool regs_intersect(PhysReg a_reg, unsigned a_size, PhysReg b_reg, unsigned b_size) { return a_reg > b_reg ? @@ -413,6 +438,38 @@ std::pair handle_instruction_gfx10(NOP_ctx& ctx, aco_ptr& } } + /* LdsBranchVmemWARHazard + * Handle VMEM/GLOBAL/SCRATCH->branch->DS and DS->branch->VMEM/GLOBAL/SCRATCH patterns. + */ + if (instr->isVMEM() || instr->format == Format::GLOBAL || instr->format == Format::SCRATCH) { + ctx.has_VMEM = true; + ctx.has_branch_after_VMEM = false; + /* Mitigation for DS is needed only if there was already a branch after */ + ctx.has_DS = ctx.has_branch_after_DS; + } else if (instr->format == Format::DS) { + ctx.has_DS = true; + ctx.has_branch_after_DS = false; + /* Mitigation for VMEM is needed only if there was already a branch after */ + ctx.has_VMEM = ctx.has_branch_after_VMEM; + } else if (instr_is_branch(instr)) { + ctx.has_branch_after_VMEM = ctx.has_VMEM; + ctx.has_branch_after_DS = ctx.has_DS; + } else if (instr->opcode == aco_opcode::s_waitcnt_vscnt) { + /* Only s_waitcnt_vscnt can mitigate the hazard */ + const SOPK_instruction *sopk = static_cast(instr.get()); + if (sopk->definitions[0].physReg() == sgpr_null && sopk->imm == 0) + ctx.has_VMEM = ctx.has_branch_after_VMEM = ctx.has_DS = ctx.has_branch_after_DS = false; + } + if ((ctx.has_VMEM && ctx.has_branch_after_DS) || (ctx.has_DS && ctx.has_branch_after_VMEM)) { + ctx.has_VMEM = ctx.has_branch_after_VMEM = ctx.has_DS = ctx.has_branch_after_DS = false; + + /* Insert s_waitcnt_vscnt to mitigate the problem */ + aco_ptr wait{create_instruction(aco_opcode::s_waitcnt_vscnt, Format::SOPK, 0, 1)}; + wait->definitions[0] = Definition(sgpr_null, s1); + wait->imm = 0; + new_instructions.emplace_back(std::move(wait)); + } + return std::make_pair(sNOPs, vNOPs); } -- 2.7.4