From 83993f535eb90874ca2256ddbd35bce4e407c13a Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Tue, 10 Sep 2019 18:11:13 +0100
Subject: [PATCH] aco: workaround GFX10 0x3f branch bug
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

According to LLVM, branches with an offset of 0x3f are buggy.

v2: (by Timur Kristóf)
- extract the GFX10 specific part to its own function

Signed-off-by: Rhys Perry
Signed-off-by: Timur Kristóf
Reviewed-by: Daniel Schürmann
---
 src/amd/compiler/aco_assembler.cpp | 44 +++++++++++++++++++++++++++++++++-----
 1 file changed, 39 insertions(+), 5 deletions(-)

diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp
index 34eb9fb..5a82d44 100644
--- a/src/amd/compiler/aco_assembler.cpp
+++ b/src/amd/compiler/aco_assembler.cpp
@@ -1,4 +1,5 @@
-#include
+#include
+#include
 
 #include "aco_ir.h"
 #include "common/sid.h"
@@ -9,7 +10,7 @@ namespace aco {
 struct asm_context {
    Program *program;
    enum chip_class chip_class;
-   std::map branches;
+   std::vector> branches;
    std::vector constaddrs;
    const int16_t* opcode;
    // TODO: keep track of branch instructions referring blocks
@@ -135,7 +136,7 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction*
       encoding |= opcode << 16;
       encoding |= (uint16_t) sopp->imm;
       if (sopp->block != -1)
-         ctx.branches.insert({out.size(), sopp});
+         ctx.branches.emplace_back(out.size(), sopp);
       out.push_back(encoding);
       break;
    }
@@ -605,10 +606,43 @@ void fix_exports(asm_context& ctx, std::vector& out, Program* program)
    }
 }
 
+static void fix_branches_gfx10(asm_context& ctx, std::vector& out)
+{
+   /* Branches with an offset of 0x3f are buggy on GFX10, we workaround by inserting NOPs if needed. */
+   bool gfx10_3f_bug = false;
+
+   do {
+      auto buggy_branch_it = std::find_if(ctx.branches.begin(), ctx.branches.end(), [&ctx](const auto &branch) -> bool {
+         return ((int)ctx.program->blocks[branch.second->block].offset - branch.first - 1) == 0x3f;
+      });
+
+      gfx10_3f_bug = buggy_branch_it != ctx.branches.end();
+
+      if (gfx10_3f_bug) {
+         /* Insert an s_nop after the branch */
+         constexpr uint32_t s_nop_0 = 0xbf800000u;
+         auto out_pos = std::next(out.begin(), buggy_branch_it->first + 1);
+         out.insert(out_pos, s_nop_0);
+
+         /* Update the offset of each affected block */
+         for (Block& block : ctx.program->blocks) {
+            if (block.offset > (unsigned)buggy_branch_it->first)
+               block.offset++;
+         }
+
+         /* Update the branches following the current one */
+         for (auto branch_it = std::next(buggy_branch_it); branch_it != ctx.branches.end(); ++branch_it)
+            branch_it->first++;
+      }
+   } while (gfx10_3f_bug);
+}
+
 void fix_branches(asm_context& ctx, std::vector& out)
 {
-   for (std::pair branch : ctx.branches)
-   {
+   if (ctx.chip_class >= GFX10)
+      fix_branches_gfx10(ctx, out);
+
+   for (std::pair &branch : ctx.branches) {
       int offset = (int)ctx.program->blocks[branch.second->block].offset - branch.first - 1;
       out[branch.first] |= (uint16_t) offset;
    }
-- 
2.7.4