From: Gert Wollny Date: Fri, 4 Aug 2023 07:23:35 +0000 (+0200) Subject: r600/sfn: Use clause local registers in RA X-Git-Tag: upstream/23.3.3~4630 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=cfbd1fd41300740154f89b4382e4790e61c1bf0b;p=platform%2Fupstream%2Fmesa.git r600/sfn: Use clause local registers in RA Signed-off-by: Gert Wollny Part-of: --- diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 5141b02..26d4022 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -2029,14 +2029,14 @@ static int print_dst(struct r600_bytecode_alu *alu) int o = 0; unsigned sel = alu->dst.sel; char reg_char = 'R'; - if (sel > 128 - 4) { /* clause temporary gpr */ + if (sel >= 128 - 4) { /* clause temporary gpr */ sel -= 128 - 4; reg_char = 'T'; } if (alu_writes(alu)) { o += fprintf(stderr, "%c", reg_char); - o += print_sel(alu->dst.sel, alu->dst.rel, alu->index_mode, 0); + o += print_sel(sel, alu->dst.rel, alu->index_mode, 0); } else { o += fprintf(stderr, "__"); } diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index b931335..1bf1870 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -215,6 +215,7 @@ struct r600_bytecode_cf { unsigned isa[2]; unsigned nlds_read; unsigned nqueue_read; + unsigned clause_local_written; }; #define FC_NONE 0 diff --git a/src/gallium/drivers/r600/sfn/sfn_alu_defines.h b/src/gallium/drivers/r600/sfn/sfn_alu_defines.h index 180f3f5..dd02459 100644 --- a/src/gallium/drivers/r600/sfn/sfn_alu_defines.h +++ b/src/gallium/drivers/r600/sfn/sfn_alu_defines.h @@ -34,6 +34,10 @@ namespace r600 { + +static const int g_clause_local_start = 124; +static const int g_clause_local_end = 126; + /* ALU op2 instructions 17:7 top three bits always zero. */ enum EAluOp { op2_add = 0, diff --git a/src/gallium/drivers/r600/sfn/sfn_assembler.cpp b/src/gallium/drivers/r600/sfn/sfn_assembler.cpp index dc9343e..47cabf9 100644 --- a/src/gallium/drivers/r600/sfn/sfn_assembler.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_assembler.cpp @@ -426,6 +426,11 @@ AssamblerVisitor::emit_alu_op(const AluInstr& ai) } } + if (alu.dst.sel >= g_clause_local_start && alu.dst.sel < g_clause_local_end) { + int clidx = 4 * (alu.dst.sel - g_clause_local_start) + alu.dst.chan; + m_bc->cf_last->clause_local_written |= 1 << clidx; + } + if (ai.opcode() == op1_set_cf_idx0) { m_bc->index_loaded[0] = 1; m_bc->index_reg[0] = -1; @@ -1211,9 +1216,9 @@ AssamblerVisitor::emit_loop_cont() bool AssamblerVisitor::copy_dst(r600_bytecode_alu_dst& dst, const Register& d, bool write) { - if (write && d.sel() > 124) { - R600_ERR("shader_from_nir: Don't support more then 124 GPRs, but try " - "using %d\n", + if (write && d.sel() > g_clause_local_end) { + R600_ERR("shader_from_nir: Don't support more then 124 GPRs + 2 claus " + "local, but try using %d\n", d.sel()); m_result = false; return false; @@ -1281,6 +1286,13 @@ AssamblerVisitor::copy_src(r600_bytecode_alu_src& src, const VirtualValue& s) src.sel = s.sel(); src.chan = s.chan(); + if (s.sel() >= g_clause_local_start && s.sel() < g_clause_local_end ) { + assert(m_bc->cf_last); + int clidx = 4 * (s.sel() - g_clause_local_start) + s.chan(); + /* Ensure that the clause local register was already written */ + assert(m_bc->cf_last->clause_local_written & (1 << clidx)); + } + s.accept(visitor); return visitor.m_buffer_offset; } @@ -1294,7 +1306,7 @@ EncodeSourceVisitor::EncodeSourceVisitor(r600_bytecode_alu_src& s, r600_bytecode void EncodeSourceVisitor::visit(const Register& value) { - assert(value.sel() <= 124 && "Only have 124 registers"); + assert(value.sel() < g_clause_local_end && "Only have 124 reisters + 4 clause local"); } void diff --git a/src/gallium/drivers/r600/sfn/sfn_ra.cpp b/src/gallium/drivers/r600/sfn/sfn_ra.cpp index 6dac75e..2a36b31 100644 --- a/src/gallium/drivers/r600/sfn/sfn_ra.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_ra.cpp @@ -26,6 +26,7 @@ #include "sfn_ra.h" +#include "sfn_alu_defines.h" #include "sfn_debug.h" #include @@ -229,6 +230,83 @@ scalar_allocation(LiveRangeMap& lrm, const Interference& interference) return true; } +struct AluRegister { + int lifetime; + LiveRangeEntry *lre; +}; + +static inline bool operator < (const AluRegister& lhs, const AluRegister& rhs) +{ + return lhs.lifetime > rhs.lifetime; +} + +using AluClauseRegisters = std::priority_queue; + + +static void +scalar_clause_local_allocation (LiveRangeMap& lrm, const Interference& interference) +{ + for (int comp = 0; comp < 4; ++comp) { + AluClauseRegisters clause_reg; + auto& live_ranges = lrm.component(comp); + for (auto& r : live_ranges) { + + sfn_log << SfnLog::merge << "LR: " << *r.m_register + << "[ " << r.m_start << ", " << r.m_end + << " ], AC: " << r.m_alu_clause_local + << " Color; " << r.m_color << "\n"; + + if (r.m_color != -1) + continue; + + if (r.m_start == -1 && + r.m_end == -1) + continue; + + if (!r.m_alu_clause_local) + continue; + + int len = r.m_end - r.m_start; + if (len > 1) { + clause_reg.push({len, &r}); + sfn_log << SfnLog::merge << "Consider " << *r.m_register + << " for clause local\n"; + } + } + + while (!clause_reg.empty()) { + auto& r = clause_reg.top().lre; + clause_reg.pop(); + + sfn_log << SfnLog::merge << "Color " << *r->m_register << "\n"; + + auto& adjecency = interference.row(comp, r->m_register->index()); + + int color = g_clause_local_start; + + while (color < g_clause_local_end) { + bool color_in_use = false; + for (auto adj : adjecency) { + if (live_ranges[adj].m_color == color) { + color_in_use = true; + break; + } + } + + if (color_in_use) { + ++color; + continue; + } + + r->m_color = color; + break; + } + if (color == g_clause_local_end) + break; + } + } +} + bool register_allocation(LiveRangeMap& lrm) { @@ -289,6 +367,8 @@ register_allocation(LiveRangeMap& lrm) if (!group_allocation(lrm, interference, groups_sorted)) return false; + scalar_clause_local_allocation(lrm, interference); + if (!scalar_allocation(lrm, interference)) return false;