From 396f6b27a759246221dd641b14ecc2f0bb7839a7 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 9 Oct 2019 18:47:29 -0700 Subject: [PATCH] intel/fs/gen12: Demodernize software scoreboard lowering pass. Kept as a separate commit in order to avoid distracting reviewers of the software scoreboard pass with memory management boilerplate. Reviewed-by: Caio Marcelo de Oliveira Filho --- src/intel/compiler/brw_fs_scoreboard.cpp | 244 +++++++++++++++++++++---------- 1 file changed, 163 insertions(+), 81 deletions(-) diff --git a/src/intel/compiler/brw_fs_scoreboard.cpp b/src/intel/compiler/brw_fs_scoreboard.cpp index f05a150..67e07e7 100644 --- a/src/intel/compiler/brw_fs_scoreboard.cpp +++ b/src/intel/compiler/brw_fs_scoreboard.cpp @@ -52,9 +52,6 @@ * - tdr0 thread dependency register */ -#include -#include - #include "brw_fs.h" #include "brw_cfg.h" @@ -104,18 +101,29 @@ namespace { typedef int ordered_address; /** + * Return the number of instructions in the program. + */ + unsigned + num_instructions(const backend_shader *shader) + { + return shader->cfg->blocks[shader->cfg->num_blocks - 1]->end_ip + 1; + } + + /** * Calculate the local ordered_address instruction counter at every * instruction of the shader for subsequent constant-time look-up. */ - std::vector + ordered_address * ordered_inst_addresses(const fs_visitor *shader) { - std::vector jps; + ordered_address *jps = new ordered_address[num_instructions(shader)]; ordered_address jp = 0; + unsigned ip = 0; foreach_block_and_inst(block, fs_inst, inst, shader->cfg) { - jps.push_back(jp); + jps[ip] = jp; jp += ordered_unit(inst); + ip++; } return jps; @@ -174,6 +182,17 @@ namespace { * only if i == j for every pair of unsigned integers i and j. */ struct equivalence_relation { + equivalence_relation(unsigned n) : is(new unsigned[n]), n(n) + { + for (unsigned i = 0; i < n; i++) + is[i] = i; + } + + ~equivalence_relation() + { + delete[] is; + } + /** * Return equivalence class index of the specified element. Effectively * this is the numeric value of an arbitrary representative from the @@ -185,7 +204,7 @@ namespace { unsigned lookup(unsigned i) const { - if (i < is.size() && is[i] != i) + if (i < n && is[i] != i) return lookup(is[i]); else return i; @@ -195,12 +214,13 @@ namespace { * Create an array with the results of the lookup() method for * constant-time evaluation. */ - std::vector - flatten() const { - std::vector ids; + unsigned * + flatten() const + { + unsigned *ids = new unsigned[n]; - for (const auto i : is) - ids.push_back(lookup(i)); + for (unsigned i = 0; i < n; i++) + ids[i] = lookup(i); return ids; } @@ -223,6 +243,11 @@ namespace { } private: + equivalence_relation(const equivalence_relation &); + + equivalence_relation & + operator=(const equivalence_relation &); + /** * Assign the representative of \p from to be equivalent to \p to. * @@ -233,17 +258,17 @@ namespace { assign(unsigned from, unsigned to) { if (from != to) { - if (from < is.size() && is[from] != from) - assign(is[from], to); + assert(from < n); - for (unsigned i = is.size(); i <= from; i++) - is.push_back(i); + if (is[from] != from) + assign(is[from], to); is[from] = to; } } - std::vector is; + unsigned *is; + unsigned n; }; /** @@ -559,32 +584,74 @@ namespace { * Dependency list handling. * @{ */ + struct dependency_list { + dependency_list() : deps(NULL), n(0) {} + + ~dependency_list() + { + free(deps); + } + + void + push_back(const dependency &dep) + { + deps = (dependency *)realloc(deps, (n + 1) * sizeof(*deps)); + deps[n++] = dep; + } + + unsigned + size() const + { + return n; + } + + const dependency & + operator[](unsigned i) const + { + assert(i < n); + return deps[i]; + } + + dependency & + operator[](unsigned i) + { + assert(i < n); + return deps[i]; + } + + private: + dependency_list(const dependency_list &); + dependency_list & + operator=(const dependency_list &); + + dependency *deps; + unsigned n; + }; /** * Add dependency \p dep to the list of dependencies of an instruction * \p deps. */ void - add_dependency(const std::vector &ids, - std::vector &deps, dependency dep) + add_dependency(const unsigned *ids, dependency_list &deps, dependency dep) { if (is_valid(dep)) { /* Translate the unordered dependency token first in order to keep * the list minimally redundant. */ - if (dep.unordered && dep.id < ids.size()) + if (dep.unordered) dep.id = ids[dep.id]; /* Try to combine the specified dependency with any existing ones. */ - for (auto &dep1 : deps) { - if (dep.ordered && dep1.ordered) { - dep1.jp = MAX2(dep1.jp, dep.jp); - dep1.ordered |= dep.ordered; + for (unsigned i = 0; i < deps.size(); i++) { + if (dep.ordered && deps[i].ordered) { + deps[i].jp = MAX2(deps[i].jp, dep.jp); + deps[i].ordered |= dep.ordered; dep.ordered = TGL_REGDIST_NULL; } - if (dep.unordered && dep1.unordered && dep1.id == dep.id) { - dep1.unordered |= dep.unordered; + if (dep.unordered && deps[i].unordered && deps[i].id == dep.id) { + deps[i].unordered |= dep.unordered; dep.unordered = TGL_SBID_NULL; } } @@ -601,16 +668,16 @@ namespace { * \p jp. */ tgl_swsb - ordered_dependency_swsb(const std::vector &deps, + ordered_dependency_swsb(const dependency_list &deps, const ordered_address &jp) { unsigned min_dist = ~0u; - for (const auto &dep : deps) { - if (dep.ordered) { - const unsigned dist = jp - dep.jp; + for (unsigned i = 0; i < deps.size(); i++) { + if (deps[i].ordered) { + const unsigned dist = jp - deps[i].jp; const unsigned max_dist = 10; - assert(jp > dep.jp); + assert(jp > deps[i].jp); if (dist <= max_dist) min_dist = MIN3(min_dist, dist, 7); } @@ -624,7 +691,7 @@ namespace { * ordered_address \p jp has any non-trivial ordered dependencies. */ bool - find_ordered_dependency(const std::vector &deps, + find_ordered_dependency(const dependency_list &deps, const ordered_address &jp) { return ordered_dependency_swsb(deps, jp).regdist; @@ -636,13 +703,13 @@ namespace { * no such dependency is present. */ tgl_sbid_mode - find_unordered_dependency(const std::vector &deps, + find_unordered_dependency(const dependency_list &deps, tgl_sbid_mode unordered) { if (unordered) { - for (const auto &dep : deps) { - if (unordered & dep.unordered) - return dep.unordered; + for (unsigned i = 0; i < deps.size(); i++) { + if (unordered & deps[i].unordered) + return deps[i].unordered; } } @@ -657,7 +724,7 @@ namespace { */ tgl_sbid_mode baked_unordered_dependency_mode(const fs_inst *inst, - const std::vector &deps, + const dependency_list &deps, const ordered_address &jp) { const bool has_ordered = find_ordered_dependency(deps, jp); @@ -687,8 +754,7 @@ namespace { * instruction \p inst. */ void - update_inst_scoreboard(const fs_visitor *shader, - const std::vector &jps, + update_inst_scoreboard(const fs_visitor *shader, const ordered_address *jps, const fs_inst *inst, unsigned ip, scoreboard &sb) { /* Track any source registers that may be fetched asynchronously by this @@ -730,11 +796,11 @@ namespace { * unconditionally resolved) dependencies at the end of each block of the * program. */ - std::vector + scoreboard * gather_block_scoreboards(const fs_visitor *shader, - const std::vector &jps) + const ordered_address *jps) { - std::vector sbs(shader->cfg->num_blocks); + scoreboard *sbs = new scoreboard[shader->cfg->num_blocks]; unsigned ip = 0; foreach_block_and_inst(block, fs_inst, inst, shader->cfg) @@ -750,15 +816,14 @@ namespace { * Calculates the set of dependencies potentially pending at the beginning * of each block, and returns it as an array of scoreboard objects. */ - std::pair, std::vector> + scoreboard * propagate_block_scoreboards(const fs_visitor *shader, - const std::vector &jps) + const ordered_address *jps, + equivalence_relation &eq) { - const std::vector delta_sbs = - gather_block_scoreboards(shader, jps); - std::vector in_sbs(shader->cfg->num_blocks); - std::vector out_sbs(shader->cfg->num_blocks); - equivalence_relation eq; + const scoreboard *delta_sbs = gather_block_scoreboards(shader, jps); + scoreboard *in_sbs = new scoreboard[shader->cfg->num_blocks]; + scoreboard *out_sbs = new scoreboard[shader->cfg->num_blocks]; for (bool progress = true; progress;) { progress = false; @@ -784,63 +849,66 @@ namespace { } } - return { std::move(in_sbs), eq.flatten() }; + delete[] delta_sbs; + delete[] out_sbs; + + return in_sbs; } /** * Return the list of potential dependencies of each instruction in the * shader based on the result of global dependency analysis. */ - std::vector> + dependency_list * gather_inst_dependencies(const fs_visitor *shader, - const std::vector &jps) + const ordered_address *jps) { - std::vector sbs; - std::vector ids; - std::vector> deps; + equivalence_relation eq(num_instructions(shader)); + scoreboard *sbs = propagate_block_scoreboards(shader, jps, eq); + const unsigned *ids = eq.flatten(); + dependency_list *deps = new dependency_list[num_instructions(shader)]; unsigned ip = 0; - std::tie(sbs, ids) = propagate_block_scoreboards(shader, jps); - foreach_block_and_inst(block, fs_inst, inst, shader->cfg) { scoreboard &sb = sbs[block->num]; - std::vector inst_deps; for (unsigned i = 0; i < inst->sources; i++) { for (unsigned j = 0; j < regs_read(inst, i); j++) - add_dependency(ids, inst_deps, dependency_for_read( + add_dependency(ids, deps[ip], dependency_for_read( sb.get(byte_offset(inst->src[i], REG_SIZE * j)))); } if (is_send(inst) && inst->base_mrf != -1) { for (unsigned j = 0; j < inst->mlen; j++) - add_dependency(ids, inst_deps, dependency_for_read( + add_dependency(ids, deps[ip], dependency_for_read( sb.get(brw_uvec_mrf(8, inst->base_mrf + j, 0)))); } if (is_unordered(inst)) - add_dependency(ids, inst_deps, dependency(TGL_SBID_SET, ip)); + add_dependency(ids, deps[ip], dependency(TGL_SBID_SET, ip)); if (!inst->no_dd_check) { if (inst->dst.file != BAD_FILE && !inst->dst.is_null()) { for (unsigned j = 0; j < regs_written(inst); j++) { - add_dependency(ids, inst_deps, dependency_for_write(inst, + add_dependency(ids, deps[ip], dependency_for_write(inst, sb.get(byte_offset(inst->dst, REG_SIZE * j)))); } } if (is_send(inst) && inst->base_mrf != -1) { for (int j = 0; j < shader->implied_mrf_writes(inst); j++) - add_dependency(ids, inst_deps, dependency_for_write(inst, + add_dependency(ids, deps[ip], dependency_for_write(inst, sb.get(brw_uvec_mrf(8, inst->base_mrf + j, 0)))); } } - deps.push_back(inst_deps); update_inst_scoreboard(shader, jps, inst, ip, sb); ip++; } + delete[] sbs; + delete[] ids; + return deps; } @@ -850,30 +918,39 @@ namespace { * Allocate SBID tokens to track the execution of every out-of-order * instruction of the shader. */ - std::vector> + dependency_list * allocate_inst_dependencies(const fs_visitor *shader, - const std::vector> &deps0) + const dependency_list *deps0) { /* XXX - Use bin-packing algorithm to assign hardware SBIDs optimally in * shaders with a large number of SEND messages. */ - std::vector> deps1; - std::vector ids(deps0.size(), ~0u); + + /* Allocate an unordered dependency ID to hardware SBID translation + * table with as many entries as instructions there are in the shader, + * which is the maximum number of unordered IDs we can find in the + * program. + */ + unsigned *ids = new unsigned[num_instructions(shader)]; + for (unsigned ip = 0; ip < num_instructions(shader); ip++) + ids[ip] = ~0u; + + dependency_list *deps1 = new dependency_list[num_instructions(shader)]; unsigned next_id = 0; - for (const auto &inst_deps0 : deps0) { - std::vector inst_deps1; + for (unsigned ip = 0; ip < num_instructions(shader); ip++) { + for (unsigned i = 0; i < deps0[ip].size(); i++) { + const dependency &dep = deps0[ip][i]; - for (const auto &dep : inst_deps0) { if (dep.unordered && ids[dep.id] == ~0u) ids[dep.id] = (next_id++) & 0xf; - add_dependency(ids, inst_deps1, dep); + add_dependency(ids, deps1[ip], dep); } - - deps1.push_back(inst_deps1); } + delete[] ids; + return deps1; } @@ -884,8 +961,8 @@ namespace { */ void emit_inst_dependencies(fs_visitor *shader, - const std::vector &jps, - const std::vector> &deps) + const ordered_address *jps, + const dependency_list *deps) { unsigned ip = 0; @@ -894,7 +971,9 @@ namespace { const tgl_sbid_mode unordered_mode = baked_unordered_dependency_mode(inst, deps[ip], jps[ip]); - for (const auto &dep : deps[ip]) { + for (unsigned i = 0; i < deps[ip].size(); i++) { + const dependency &dep = deps[ip][i]; + if (dep.unordered) { if (unordered_mode == dep.unordered && !swsb.mode) { /* Bake unordered dependency into the instruction's SWSB if @@ -929,10 +1008,13 @@ bool fs_visitor::lower_scoreboard() { if (devinfo->gen >= 12) { - const std::vector jps = ordered_inst_addresses(this); - emit_inst_dependencies(this, jps, - allocate_inst_dependencies(this, - gather_inst_dependencies(this, jps))); + const ordered_address *jps = ordered_inst_addresses(this); + const dependency_list *deps0 = gather_inst_dependencies(this, jps); + const dependency_list *deps1 = allocate_inst_dependencies(this, deps0); + emit_inst_dependencies(this, jps, deps1); + delete[] deps1; + delete[] deps0; + delete[] jps; } return true; -- 2.7.4