}
/**
- * Number of in-order hardware instructions contained in this IR
- * instruction. This determines the increment applied to the RegDist
- * counter calculated for any ordered dependency that crosses this
+ * Index of the \p p pipeline counter in the ordered_address vector defined
+ * below.
+ */
+#define IDX(p) (p >= TGL_PIPE_FLOAT ? unsigned(p - TGL_PIPE_FLOAT) : \
+ (abort(), ~0u))
+
+ /**
+ * Number of in-order hardware instructions for pipeline index \p contained
+ * in this IR instruction. This determines the increment applied to the
+ * RegDist counter calculated for any ordered dependency that crosses this
* instruction.
*/
unsigned
- ordered_unit(const fs_inst *inst)
+ ordered_unit(const struct gen_device_info *devinfo, const fs_inst *inst,
+ unsigned p)
{
switch (inst->opcode) {
case BRW_OPCODE_SYNC:
* (again) don't use virtual instructions if you want optimal
* scheduling.
*/
- return is_unordered(inst) ? 0 : 1;
+ if (!is_unordered(inst) && (p == IDX(inferred_exec_pipe(devinfo, inst)) ||
+ p == IDX(TGL_PIPE_ALL)))
+ return 1;
+ else
+ return 0;
}
}
* Type for an instruction counter that increments for in-order
* instructions only, arbitrarily denoted 'jp' throughout this lowering
* pass in order to distinguish it from the regular instruction counter.
+ * This is represented as a vector with an independent counter for each
+ * asynchronous ALU pipeline in the EU.
*/
- typedef int ordered_address;
+ struct ordered_address {
+ /**
+ * Construct the ordered address of a dependency known to execute on a
+ * single specified pipeline \p p (unless TGL_PIPE_NONE or TGL_PIPE_ALL
+ * is provided), in which case the vector counter will be initialized
+ * with all components equal to INT_MIN (always satisfied) except for
+ * component IDX(p).
+ */
+ ordered_address(tgl_pipe p = TGL_PIPE_NONE, int jp0 = INT_MIN) {
+ for (unsigned q = 0; q < IDX(TGL_PIPE_ALL); q++)
+ jp[q] = (p == TGL_PIPE_NONE || (IDX(p) != q && p != TGL_PIPE_ALL) ?
+ INT_MIN : jp0);
+ }
+
+ int jp[IDX(TGL_PIPE_ALL)];
+
+ friend bool
+ operator==(const ordered_address &jp0, const ordered_address &jp1)
+ {
+ for (unsigned p = 0; p < IDX(TGL_PIPE_ALL); p++) {
+ if (jp0.jp[p] != jp1.jp[p])
+ return false;
+ }
+
+ return true;
+ }
+ };
/**
* Return the number of instructions in the program.
ordered_inst_addresses(const fs_visitor *shader)
{
ordered_address *jps = new ordered_address[num_instructions(shader)];
- ordered_address jp = 0;
+ ordered_address jp(TGL_PIPE_ALL, 0);
unsigned ip = 0;
foreach_block_and_inst(block, fs_inst, inst, shader->cfg) {
jps[ip] = jp;
- jp += ordered_unit(inst);
+ for (unsigned p = 0; p < IDX(TGL_PIPE_ALL); p++)
+ jp.jp[p] += ordered_unit(shader->devinfo, inst, p);
ip++;
}
/**
* No dependency information.
*/
- dependency() : ordered(TGL_REGDIST_NULL), jp(INT_MIN),
+ dependency() : ordered(TGL_REGDIST_NULL), jp(),
unordered(TGL_SBID_NULL), id(0),
exec_all(false) {}
* Construct a dependency on the in-order instruction with the provided
* ordered_address instruction counter.
*/
- dependency(tgl_regdist_mode mode, ordered_address jp, bool exec_all) :
+ dependency(tgl_regdist_mode mode, const ordered_address &jp,
+ bool exec_all) :
ordered(mode), jp(jp), unordered(TGL_SBID_NULL), id(0),
exec_all(exec_all) {}
* specified synchronization token.
*/
dependency(tgl_sbid_mode mode, unsigned id, bool exec_all) :
- ordered(TGL_REGDIST_NULL), jp(INT_MIN), unordered(mode), id(id),
+ ordered(TGL_REGDIST_NULL), jp(), unordered(mode), id(id),
exec_all(exec_all) {}
/**
}
};
- const dependency dependency::done = dependency(TGL_REGDIST_SRC, INT_MIN, false);
+ const dependency dependency::done =
+ dependency(TGL_REGDIST_SRC, ordered_address(), false);
/**
* Return whether \p dep contains any dependency information.
if (dep0.ordered || dep1.ordered) {
dep.ordered = dep0.ordered | dep1.ordered;
- dep.jp = MAX2(dep0.jp, dep1.jp);
+ for (unsigned p = 0; p < IDX(TGL_PIPE_ALL); p++)
+ dep.jp.jp[p] = MAX2(dep0.jp.jp[p], dep1.jp.jp[p]);
}
if (dep0.unordered || dep1.unordered) {
* the end of the origin block.
*/
dependency
- transport(dependency dep, int delta)
+ transport(dependency dep, int delta[IDX(TGL_PIPE_ALL)])
{
- if (dep.ordered && dep.jp > INT_MIN)
- dep.jp += delta;
+ if (dep.ordered) {
+ for (unsigned p = 0; p < IDX(TGL_PIPE_ALL); p++) {
+ if (dep.jp.jp[p] > INT_MIN)
+ dep.jp.jp[p] += delta[p];
+ }
+ }
return dep;
}
* object. \sa transport().
*/
friend scoreboard
- transport(const scoreboard &sb0, int delta)
+ transport(const scoreboard &sb0, int delta[IDX(TGL_PIPE_ALL)])
{
scoreboard sb;
continue;
if (dep.ordered && deps[i].ordered) {
- deps[i].jp = MAX2(deps[i].jp, dep.jp);
+ for (unsigned p = 0; p < IDX(TGL_PIPE_ALL); p++)
+ deps[i].jp.jp[p] = MAX2(deps[i].jp.jp[p], dep.jp.jp[p]);
+
deps[i].ordered |= dep.ordered;
deps[i].exec_all |= dep.exec_all;
dep.ordered = TGL_REGDIST_NULL;
for (unsigned i = 0; i < deps.size(); i++) {
if (deps[i].ordered && exec_all >= deps[i].exec_all) {
- const unsigned dist = jp - deps[i].jp;
- const unsigned max_dist = 10;
- assert(jp > deps[i].jp);
- if (dist <= max_dist)
- min_dist = MIN3(min_dist, dist, 7);
+ for (unsigned q = 0; q < IDX(TGL_PIPE_ALL); q++) {
+ const unsigned dist = jp.jp[q] - int64_t(deps[i].jp.jp[q]);
+ const unsigned max_dist = (q == IDX(TGL_PIPE_LONG) ? 14 : 10);
+ assert(jp.jp[q] > deps[i].jp.jp[q]);
+ if (dist <= max_dist)
+ min_dist = MIN3(min_dist, dist, 7);
+ }
}
}
const fs_inst *inst, unsigned ip, scoreboard &sb)
{
const bool exec_all = inst->force_writemask_all;
+ const struct gen_device_info *devinfo = shader->devinfo;
+ const tgl_pipe p = inferred_exec_pipe(devinfo, inst);
+ const ordered_address jp = p ? ordered_address(p, jps[ip].jp[IDX(p)]) :
+ ordered_address();
/* Track any source registers that may be fetched asynchronously by this
* instruction, otherwise clear the dependency in order to avoid
const dependency rd_dep =
(inst->is_payload(i) ||
inst->is_math()) ? dependency(TGL_SBID_SRC, ip, exec_all) :
- ordered_unit(inst) ? dependency(TGL_REGDIST_SRC, jps[ip], exec_all) :
+ ordered_unit(devinfo, inst, IDX(TGL_PIPE_ALL)) ?
+ dependency(TGL_REGDIST_SRC, jp, exec_all) :
dependency::done;
for (unsigned j = 0; j < regs_read(inst, i); j++)
/* Track any destination registers of this instruction. */
const dependency wr_dep =
is_unordered(inst) ? dependency(TGL_SBID_DST, ip, exec_all) :
- ordered_unit(inst) ? dependency(TGL_REGDIST_DST, jps[ip], exec_all) :
+ ordered_unit(devinfo, inst, IDX(TGL_PIPE_ALL)) ?
+ dependency(TGL_REGDIST_DST, jp, exec_all) :
dependency();
if (is_valid(wr_dep) && inst->dst.file != BAD_FILE &&
foreach_list_typed(bblock_link, child_link, link,
&block->children) {
scoreboard &in_sb = in_sbs[child_link->block->num];
- const int delta =
- jps[child_link->block->start_ip] - jps[block->end_ip]
- - ordered_unit(static_cast<const fs_inst *>(block->end()));
+ int delta[IDX(TGL_PIPE_ALL)];
+
+ for (unsigned p = 0; p < IDX(TGL_PIPE_ALL); p++)
+ delta[p] = jps[child_link->block->start_ip].jp[p]
+ - jps[block->end_ip].jp[p]
+ - ordered_unit(shader->devinfo,
+ static_cast<const fs_inst *>(block->end()), p);
in_sb = merge(eq, in_sb, transport(sb, delta));
}