From 15cdf5bb488f774ee7b888c1251f4e569f929743 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga
Date: Fri, 16 Sep 2022 11:44:25 +0200
Subject: [PATCH] v3dv: optimize ldunif load into unifa write
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

If we emit a ldunif to load the ubo/ssbo base address and then we
are immediately moving it to the unifa register we can have the
ldunif write directly to unifa and avoid the mov in between, which
won't be done by copy propagation because that only works with temp
registers.

Also, since we can't read from unifa we must be careful to disallow
reuse of the ldunif result for a future ldunif of the same base
address. We do that by only reusing ldunif results from temp
registers.

total instructions in shared programs: 12468943 -> 12455139 (-0.11%)
instructions in affected programs: 1661233 -> 1647429 (-0.83%)
helped: 8307
HURT: 3994

total uniforms in shared programs: 3704532 -> 3704522 (<.01%)
uniforms in affected programs: 339 -> 329 (-2.95%)
helped: 7
HURT: 0

total max-temps in shared programs: 2148158 -> 2148290 (<.01%)
max-temps in affected programs: 9320 -> 9452 (1.42%)
helped: 175
HURT: 295

total spills in shared programs: 2202 -> 2202 (0.00%)
spills in affected programs: 0 -> 0
helped: 0
HURT: 0

total fills in shared programs: 3059 -> 3057 (-0.07%)
fills in affected programs: 27 -> 25 (-7.41%)
helped: 1
HURT: 0

total sfu-stalls in shared programs: 21167 -> 21056 (-0.52%)
sfu-stalls in affected programs: 497 -> 386 (-22.33%)
helped: 209
HURT: 127

total inst-and-stalls in shared programs: 12490110 -> 12476195 (-0.11%)
inst-and-stalls in affected programs: 1662875 -> 1648960 (-0.84%)
helped: 8312
HURT: 3987

total nops in shared programs: 316563 -> 313553 (-0.95%)
nops in affected programs: 24269 -> 21259 (-12.40%)
helped: 2158
HURT: 1006

Reviewed-by: Alejandro Piñeiro
Part-of:
---
 src/broadcom/compiler/nir_to_vir.c | 15 ++++++++++++++-
 src/broadcom/compiler/vir.c        |  6 ++++++
 2 files
changed, 20 insertions(+), 1 deletion(-) diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index dc25d71..bfcee62 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -3207,7 +3207,20 @@ ntq_emit_load_unifa(struct v3d_compile *c, nir_intrinsic_instr *instr) struct qreg unifa = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_UNIFA); if (!dynamic_src) { if (!is_ssbo) { - vir_MOV_dest(c, unifa, base_offset); + /* Avoid the extra MOV to UNIFA by making + * ldunif load directly into it. We can't + * do this if we have not actually emitted + * ldunif and are instead reusing a previous + * one. + */ + struct qinst *inst = + (struct qinst *)c->cur_block->instructions.prev; + if (inst == c->defs[base_offset.index]) { + inst->dst = unifa; + c->defs[base_offset.index] = NULL; + } else { + vir_MOV_dest(c, unifa, base_offset); + } } else { vir_ADD_dest(c, unifa, base_offset, vir_uniform_ui(c, const_offset)); diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index 8c4307a..1bff8e6 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -2069,6 +2069,12 @@ try_opt_ldunif(struct v3d_compile *c, uint32_t index, struct qreg *unif) if (!prev_inst) return false; + /* Only reuse the ldunif result if it was written to a temp register, + * otherwise there may be special restrictions (for example, ldunif + * may write directly to unifa, which is a write-only register). + */ + if (prev_inst->dst.file != QFILE_TEMP) + return false; list_for_each_entry_from(struct qinst, inst, prev_inst->link.next, &c->cur_block->instructions, link) { -- 2.7.4