v3dv: optimize ldunif load into unifa write
author Iago Toral Quiroga <itoral@igalia.com>
Fri, 16 Sep 2022 09:44:25 +0000 (11:44 +0200)
committer Marge Bot <emma+marge@anholt.net>
Tue, 20 Sep 2022 06:56:28 +0000 (06:56 +0000)
If we emit a ldunif to load the ubo/ssbo base address and
then we are immediately moving it to the unifa register we
can have the ldunif write directly to unifa and avoid the mov
in between, which won't be done by copy propagation because that
only works with temp registers.

Also, since we can't read from unifa we must be careful to disallow
reuse of the ldunif result for a future ldunif of the same base address.
We do that by only reusing ldunif results from temp registers.

total instructions in shared programs: 12468943 -> 12455139 (-0.11%)
instructions in affected programs: 1661233 -> 1647429 (-0.83%)
helped: 8307
HURT: 3994

total uniforms in shared programs: 3704532 -> 3704522 (<.01%)
uniforms in affected programs: 339 -> 329 (-2.95%)
helped: 7
HURT: 0

total max-temps in shared programs: 2148158 -> 2148290 (<.01%)
max-temps in affected programs: 9320 -> 9452 (1.42%)
helped: 175
HURT: 295

total spills in shared programs: 2202 -> 2202 (0.00%)
spills in affected programs: 0 -> 0
helped: 0
HURT: 0

total fills in shared programs: 3059 -> 3057 (-0.07%)
fills in affected programs: 27 -> 25 (-7.41%)
helped: 1
HURT: 0

total sfu-stalls in shared programs: 21167 -> 21056 (-0.52%)
sfu-stalls in affected programs: 497 -> 386 (-22.33%)
helped: 209
HURT: 127

total inst-and-stalls in shared programs: 12490110 -> 12476195 (-0.11%)
inst-and-stalls in affected programs: 1662875 -> 1648960 (-0.84%)
helped: 8312
HURT: 3987

total nops in shared programs: 316563 -> 313553 (-0.95%)
nops in affected programs: 24269 -> 21259 (-12.40%)
helped: 2158
HURT: 1006

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18667>

src/broadcom/compiler/nir_to_vir.c
src/broadcom/compiler/vir.c

index dc25d71..bfcee62 100644 (file)
@@ -3207,7 +3207,20 @@ ntq_emit_load_unifa(struct v3d_compile *c, nir_intrinsic_instr *instr)
                 struct qreg unifa = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_UNIFA);
                 if (!dynamic_src) {
                         if (!is_ssbo) {
-                                vir_MOV_dest(c, unifa, base_offset);
+                                /* Avoid the extra MOV to UNIFA by making
+                                 * ldunif load directly into it. We can't
+                                 * do this if we have not actually emitted
+                                 * ldunif and are instead reusing a previous
+                                 * one.
+                                 */
+                                struct qinst *inst =
+                                        (struct qinst *)c->cur_block->instructions.prev;
+                                if (inst == c->defs[base_offset.index]) {
+                                   inst->dst = unifa;
+                                   c->defs[base_offset.index] = NULL;
+                                } else {
+                                   vir_MOV_dest(c, unifa, base_offset);
+                                }
                         } else {
                                 vir_ADD_dest(c, unifa, base_offset,
                                              vir_uniform_ui(c, const_offset));
index 8c4307a..1bff8e6 100644 (file)
@@ -2069,6 +2069,12 @@ try_opt_ldunif(struct v3d_compile *c, uint32_t index, struct qreg *unif)
         if (!prev_inst)
                 return false;
 
+        /* Only reuse the ldunif result if it was written to a temp register,
+         * otherwise there may be special restrictions (for example, ldunif
+         * may write directly to unifa, which is a write-only register).
+         */
+        if (prev_inst->dst.file != QFILE_TEMP)
+                return false;
 
         list_for_each_entry_from(struct qinst, inst, prev_inst->link.next,
                                  &c->cur_block->instructions, link) {