From 54c17e45ae8596961b02f356254784c2ef46b9e1 Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Thu, 18 Feb 2021 08:32:13 +0100 Subject: [PATCH] broadcom/compiler: skip unnecessary unifa writes MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit If a new UBO load happens to read exactly at the offset right after the previous UBO load (something that is fairly common, for example when reading a matrix), we can skip the unifa write (with its 3 delay slots) and just continue to call ldunifa to continue reading consecutive addresses. Reviewed-by: Alejandro Piñeiro Part-of: --- src/broadcom/compiler/nir_to_vir.c | 38 ++++++++++++++++++++++++++---------- src/broadcom/compiler/v3d_compiler.h | 9 +++++++++ 2 files changed, 37 insertions(+), 10 deletions(-) diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index d27b184..bc9473c 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -2589,17 +2589,34 @@ ntq_emit_load_ubo_unifa(struct v3d_compile *c, nir_intrinsic_instr *instr) if (c->key->environment == V3D_ENVIRONMENT_OPENGL) index++; - struct qreg base_offset = - vir_uniform(c, QUNIFORM_UBO_ADDR, - v3d_unit_data_create(index, const_offset)); - const_offset = 0; - - struct qreg unifa = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_UNIFA); - if (!dynamic_src) { - vir_MOV_dest(c, unifa, base_offset); + /* We can only keep track of the last unifa address we used with + * constant offset loads. 
+ */ + bool skip_unifa = false; + if (dynamic_src) { + c->last_unifa_block = NULL; + } else if (c->cur_block == c->last_unifa_block && + c->last_unifa_index == index && + c->last_unifa_offset == const_offset) { + skip_unifa = true; } else { - vir_ADD_dest(c, unifa, base_offset, - ntq_get_src(c, instr->src[1], 0)); + c->last_unifa_block = c->cur_block; + c->last_unifa_index = index; + c->last_unifa_offset = const_offset; + } + + if (!skip_unifa) { + struct qreg base_offset = + vir_uniform(c, QUNIFORM_UBO_ADDR, + v3d_unit_data_create(index, const_offset)); + + struct qreg unifa = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_UNIFA); + if (!dynamic_src) { + vir_MOV_dest(c, unifa, base_offset); + } else { + vir_ADD_dest(c, unifa, base_offset, + ntq_get_src(c, instr->src[1], 0)); + } } for (uint32_t i = 0; i < nir_intrinsic_dest_components(instr); i++) { @@ -2608,6 +2625,7 @@ ntq_emit_load_ubo_unifa(struct v3d_compile *c, nir_intrinsic_instr *instr) ldunifa->qpu.sig.ldunifa = true; struct qreg data = vir_emit_def(c, ldunifa); ntq_store_dest(c, &instr->dest, i, vir_MOV(c, data)); + c->last_unifa_offset += 4; } } diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index 8e83a8e..e1f3511 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -650,6 +650,15 @@ struct v3d_compile { */ bool disable_ldunif_opt; + /* Last UBO index and offset used with a unifa/ldunifa sequence and the + * block where it was emitted. This is used to skip unifa writes (and + * their 3 delay slots) when the next UBO load reads right after the + * previous one in the same block. + */ + struct qblock *last_unifa_block; + int32_t last_unifa_index; + uint32_t last_unifa_offset; + /* State for whether we're executing on each channel currently. 0 if * yes, otherwise a block number + 1 that the channel jumped to. */ -- 2.7.4