From fc85f7cfdc154e6c2f29445b6023b379c3c18864 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 18 Sep 2018 11:56:22 -0700 Subject: [PATCH] v3d: Don't rely on sorting input vars for VPM read setup. For supporting scalar VPM i/o at the NIR level, we need to do a pass over the vars to figure out how big each attribute is after DCE. Once we've done that, we can just walk over c->vattr_sizes[] instead of bothering with vars. --- src/broadcom/compiler/nir_to_vir.c | 48 ++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 28 deletions(-) diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index 9bcca9d..4becc97 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -1276,35 +1276,36 @@ ntq_emit_vpm_read(struct v3d_compile *c, static void ntq_setup_vpm_inputs(struct v3d_compile *c) { - unsigned num_entries = 0; - unsigned num_components = 0; + /* Figure out how many components of each vertex attribute the shader + * uses. Each variable should have been split to individual + * components and unused ones DCEed. The vertex fetcher will load + * from the start of the attribute to the number of components we + * declare we need in c->vattr_sizes[]. + */ nir_foreach_variable(var, &c->s->inputs) { - num_entries++; - num_components += glsl_get_components(var->type); - } - - nir_variable *vars[num_entries]; + /* No VS attribute array support. */ + assert(MAX2(glsl_get_length(var->type), 1) == 1); - unsigned i = 0; - nir_foreach_variable(var, &c->s->inputs) - vars[i++] = var; + unsigned loc = var->data.driver_location; + int start_component = var->data.location_frac; + int num_components = glsl_get_components(var->type); - /* Sort the variables so that we emit the input setup in - * driver_location order. This is required for VPM reads, whose data - * is fetched into the VPM in driver_location (TGSI register index) - * order. - */ - qsort(&vars, num_entries, sizeof(*vars), driver_location_compare); + c->vattr_sizes[loc] = MAX2(c->vattr_sizes[loc], + start_component + num_components); + } + unsigned num_components = 0; uint32_t vpm_components_queued = 0; bool uses_iid = c->s->info.system_values_read & (1ull << SYSTEM_VALUE_INSTANCE_ID); bool uses_vid = c->s->info.system_values_read & (1ull << SYSTEM_VALUE_VERTEX_ID); - num_components += uses_iid; num_components += uses_vid; + for (int i = 0; i < ARRAY_SIZE(c->vattr_sizes); i++) + num_components += c->vattr_sizes[i]; + if (uses_iid) { c->iid = ntq_emit_vpm_read(c, &vpm_components_queued, &num_components, ~0); @@ -1315,19 +1316,11 @@ ntq_setup_vpm_inputs(struct v3d_compile *c) &num_components, ~0); } - for (unsigned i = 0; i < num_entries; i++) { - nir_variable *var = vars[i]; - unsigned array_len = MAX2(glsl_get_length(var->type), 1); - unsigned loc = var->data.driver_location; - - assert(array_len == 1); - (void)array_len; + for (int loc = 0; loc < ARRAY_SIZE(c->vattr_sizes); loc++) { resize_qreg_array(c, &c->inputs, &c->inputs_array_size, (loc + 1) * 4); - int var_components = glsl_get_components(var->type); - - for (int i = 0; i < var_components; i++) { + for (int i = 0; i < c->vattr_sizes[loc]; i++) { c->inputs[loc * 4 + i] = ntq_emit_vpm_read(c, &vpm_components_queued, @@ -1335,7 +1328,6 @@ ntq_setup_vpm_inputs(struct v3d_compile *c) loc * 4 + i); } - c->vattr_sizes[loc] = var_components; } if (c->devinfo->ver >= 40) { -- 2.7.4