From: Kenneth Graunke Date: Tue, 25 Aug 2015 23:59:12 +0000 (-0700) Subject: i965/vs: Unify URB entry size/read length calculations between backends. X-Git-Tag: upstream/17.1.0~15442 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=031d3501322aee0a1474c7f2a9b79f9fa9947430;p=platform%2Fupstream%2Fmesa.git i965/vs: Unify URB entry size/read length calculations between backends. Both the vec4 and scalar VS backends had virtually identical URB entry size and read length calculations. We can move those up a level to backend-agnostic code and reuse it for both. Unfortunately, the backends need to know nr_attributes to compute first_non_payload_grf, so I had to store that in prog_data. We could use urb_read_length, but that's nr_attributes rounded up to a multiple of two, so doing so would waste a register in some cases. There's more code to be removed in the vec4 backend, but that will come in a follow-on patch. Signed-off-by: Kenneth Graunke Reviewed-by: Matt Turner --- diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index aa1284d..9ad6b4d 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -699,6 +699,8 @@ struct brw_vs_prog_data { GLbitfield64 inputs_read; + unsigned nr_attributes; + bool uses_vertexid; bool uses_instanceid; }; diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index ba62fdd..65f2e68 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1508,21 +1508,12 @@ void fs_visitor::assign_vs_urb_setup() { brw_vs_prog_data *vs_prog_data = (brw_vs_prog_data *) prog_data; - int grf, count, slot, channel, attr; + int grf, slot, channel, attr; assert(stage == MESA_SHADER_VERTEX); - count = _mesa_bitcount_64(vs_prog_data->inputs_read); - if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid) - count++; /* Each attribute is 4 regs. */ - this->first_non_payload_grf += count * 4; - - unsigned vue_entries = - MAX2(count, vs_prog_data->base.vue_map.num_slots); - - vs_prog_data->base.urb_entry_size = ALIGN(vue_entries, 4) / 4; - vs_prog_data->base.urb_read_length = (count + 1) / 2; + this->first_non_payload_grf += 4 * vs_prog_data->nr_attributes; assert(vs_prog_data->base.urb_read_length <= 15); @@ -1532,7 +1523,7 @@ fs_visitor::assign_vs_urb_setup() if (inst->src[i].file == ATTR) { if (inst->src[i].reg == VERT_ATTRIB_MAX) { - slot = count - 1; + slot = vs_prog_data->nr_attributes - 1; } else { /* Attributes come in in a contiguous block, ordered by their * gl_vert_attrib value. That means we can compute the slot diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index e966b96..08f3e91 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1632,28 +1632,11 @@ vec4_vs_visitor::setup_attributes(int payload_reg) */ if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid) { attribute_map[VERT_ATTRIB_MAX] = payload_reg + nr_attributes; - nr_attributes++; } lower_attributes_to_hw_regs(attribute_map, false /* interleaved */); - /* The BSpec says we always have to read at least one thing from - * the VF, and it appears that the hardware wedges otherwise. - */ - if (nr_attributes == 0) - nr_attributes = 1; - - prog_data->urb_read_length = (nr_attributes + 1) / 2; - - unsigned vue_entries = - MAX2(nr_attributes, prog_data->vue_map.num_slots); - - if (devinfo->gen == 6) - prog_data->urb_entry_size = ALIGN(vue_entries, 8) / 8; - else - prog_data->urb_entry_size = ALIGN(vue_entries, 4) / 4; - - return payload_reg + nr_attributes; + return payload_reg + vs_prog_data->nr_attributes; } int diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 38de98f..17d3bc4 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -159,6 +159,38 @@ brw_codegen_vs_prog(struct brw_context *brw, &prog_data.base.vue_map, outputs_written, prog ? prog->SeparateShader : false); + unsigned nr_attributes = _mesa_bitcount_64(prog_data.inputs_read); + + /* gl_VertexID and gl_InstanceID are system values, but arrive via an + * incoming vertex attribute. So, add an extra slot. + */ + if (vp->program.Base.SystemValuesRead & + (BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) | + BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID))) { + nr_attributes++; + } + + /* The BSpec says we always have to read at least one thing from the VF, + * and it appears that the hardware wedges otherwise. + */ + if (nr_attributes == 0 && !brw->intelScreen->compiler->scalar_vs) + nr_attributes = 1; + + prog_data.nr_attributes = nr_attributes; + prog_data.base.urb_read_length = DIV_ROUND_UP(nr_attributes, 2); + + /* Since vertex shaders reuse the same VUE entry for inputs and outputs + * (overwriting the original contents), we need to make sure the size is + * the larger of the two. + */ + const unsigned vue_entries = + MAX2(nr_attributes, prog_data.base.vue_map.num_slots); + + if (brw->gen == 6) + prog_data.base.urb_entry_size = DIV_ROUND_UP(vue_entries, 8); + else + prog_data.base.urb_entry_size = DIV_ROUND_UP(vue_entries, 4); + if (0) { _mesa_fprint_program_opt(stderr, &vp->program.Base, PROG_PRINT_DEBUG, true);