i965/vs: Simplify fs_visitor's ATTR file.
author Kenneth Graunke <kenneth@whitecape.org>
Fri, 14 Aug 2015 23:01:33 +0000 (16:01 -0700)
committer Kenneth Graunke <kenneth@whitecape.org>
Mon, 12 Oct 2015 21:33:26 +0000 (14:33 -0700)
Previously, ATTR was indexed by VERT_ATTRIB_* slots; at the end of
compilation, assign_vs_urb_setup() translated those into GRF units,
and converted ATTR to HW_REGs.

This patch moves the translation earlier, making ATTR work in terms of
GRF units from the beginning.  assign_vs_urb_setup() simply has to add
the number of payload registers and push constants to obtain the final
hardware GRF number.  (We can't do this earlier as those values aren't
known.)

ATTR still supports reg_offset; however, it's simply added to reg.
It's not clear whether this is valuable or not.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Matt Turner <mattst88@gmail.com>
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
src/mesa/drivers/dri/i965/brw_nir.c

index 65f2e68..d000f16 100644 (file)
@@ -1508,9 +1508,11 @@ void
 fs_visitor::assign_vs_urb_setup()
 {
    brw_vs_prog_data *vs_prog_data = (brw_vs_prog_data *) prog_data;
-   int grf, slot, channel, attr;
 
    assert(stage == MESA_SHADER_VERTEX);
+   int count = _mesa_bitcount_64(vs_prog_data->inputs_read);
+   if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid)
+      count++;
 
    /* Each attribute is 4 regs. */
    this->first_non_payload_grf += 4 * vs_prog_data->nr_attributes;
@@ -1521,25 +1523,10 @@ fs_visitor::assign_vs_urb_setup()
    foreach_block_and_inst(block, fs_inst, inst, cfg) {
       for (int i = 0; i < inst->sources; i++) {
          if (inst->src[i].file == ATTR) {
-
-            if (inst->src[i].reg == VERT_ATTRIB_MAX) {
-               slot = vs_prog_data->nr_attributes - 1;
-            } else {
-               /* Attributes come in in a contiguous block, ordered by their
-                * gl_vert_attrib value.  That means we can compute the slot
-                * number for an attribute by masking out the enabled
-                * attributes before it and counting the bits.
-                */
-               attr = inst->src[i].reg + inst->src[i].reg_offset / 4;
-               slot = _mesa_bitcount_64(vs_prog_data->inputs_read &
-                                        BITFIELD64_MASK(attr));
-            }
-
-            channel = inst->src[i].reg_offset & 3;
-
-            grf = payload.num_regs +
-               prog_data->curb_read_length +
-               slot * 4 + channel;
+            int grf = payload.num_regs +
+                      prog_data->curb_read_length +
+                      inst->src[i].reg +
+                      inst->src[i].reg_offset;
 
             inst->src[i].file = HW_REG;
             inst->src[i].fixed_hw_reg =
index df1a7ed..8aee2c0 100644 (file)
@@ -53,7 +53,8 @@ fs_reg *
 fs_visitor::emit_vs_system_value(int location)
 {
    fs_reg *reg = new(this->mem_ctx)
-      fs_reg(ATTR, VERT_ATTRIB_MAX, BRW_REGISTER_TYPE_D);
+      fs_reg(ATTR, 4 * _mesa_bitcount_64(nir->info.inputs_read),
+             BRW_REGISTER_TYPE_D);
    brw_vs_prog_data *vs_prog_data = (brw_vs_prog_data *) prog_data;
 
    switch (location) {
index 15c1b19..4f35d81 100644 (file)
 #include "glsl/nir/glsl_to_nir.h"
 #include "program/prog_to_nir.h"
 
+static bool
+remap_vs_attrs(nir_block *block, void *closure)
+{ /* Per-block callback handed to nir_foreach_block(): for every
+   * nir_intrinsic_load_input in this block, const_index[0] is rewritten
+   * from an attribute index to 4 * (that attribute's slot among the bits
+   * set in the mask that closure points to).  Always returns true.
+   */
+   GLbitfield64 inputs_read = *((GLbitfield64 *) closure); /* caller-supplied bitmask of inputs read */
+
+   nir_foreach_instr(block, instr) {
+      if (instr->type != nir_instr_type_intrinsic)
+         continue; /* only intrinsic instructions are inspected */
+
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+      /* We set EmitNoIndirect for VS inputs, so there are no indirects.
+       * The assert below documents that expectation; indirect input loads
+       * are not remapped by this function.
+       */
+      assert(intrin->intrinsic != nir_intrinsic_load_input_indirect);
+
+      if (intrin->intrinsic == nir_intrinsic_load_input) {
+         /* Attributes come in a contiguous block, ordered by their
+          * gl_vert_attrib value.  That means we can compute the slot
+          * number for an attribute by masking out the enabled attributes
+          * before it and counting the bits.
+          *
+          * Example: with bits 0, 3 and 5 set in inputs_read, attribute 5
+          * has two enabled attributes below it, so slot == 2 and the
+          * stored index becomes 8.
+          *
+          * The slot is scaled by 4 before being stored back; this matches
+          * the "each attribute is 4 regs" accounting used by
+          * assign_vs_urb_setup().
+          */
+         int attr = intrin->const_index[0];
+         int slot = _mesa_bitcount_64(inputs_read & BITFIELD64_MASK(attr));
+         intrin->const_index[0] = 4 * slot;
+      }
+   }
+   return true; /* NOTE(review): presumably tells nir_foreach_block to keep iterating — confirm */
+}
+
 static void
 brw_nir_lower_inputs(nir_shader *nir, bool is_scalar)
 {
@@ -49,6 +77,18 @@ brw_nir_lower_inputs(nir_shader *nir, bool is_scalar)
        * type_size_vec4 here.
        */
       nir_lower_io(nir, nir_var_shader_in, type_size_vec4);
+
+      /* Finally, translate VERT_ATTRIB_* values into the actual registers.
+       *
+       * Note that we can use nir->info.inputs_read instead of key->inputs_read
+       * since the two are identical aside from Gen4-5 edge flag differences.
+       */
+      GLbitfield64 inputs_read = nir->info.inputs_read;
+      nir_foreach_overload(nir, overload) {
+         if (overload->impl) {
+            nir_foreach_block(overload->impl, remap_vs_attrs, &inputs_read);
+         }
+      }
       break;
    case MESA_SHADER_GEOMETRY:
       foreach_list_typed(nir_variable, var, node, &nir->inputs) {