i965/fs: Rework representation of fragment output locations in NIR.
authorFrancisco Jerez <currojerez@riseup.net>
Fri, 22 Jul 2016 04:26:20 +0000 (21:26 -0700)
committerFrancisco Jerez <currojerez@riseup.net>
Fri, 26 Aug 2016 01:36:06 +0000 (18:36 -0700)
The problem with the current approach is that driver output locations
are represented as a linear offset within the nir_outputs array, which
makes it rather difficult for the back-end to figure out what color
output and index some nir_intrinsic_load/store_output was meant for,
because the offset of a given output within the nir_output array is
dependent on the type and size of all previously allocated outputs.
Instead this defines the driver location of an output to be the pair
formed by its GLSL-assigned location and index (I've borrowed the
bitfield macros from brw_defines.h in order to represent the pair of
integers as a single scalar value that can be assigned to
nir_variable_data::driver_location).  nir_assign_var_locations is no
longer useful for fragment outputs.

Because fragment outputs are now allocated independently rather than
within the nir_outputs array, the get_frag_output() helper becomes
necessary in order to obtain the right temporary register for a given
location-index pair.

The type_size helper passed to nir_lower_io is now type_size_dvec4
rather than type_size_vec4_times_4 so that output array offsets are
provided in terms of whole array elements rather than in terms of
scalar components (dvec4 is the largest vector type supported by the
GLSL so this will cause all individual fragment outputs to have a size
of one regardless of the type).

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_fs_nir.cpp
src/mesa/drivers/dri/i965/brw_nir.c
src/mesa/drivers/dri/i965/brw_nir.h

index c8b6348..5910d8c 100644 (file)
@@ -89,17 +89,19 @@ fs_visitor::nir_setup_outputs()
    nir_outputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_outputs);
 
    nir_foreach_variable(var, &nir->outputs) {
-      fs_reg reg = offset(nir_outputs, bld, var->data.driver_location);
-
       switch (stage) {
       case MESA_SHADER_VERTEX:
       case MESA_SHADER_TESS_EVAL:
       case MESA_SHADER_GEOMETRY: {
+         fs_reg reg = offset(nir_outputs, bld, var->data.driver_location);
          unsigned location = var->data.location;
          nir_setup_single_output_varying(&reg, var->type, &location);
          break;
       }
-      case MESA_SHADER_FRAGMENT:
+      case MESA_SHADER_FRAGMENT: {
+         const fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_F,
+                                     type_size_vec4_times_4(var->type));
+
          if (key->force_dual_color_blend &&
              var->data.location == FRAG_RESULT_DATA1) {
             this->dual_src_output = reg;
@@ -130,6 +132,7 @@ fs_visitor::nir_setup_outputs()
             }
          }
          break;
+      }
       default:
          unreachable("unhandled shader stage");
       }
@@ -3247,6 +3250,38 @@ fs_visitor::emit_non_coherent_fb_read(const fs_builder &bld, const fs_reg &dst,
    return inst;
 }
 
+static fs_reg
+get_frag_output(const fs_visitor *v, unsigned location)
+{
+   assert(v->stage == MESA_SHADER_FRAGMENT);
+   const brw_wm_prog_key *const key =
+      reinterpret_cast<const brw_wm_prog_key *>(v->key);
+   const unsigned l = GET_FIELD(location, BRW_NIR_FRAG_OUTPUT_LOCATION);
+   const unsigned i = GET_FIELD(location, BRW_NIR_FRAG_OUTPUT_INDEX);
+
+   if (i > 0 || (key->force_dual_color_blend && l == FRAG_RESULT_DATA1))
+      return v->dual_src_output;
+
+   else if (l == FRAG_RESULT_COLOR)
+      return v->outputs[0];
+
+   else if (l == FRAG_RESULT_DEPTH)
+      return v->frag_depth;
+
+   else if (l == FRAG_RESULT_STENCIL)
+      return v->frag_stencil;
+
+   else if (l == FRAG_RESULT_SAMPLE_MASK)
+      return v->sample_mask;
+
+   else if (l >= FRAG_RESULT_DATA0 &&
+            l < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS)
+      return v->outputs[l - FRAG_RESULT_DATA0];
+
+   else
+      unreachable("Invalid location");
+}
+
 void
 fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
                                   nir_intrinsic_instr *instr)
@@ -3285,11 +3320,12 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
 
    case nir_intrinsic_store_output: {
       const fs_reg src = get_nir_src(instr->src[0]);
-      nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
+      const nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
       assert(const_offset && "Indirect output stores not allowed");
-      const fs_reg new_dest = offset(retype(nir_outputs, src.type), bld,
-                                     nir_intrinsic_base(instr) +
-                                     const_offset->u32[0]);
+      const unsigned location = nir_intrinsic_base(instr) +
+         SET_FIELD(const_offset->u32[0], BRW_NIR_FRAG_OUTPUT_LOCATION);
+      const fs_reg new_dest = retype(get_frag_output(this, location),
+                                     src.type);
 
       for (unsigned j = 0; j < instr->num_components; j++)
          bld.MOV(offset(new_dest, bld, nir_intrinsic_component(instr) + j),
index d12a946..24a64cb 100644 (file)
@@ -339,9 +339,13 @@ brw_nir_lower_tcs_outputs(nir_shader *nir, const struct brw_vue_map *vue_map)
 void
 brw_nir_lower_fs_outputs(nir_shader *nir)
 {
-   nir_assign_var_locations(&nir->outputs, &nir->num_outputs,
-                            FRAG_RESULT_DATA0, type_size_vec4_times_4);
-   nir_lower_io(nir, nir_var_shader_out, type_size_vec4_times_4);
+   nir_foreach_variable(var, &nir->outputs) {
+      var->data.driver_location =
+         SET_FIELD(var->data.index, BRW_NIR_FRAG_OUTPUT_INDEX) |
+         SET_FIELD(var->data.location, BRW_NIR_FRAG_OUTPUT_LOCATION);
+   }
+
+   nir_lower_io(nir, nir_var_shader_out, type_size_dvec4);
 }
 
 void
index 6185310..dc03c2d 100644 (file)
@@ -139,6 +139,11 @@ void brw_nir_setup_arb_uniforms(nir_shader *shader, struct gl_program *prog,
 
 bool brw_nir_opt_peephole_ffma(nir_shader *shader);
 
+#define BRW_NIR_FRAG_OUTPUT_INDEX_SHIFT 0
+#define BRW_NIR_FRAG_OUTPUT_INDEX_MASK INTEL_MASK(0, 0)
+#define BRW_NIR_FRAG_OUTPUT_LOCATION_SHIFT 1
+#define BRW_NIR_FRAG_OUTPUT_LOCATION_MASK INTEL_MASK(31, 1)
+
 #ifdef __cplusplus
 }
 #endif