ir3: Switch tess lowering to use location
author: Connor Abbott <cwabbott0@gmail.com>
Thu, 24 Sep 2020 14:24:55 +0000 (16:24 +0200)
committer: Marge Bot <eric+marge@anholt.net>
Fri, 23 Oct 2020 11:09:18 +0000 (11:09 +0000)
Clip & cull distances, which are compact arrays, exposed a lot of holes
because they can take up multiple slots and partially overlap.

I wanted to eliminate our dependence on knowing the layout of the
variables, as this can get complicated with things like partially
overlapping arrays, which can happen with ARB_enhanced_layouts or with
clip/cull distance arrays. This means no longer changing the layout
based on whether the i/o is part of an array or not, and no longer
matching producer <-> consumer based on the variables. At the end of the
day we have to match things based on the user-specified location, so for
simplicity this switches the entire i/o handling to be based off the
user location rather than the driver location. This means that the
primitive map may be a little bigger, but it reduces the complexity
because we never have to build a table mapping user location to driver
location, and it reduces the amount of work done at link time in the SSO
case. It also brings us closer to what the other drivers do.

While here, I also fixed the handling of component qualifiers, which was
another thing broken with clip/cull distances.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6959>

src/freedreno/ir3/ir3_nir.c
src/freedreno/ir3/ir3_nir.h
src/freedreno/ir3/ir3_nir_lower_tess.c
src/freedreno/ir3/ir3_shader.h
src/freedreno/vulkan/tu_pipeline.c
src/gallium/drivers/freedreno/ir3/ir3_const.h

index 2628746..1ee2956 100644 (file)
@@ -433,17 +433,17 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
                        break;
                case MESA_SHADER_TESS_CTRL:
                        NIR_PASS_V(s, ir3_nir_lower_tess_ctrl, so, so->key.tessellation);
-                       NIR_PASS_V(s, ir3_nir_lower_to_explicit_input, so->shader->compiler);
+                       NIR_PASS_V(s, ir3_nir_lower_to_explicit_input, so);
                        progress = true;
                        break;
                case MESA_SHADER_TESS_EVAL:
-                       NIR_PASS_V(s, ir3_nir_lower_tess_eval, so->key.tessellation);
+                       NIR_PASS_V(s, ir3_nir_lower_tess_eval, so, so->key.tessellation);
                        if (so->key.has_gs)
                                NIR_PASS_V(s, ir3_nir_lower_to_explicit_output, so, so->key.tessellation);
                        progress = true;
                        break;
                case MESA_SHADER_GEOMETRY:
-                       NIR_PASS_V(s, ir3_nir_lower_to_explicit_input, so->shader->compiler);
+                       NIR_PASS_V(s, ir3_nir_lower_to_explicit_input, so);
                        progress = true;
                        break;
                default:
@@ -694,12 +694,12 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
                constoff = align(constoff - 1, 4) + 3;
                const_state->offsets.primitive_param = constoff;
                const_state->offsets.primitive_map = constoff + 5;
-               constoff += 5 + DIV_ROUND_UP(nir->num_inputs, 4);
+               constoff += 5 + DIV_ROUND_UP(v->input_size, 4);
                break;
        case MESA_SHADER_GEOMETRY:
                const_state->offsets.primitive_param = constoff;
                const_state->offsets.primitive_map = constoff + 1;
-               constoff += 1 + DIV_ROUND_UP(nir->num_inputs, 4);
+               constoff += 1 + DIV_ROUND_UP(v->input_size, 4);
                break;
        default:
                break;
index e9fe495..a6ec144 100644 (file)
@@ -46,9 +46,9 @@ bool ir3_nir_lower_tex_prefetch(nir_shader *shader);
 
 void ir3_nir_lower_to_explicit_output(nir_shader *shader,
                struct ir3_shader_variant *v, unsigned topology);
-void ir3_nir_lower_to_explicit_input(nir_shader *shader, struct ir3_compiler *compiler);
+void ir3_nir_lower_to_explicit_input(nir_shader *shader, struct ir3_shader_variant *v);
 void ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader_variant *v, unsigned topology);
-void ir3_nir_lower_tess_eval(nir_shader *shader, unsigned topology);
+void ir3_nir_lower_tess_eval(nir_shader *shader, struct ir3_shader_variant *v, unsigned topology);
 void ir3_nir_lower_gs(nir_shader *shader);
 
 const nir_shader_compiler_options * ir3_get_compiler_options(struct ir3_compiler *compiler);
@@ -65,10 +65,6 @@ bool ir3_nir_lower_ubo_loads(nir_shader *nir, struct ir3_shader_variant *v);
 nir_ssa_def *
 ir3_nir_try_propagate_bit_shift(nir_builder *b, nir_ssa_def *offset, int32_t shift);
 
-uint32_t ir3_link_geometry_stages(const struct ir3_shader_variant *producer,
-               const struct ir3_shader_variant *consumer,
-               uint32_t *locs);
-
 static inline nir_intrinsic_instr *
 ir3_bindless_resource(nir_src src)
 {
index 44b2921..d54f9a4 100644 (file)
@@ -30,7 +30,6 @@ struct state {
 
        struct primitive_map {
                unsigned loc[32];
-               unsigned size[32];
                unsigned stride;
        } map;
 
@@ -73,45 +72,65 @@ build_local_primitive_id(nir_builder *b, struct state *state)
        return bitfield_extract(b, state->header, state->local_primitive_id_start, 63);
 }
 
-static nir_variable *
-get_var(nir_shader *shader, nir_variable_mode mode, int driver_location)
+static bool
+is_tess_levels(gl_varying_slot slot)
 {
-       nir_foreach_variable_with_modes (v, shader, mode) {
-               if (v->data.driver_location == driver_location) {
-                       return v;
-               }
-       }
-
-       return NULL;
+       return (slot == VARYING_SLOT_TESS_LEVEL_OUTER ||
+                       slot == VARYING_SLOT_TESS_LEVEL_INNER);
 }
 
-static bool
-is_tess_levels(nir_variable *var)
+/* Return a deterministic index for varyings. We can't rely on driver_location
+ * to be correct without linking the different stages first, so we create
+ * "primitive maps" where the producer decides on the location of each varying
+ * slot and then exports a per-slot array to the consumer. This compacts the
+ * gl_varying_slot space down a bit so that the primitive maps aren't too
+ * large.
+ *
+ * Note: per-patch varyings are currently handled separately, without any
+ * compacting.
+ *
+ * TODO: We could probably use the driver_location's directly in the non-SSO
+ * (Vulkan) case.
+ */
+
+static unsigned
+shader_io_get_unique_index(gl_varying_slot slot)
 {
-       return (var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER ||
-                       var->data.location == VARYING_SLOT_TESS_LEVEL_INNER);
+       if (slot == VARYING_SLOT_POS)
+               return 0;
+       if (slot == VARYING_SLOT_PSIZ)
+               return 1;
+       if (slot == VARYING_SLOT_CLIP_DIST0)
+               return 2;
+       if (slot == VARYING_SLOT_CLIP_DIST1)
+               return 3;
+       if (slot >= VARYING_SLOT_VAR0 && slot <= VARYING_SLOT_VAR31)
+               return 4 + (slot - VARYING_SLOT_VAR0);
+       unreachable("illegal slot in get unique index\n");
 }
 
 static nir_ssa_def *
 build_local_offset(nir_builder *b, struct state *state,
-               nir_ssa_def *vertex, uint32_t base, nir_ssa_def *offset)
+               nir_ssa_def *vertex, uint32_t location, uint32_t comp, nir_ssa_def *offset)
 {
        nir_ssa_def *primitive_stride = nir_load_vs_primitive_stride_ir3(b);
        nir_ssa_def *primitive_offset =
                nir_imul24(b, build_local_primitive_id(b, state), primitive_stride);
        nir_ssa_def *attr_offset;
        nir_ssa_def *vertex_stride;
+       unsigned index = shader_io_get_unique_index(location);
 
        switch (b->shader->info.stage) {
        case MESA_SHADER_VERTEX:
        case MESA_SHADER_TESS_EVAL:
                vertex_stride = nir_imm_int(b, state->map.stride * 4);
-               attr_offset = nir_imm_int(b, state->map.loc[base] * 4);
+               attr_offset = nir_imm_int(b, state->map.loc[index] + 4 * comp);
                break;
        case MESA_SHADER_TESS_CTRL:
        case MESA_SHADER_GEOMETRY:
                vertex_stride = nir_load_vs_vertex_stride_ir3(b);
-               attr_offset = nir_load_primitive_location_ir3(b, base);
+               attr_offset = nir_iadd(b, nir_load_primitive_location_ir3(b, index),
+                                                          nir_imm_int(b, comp * 4));
                break;
        default:
                unreachable("bad shader stage");
@@ -120,7 +139,7 @@ build_local_offset(nir_builder *b, struct state *state,
        nir_ssa_def *vertex_offset = nir_imul24(b, vertex, vertex_stride);
 
        return nir_iadd(b, nir_iadd(b, primitive_offset, vertex_offset),
-                       nir_iadd(b, attr_offset, offset));
+                       nir_iadd(b, attr_offset, nir_ishl(b, offset, nir_imm_int(b, 4))));
 }
 
 static nir_intrinsic_instr *
@@ -153,37 +172,58 @@ replace_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,
 }
 
 static void
-build_primitive_map(nir_shader *shader, nir_variable_mode mode, struct primitive_map *map)
+build_primitive_map(nir_shader *shader, struct primitive_map *map)
 {
-       nir_foreach_variable_with_modes (var, shader, mode) {
-               switch (var->data.location) {
-               case VARYING_SLOT_TESS_LEVEL_OUTER:
-               case VARYING_SLOT_TESS_LEVEL_INNER:
-                       continue;
-               }
-
-               unsigned size = glsl_count_attribute_slots(var->type, false) * 4;
-
-               assert(var->data.driver_location < ARRAY_SIZE(map->size));
-               map->size[var->data.driver_location] =
-                       MAX2(map->size[var->data.driver_location], size);
+       /* All interfaces except the TCS <-> TES interface use ldlw, which takes
+        * an offset in bytes, so each vec4 slot is 16 bytes. TCS <-> TES uses
+        * ldg, which takes an offset in dwords, but each per-vertex slot has
+        * space for every vertex, and there's space at the beginning for
+        * per-patch varyings.
+        */
+       unsigned slot_size = 16, start = 0;
+       if (shader->info.stage == MESA_SHADER_TESS_CTRL) {
+               slot_size = shader->info.tess.tcs_vertices_out * 4;
+               start = util_last_bit(shader->info.patch_outputs_written) * 4;
        }
 
-       unsigned loc = 0;
-       for (uint32_t i = 0; i < ARRAY_SIZE(map->size); i++) {
-               if (map->size[i] == 0)
-                               continue;
-               nir_variable *var = get_var(shader, mode, i);
-               map->loc[i] = loc;
-               loc += map->size[i];
-
-               if (var->data.patch)
-                       map->size[i] = 0;
-               else
-                       map->size[i] = map->size[i] / glsl_get_length(var->type);
+       uint64_t mask = shader->info.outputs_written;
+       unsigned loc = start;
+       while (mask) {
+               int location = u_bit_scan64(&mask);
+               if (is_tess_levels(location))
+                       continue;
+
+               unsigned index = shader_io_get_unique_index(location);
+               map->loc[index] = loc;
+               loc += slot_size;
        }
 
        map->stride = loc;
+       /* Use units of dwords for the stride. */
+       if (shader->info.stage != MESA_SHADER_TESS_CTRL)
+               map->stride /= 4;
+}
+
+/* For shader stages that receive a primitive map, calculate how big it should
+ * be.
+ */
+
+static unsigned
+calc_primitive_map_size(nir_shader *shader)
+{
+       uint64_t mask = shader->info.inputs_read;
+       unsigned max_index = 0;
+       while (mask) {
+               int location = u_bit_scan64(&mask);
+
+               if (is_tess_levels(location))
+                       continue;
+
+               unsigned index = shader_io_get_unique_index(location);
+               max_index = MAX2(max_index, index + 1);
+       }
+       
+       return max_index;
 }
 
 static void
@@ -209,7 +249,9 @@ lower_block_to_explicit_output(nir_block *block, nir_builder *b, struct state *s
                        b->cursor = nir_instr_remove(&intr->instr);
 
                        nir_ssa_def *vertex_id = build_vertex_id(b, state);
-                       nir_ssa_def *offset = build_local_offset(b, state, vertex_id, nir_intrinsic_base(intr),
+                       nir_ssa_def *offset = build_local_offset(b, state, vertex_id,
+                                       nir_intrinsic_io_semantics(intr).location,
+                                       nir_intrinsic_component(intr),
                                        intr->src[1].ssa);
                        nir_intrinsic_instr *store =
                                nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_shared_ir3);
@@ -240,7 +282,7 @@ ir3_nir_lower_to_explicit_output(nir_shader *shader, struct ir3_shader_variant *
 {
        struct state state = { };
 
-       build_primitive_map(shader, nir_var_shader_out, &state.map);
+       build_primitive_map(shader, &state.map);
        memcpy(v->output_loc, state.map.loc, sizeof(v->output_loc));
 
        nir_function_impl *impl = nir_shader_get_entrypoint(shader);
@@ -282,7 +324,8 @@ lower_block_to_explicit_input(nir_block *block, nir_builder *b, struct state *st
 
                        nir_ssa_def *offset = build_local_offset(b, state,
                                        intr->src[0].ssa, // this is typically gl_InvocationID
-                                       nir_intrinsic_base(intr),
+                                       nir_intrinsic_io_semantics(intr).location,
+                                       nir_intrinsic_component(intr),
                                        intr->src[1].ssa);
 
                        replace_intrinsic(b, intr, nir_intrinsic_load_shared_ir3, offset, NULL, NULL);
@@ -305,14 +348,14 @@ lower_block_to_explicit_input(nir_block *block, nir_builder *b, struct state *st
 }
 
 void
-ir3_nir_lower_to_explicit_input(nir_shader *shader, struct ir3_compiler *compiler)
+ir3_nir_lower_to_explicit_input(nir_shader *shader, struct ir3_shader_variant *v)
 {
        struct state state = { };
 
        /* when using stl/ldl (instead of stlw/ldlw) for linking VS and HS,
         * HS uses a different primitive id, which starts at bit 16 in the header
         */
-       if (shader->info.stage == MESA_SHADER_TESS_CTRL && compiler->tess_use_shared)
+       if (shader->info.stage == MESA_SHADER_TESS_CTRL && v->shader->compiler->tess_use_shared)
                state.local_primitive_id_start = 16;
 
        nir_function_impl *impl = nir_shader_get_entrypoint(shader);
@@ -329,43 +372,74 @@ ir3_nir_lower_to_explicit_input(nir_shader *shader, struct ir3_compiler *compile
 
        nir_foreach_block_safe (block, impl)
                lower_block_to_explicit_input(block, &b, &state);
+
+       v->input_size = calc_primitive_map_size(shader);
 }
 
+static nir_ssa_def *
+build_tcs_out_vertices(nir_builder *b)
+{
+       if (b->shader->info.stage == MESA_SHADER_TESS_CTRL)
+               return nir_imm_int(b, b->shader->info.tess.tcs_vertices_out);
+       else
+               return nir_load_patch_vertices_in(b);
+}
 
 static nir_ssa_def *
 build_per_vertex_offset(nir_builder *b, struct state *state,
-               nir_ssa_def *vertex, nir_ssa_def *offset, nir_variable *var)
+               nir_ssa_def *vertex, uint32_t location, uint32_t comp, nir_ssa_def *offset)
 {
        nir_ssa_def *primitive_id = nir_load_primitive_id(b);
        nir_ssa_def *patch_stride = nir_load_hs_patch_stride_ir3(b);
        nir_ssa_def *patch_offset = nir_imul24(b, primitive_id, patch_stride);
        nir_ssa_def *attr_offset;
-       int loc = var->data.driver_location;
 
-       switch (b->shader->info.stage) {
-       case MESA_SHADER_TESS_CTRL:
-               attr_offset = nir_imm_int(b, state->map.loc[loc]);
-               break;
-       case MESA_SHADER_TESS_EVAL:
-               attr_offset = nir_load_primitive_location_ir3(b, loc);
-               break;
-       default:
-               unreachable("bad shader state");
+       if (nir_src_is_const(nir_src_for_ssa(offset))) {
+               location += nir_src_as_uint(nir_src_for_ssa(offset));
+               offset = nir_imm_int(b, 0);
+       } else {
+               /* Offset is in vec4's, but we need it in unit of components for the
+                * load/store_global_ir3 offset.
+                */
+               offset = nir_ishl(b, offset, nir_imm_int(b, 2));
        }
 
-       nir_ssa_def *attr_stride = nir_imm_int(b, state->map.size[loc]);
-       nir_ssa_def *vertex_offset = nir_imul24(b, vertex, attr_stride);
+       nir_ssa_def *vertex_offset;
+       if (vertex) {
+               unsigned index = shader_io_get_unique_index(location);
+               switch (b->shader->info.stage) {
+               case MESA_SHADER_TESS_CTRL:
+                       attr_offset = nir_imm_int(b, state->map.loc[index] + comp);
+                       break;
+               case MESA_SHADER_TESS_EVAL:
+                       attr_offset =
+                               nir_iadd(b, nir_load_primitive_location_ir3(b, index),
+                                                nir_imm_int(b, comp));
+                       break;
+               default:
+                       unreachable("bad shader state");
+               }
+
+               attr_offset = nir_iadd(b, attr_offset,
+                                                          nir_imul24(b, offset,
+                                                                                 build_tcs_out_vertices(b)));
+               vertex_offset = nir_ishl(b, vertex, nir_imm_int(b, 2));
+       } else {
+               assert(location >= VARYING_SLOT_PATCH0 &&
+                          location <= VARYING_SLOT_TESS_MAX);
+               unsigned index = location - VARYING_SLOT_PATCH0;
+               attr_offset = nir_iadd(b, nir_imm_int(b, index * 4 + comp), offset);
+               vertex_offset = nir_imm_int(b, 0);
+       }
 
-       return nir_iadd(b, nir_iadd(b, patch_offset, attr_offset),
-                       nir_iadd(b, vertex_offset, nir_ishl(b, offset, nir_imm_int(b, 2))));
+       return nir_iadd(b, nir_iadd(b, patch_offset, attr_offset), vertex_offset);
 }
 
 static nir_ssa_def *
-build_patch_offset(nir_builder *b, struct state *state, nir_ssa_def *offset, nir_variable *var)
+build_patch_offset(nir_builder *b, struct state *state,
+               uint32_t base, uint32_t comp, nir_ssa_def *offset)
 {
-       debug_assert(var && var->data.patch);
-
-       return build_per_vertex_offset(b, state, nir_imm_int(b, 0), offset, var);
+       return build_per_vertex_offset(b, state, NULL, base, comp, offset);
 }
 
 static void
@@ -444,9 +518,11 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
                        b->cursor = nir_before_instr(&intr->instr);
 
                        nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
-                       nir_variable *var = get_var(b->shader, nir_var_shader_out, nir_intrinsic_base(intr));
                        nir_ssa_def *offset = build_per_vertex_offset(b, state,
-                                       intr->src[0].ssa, intr->src[1].ssa, var);
+                                       intr->src[0].ssa,
+                                       nir_intrinsic_io_semantics(intr).location,
+                                       nir_intrinsic_component(intr),
+                                       intr->src[1].ssa);
 
                        replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
                        break;
@@ -462,12 +538,13 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
 
                        nir_ssa_def *value = intr->src[0].ssa;
                        nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
-                       nir_variable *var = get_var(b->shader, nir_var_shader_out, nir_intrinsic_base(intr));
                        nir_ssa_def *offset = build_per_vertex_offset(b, state,
-                                       intr->src[1].ssa, intr->src[2].ssa, var);
+                                       intr->src[1].ssa,
+                                       nir_intrinsic_io_semantics(intr).location,
+                                       nir_intrinsic_component(intr),
+                                       intr->src[2].ssa);
 
-                       replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, value, address,
-                                       nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_component(intr))));
+                       replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, value, address, offset);
 
                        break;
                }
@@ -475,8 +552,6 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
                case nir_intrinsic_load_output: {
                        // src[] = { offset }.
 
-                       nir_variable *var = get_var(b->shader, nir_var_shader_out, nir_intrinsic_base(intr));
-
                        b->cursor = nir_before_instr(&intr->instr);
 
                        nir_ssa_def *address, *offset;
@@ -486,13 +561,17 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
                         * are never used. most likely some issue with (sy) not properly
                         * syncing with values coming from a second memory transaction.
                         */
-                       if (is_tess_levels(var)) {
+                       gl_varying_slot location = nir_intrinsic_io_semantics(intr).location;
+                       if (is_tess_levels(location)) {
                                assert(intr->dest.ssa.num_components == 1);
                                address = nir_load_tess_factor_base_ir3(b);
-                               offset = build_tessfactor_base(b, var->data.location, state);
+                               offset = build_tessfactor_base(b, location, state);
                        } else {
                                address = nir_load_tess_param_base_ir3(b);
-                               offset = build_patch_offset(b, state, intr->src[0].ssa, var);
+                               offset = build_patch_offset(b, state,
+                                                                                       location,
+                                                                                       nir_intrinsic_component(intr),
+                                                                                       intr->src[0].ssa);
                        }
 
                        replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
@@ -504,14 +583,13 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
 
                        /* write patch output to bo */
 
-                       nir_variable *var = get_var(b->shader, nir_var_shader_out, nir_intrinsic_base(intr));
-
                        b->cursor = nir_before_instr(&intr->instr);
 
                        /* sparse writemask not supported */
                        assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));
 
-                       if (is_tess_levels(var)) {
+                       gl_varying_slot location = nir_intrinsic_io_semantics(intr).location;
+                       if (is_tess_levels(location)) {
                                /* with tess levels are defined as float[4] and float[2],
                                 * but tess factor BO has smaller sizes for tris/isolines,
                                 * so we have to discard any writes beyond the number of
@@ -519,7 +597,7 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
                                uint32_t inner_levels, outer_levels, levels;
                                tess_level_components(state, &inner_levels, &outer_levels);
 
-                               if (var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
+                               if (location == VARYING_SLOT_TESS_LEVEL_OUTER)
                                        levels = outer_levels;
                                else
                                        levels = inner_levels;
@@ -534,12 +612,15 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
                                replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3,
                                                intr->src[0].ssa,
                                                nir_load_tess_factor_base_ir3(b),
-                                               nir_iadd(b, offset, build_tessfactor_base(b, var->data.location, state)));
+                                               nir_iadd(b, offset, build_tessfactor_base(b, location, state)));
 
                                nir_pop_if(b, nif);
                        } else {
                                nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
-                               nir_ssa_def *offset = build_patch_offset(b, state, intr->src[1].ssa, var);
+                               nir_ssa_def *offset = build_patch_offset(b, state, 
+                                                                                                                location,
+                                                                                                                nir_intrinsic_component(intr),
+                                                                                                                intr->src[1].ssa);
 
                                debug_assert(nir_intrinsic_component(intr) == 0);
 
@@ -580,7 +661,7 @@ ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader_variant *v,
                nir_print_shader(shader, stderr);
        }
 
-       build_primitive_map(shader, nir_var_shader_out, &state.map);
+       build_primitive_map(shader, &state.map);
        memcpy(v->output_loc, state.map.loc, sizeof(v->output_loc));
        v->output_size = state.map.stride;
 
@@ -672,9 +753,11 @@ lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state)
                        b->cursor = nir_before_instr(&intr->instr);
 
                        nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
-                       nir_variable *var = get_var(b->shader, nir_var_shader_in, nir_intrinsic_base(intr));
                        nir_ssa_def *offset = build_per_vertex_offset(b, state,
-                                       intr->src[0].ssa, intr->src[1].ssa, var);
+                                       intr->src[0].ssa,
+                                       nir_intrinsic_io_semantics(intr).location,
+                                       nir_intrinsic_component(intr),
+                                       intr->src[1].ssa);
 
                        replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
                        break;
@@ -683,10 +766,6 @@ lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state)
                case nir_intrinsic_load_input: {
                        // src[] = { offset }.
 
-                       nir_variable *var = get_var(b->shader, nir_var_shader_in, nir_intrinsic_base(intr));
-
-                       debug_assert(var->data.patch);
-
                        b->cursor = nir_before_instr(&intr->instr);
 
                        nir_ssa_def *address, *offset;
@@ -696,13 +775,17 @@ lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state)
                         * are never used. most likely some issue with (sy) not properly
                         * syncing with values coming from a second memory transaction.
                         */
-                       if (is_tess_levels(var)) {
+                       gl_varying_slot location = nir_intrinsic_io_semantics(intr).location;
+                       if (is_tess_levels(location)) {
                                assert(intr->dest.ssa.num_components == 1);
                                address = nir_load_tess_factor_base_ir3(b);
-                               offset = build_tessfactor_base(b, var->data.location, state);
+                               offset = build_tessfactor_base(b, location, state);
                        } else {
                                address = nir_load_tess_param_base_ir3(b);
-                               offset = build_patch_offset(b, state, intr->src[0].ssa, var);
+                               offset = build_patch_offset(b, state,
+                                                                                       location,
+                                                                                       nir_intrinsic_component(intr),
+                                                                                       intr->src[0].ssa);
                        }
 
                        offset = nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_component(intr)));
@@ -718,7 +801,7 @@ lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state)
 }
 
 void
-ir3_nir_lower_tess_eval(nir_shader *shader, unsigned topology)
+ir3_nir_lower_tess_eval(nir_shader *shader, struct ir3_shader_variant *v, unsigned topology)
 {
        struct state state = { .topology = topology };
 
@@ -728,9 +811,6 @@ ir3_nir_lower_tess_eval(nir_shader *shader, unsigned topology)
                nir_print_shader(shader, stderr);
        }
 
-       /* Build map of inputs so we have the sizes. */
-       build_primitive_map(shader, nir_var_shader_in, &state.map);
-
        nir_function_impl *impl = nir_shader_get_entrypoint(shader);
        assert(impl);
 
@@ -740,6 +820,8 @@ ir3_nir_lower_tess_eval(nir_shader *shader, unsigned topology)
        nir_foreach_block_safe (block, impl)
                lower_tess_eval_block(block, &b, &state);
 
+       v->input_size = calc_primitive_map_size(shader);
+
        nir_metadata_preserve(impl, 0);
 }
 
@@ -804,8 +886,6 @@ ir3_nir_lower_gs(nir_shader *shader)
                nir_print_shader(shader, stderr);
        }
 
-       build_primitive_map(shader, nir_var_shader_in, &state.map);
-
        /* Create an output var for vertex_flags. This will be shadowed below,
         * same way regular outputs get shadowed, and this variable will become a
         * temporary.
@@ -914,38 +994,3 @@ ir3_nir_lower_gs(nir_shader *shader)
        }
 }
 
-uint32_t
-ir3_link_geometry_stages(const struct ir3_shader_variant *producer,
-               const struct ir3_shader_variant *consumer,
-               uint32_t *locs)
-{
-       uint32_t num_loc = 0, factor;
-
-       switch (consumer->type) {
-       case MESA_SHADER_TESS_CTRL:
-       case MESA_SHADER_GEOMETRY:
-               /* These stages load with ldlw, which expects byte offsets. */
-               factor = 4;
-               break;
-       case MESA_SHADER_TESS_EVAL:
-               /* The tess eval shader uses ldg, which takes dword offsets. */
-               factor = 1;
-               break;
-       default:
-               unreachable("bad shader stage");
-       }
-
-       nir_foreach_shader_in_variable(in_var, consumer->shader->nir) {
-               nir_foreach_shader_out_variable(out_var, producer->shader->nir) {
-                       if (in_var->data.location == out_var->data.location) {
-                               locs[in_var->data.driver_location] =
-                                       producer->output_loc[out_var->data.driver_location] * factor;
-
-                               debug_assert(num_loc <= in_var->data.driver_location + 1);
-                               num_loc = in_var->data.driver_location + 1;
-                       }
-               }
-       }
-
-       return num_loc;
-}
index 3987006..f78d802 100644 (file)
@@ -570,7 +570,13 @@ struct ir3_shader_variant {
        /* Size in dwords of all outputs for VS, size of entire patch for HS. */
        uint32_t output_size;
 
-       /* Map from driver_location to byte offset in per-primitive storage */
+       /* Expected size of incoming output_loc for HS, DS, and GS */
+       uint32_t input_size;
+
+       /* Map from location to offset in per-primitive storage. In dwords for
+        * HS, where varyings are read in the next stage via ldg with a dword
+        * offset, and in bytes for all other stages.
+        */
        unsigned output_loc[32];
 
        /* attributes (VS) / varyings (FS):
index c2cc4db..c1e6000 100644 (file)
@@ -686,7 +686,7 @@ tu6_setup_streamout(struct tu_cs *cs,
 static void
 tu6_emit_const(struct tu_cs *cs, uint32_t opcode, uint32_t base,
                enum a6xx_state_block block, uint32_t offset,
-               uint32_t size, uint32_t *dwords) {
+               uint32_t size, const uint32_t *dwords) {
    assert(size % 4 == 0);
 
    tu_cs_emit_pkt7(cs, opcode, 3 + size);
@@ -711,16 +711,14 @@ tu6_emit_link_map(struct tu_cs *cs,
 {
    const struct ir3_const_state *const_state = ir3_const_state(consumer);
    uint32_t base = const_state->offsets.primitive_map;
-   uint32_t patch_locs[MAX_VARYING] = { }, num_loc;
-   num_loc = ir3_link_geometry_stages(producer, consumer, patch_locs);
-   int size = DIV_ROUND_UP(num_loc, 4);
+   int size = DIV_ROUND_UP(consumer->input_size, 4);
 
    size = (MIN2(size + base, consumer->constlen) - base) * 4;
    if (size <= 0)
       return;
 
    tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, base, sb, 0, size,
-                         patch_locs);
+                         producer->output_loc);
 }
 
 static uint16_t
index 5e79661..4784ac6 100644 (file)
@@ -308,11 +308,7 @@ ir3_emit_link_map(struct fd_screen *screen,
 {
        const struct ir3_const_state *const_state = ir3_const_state(v);
        uint32_t base = const_state->offsets.primitive_map;
-       uint32_t patch_locs[MAX_VARYING] = { }, num_loc;
-
-       num_loc = ir3_link_geometry_stages(producer, v, patch_locs);
-
-       int size = DIV_ROUND_UP(num_loc, 4);
+       int size = DIV_ROUND_UP(v->input_size, 4);
 
        /* truncate size to avoid writing constants that shader
         * does not use:
@@ -324,7 +320,7 @@ ir3_emit_link_map(struct fd_screen *screen,
        size *= 4;
 
        if (size > 0)
-               emit_const_user(ring, v, base, size, patch_locs);
+               emit_const_user(ring, v, base, size, producer->output_loc);
 }
 
 /* emit stream-out buffers: */