anv: workaround for per-prim attributes corruption
author Marcin Ślusarz <marcin.slusarz@intel.com>
Wed, 6 Jul 2022 15:17:42 +0000 (17:17 +0200)
committer Marge Bot <emma+marge@anholt.net>
Wed, 29 Mar 2023 18:35:55 +0000 (18:35 +0000)
Wa_14015590813 for gfx 12.5

Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17622>

src/intel/vulkan/anv_device.c
src/intel/vulkan/anv_mesh_perprim_wa.c [new file with mode: 0644]
src/intel/vulkan/anv_pipeline.c
src/intel/vulkan/anv_private.h
src/intel/vulkan/meson.build
src/util/driconf.h
src/util/xmlconfig.h

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 022cd53..9e411c4 100644
@@ -82,6 +82,7 @@ static const driOptionDescription anv_dri_options[] = {
       DRI_CONF_ALWAYS_FLUSH_CACHE(false)
       DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
       DRI_CONF_LIMIT_TRIG_INPUT_RANGE(false)
+      DRI_CONF_ANV_MESH_CONV_PRIM_ATTRS_TO_VERT_ATTRS(-2)
    DRI_CONF_SECTION_END
 
    DRI_CONF_SECTION_QUALITY
@@ -1100,7 +1101,8 @@ anv_init_dri_options(struct anv_instance *instance)
             driQueryOptionf(&instance->dri_options, "lower_depth_range_rate");
     instance->no_16bit =
             driQueryOptionb(&instance->dri_options, "no_16bit");
-
+    instance->mesh_conv_prim_attrs_to_vert_attrs =
+            driQueryOptioni(&instance->dri_options, "anv_mesh_conv_prim_attrs_to_vert_attrs");
     instance->fp64_workaround_enabled =
             driQueryOptionb(&instance->dri_options, "fp64_workaround_enabled");
     instance->generated_indirect_threshold =
diff --git a/src/intel/vulkan/anv_mesh_perprim_wa.c b/src/intel/vulkan/anv_mesh_perprim_wa.c
new file mode 100644
index 0000000..f7346b6
--- /dev/null
+++ b/src/intel/vulkan/anv_mesh_perprim_wa.c
@@ -0,0 +1,557 @@
+/*
+ * Copyright © 2022 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "anv_private.h"
+#include "nir_builder.h"
+
+/*
+ * Wa_14015590813 for gfx 12.5.
+ *
+ * This file implements a workaround for a HW bug that makes the fragment
+ * shader read incorrect per-primitive data if the mesh shader, in addition
+ * to writing per-primitive data, also writes to gl_ClipDistance.
+ *
+ * The suggested solution to that bug is to avoid per-primitive data by:
+ * - creating new vertices for provoking vertices shared by multiple primitives
+ * - converting per-primitive attributes read by the fragment shader into flat
+ *   per-vertex attributes of the provoking vertex
+ * - modifying the fragment shader to read those per-vertex attributes
+ *
+ * There are at least 2 types of failures not handled very well:
+ * - if the number of varying slots overflows, then only some attributes will
+ *   be converted, leading to corruption of the unconverted attributes
+ * - if the overall MUE size is so large it doesn't fit in the URB, then URB
+ *   allocation will fail in some way; unfortunately there's no good way to
+ *   tell how big the MUE will be at this point and back out
+ *
+ * This workaround needs to be applied before linking, so that unused outputs
+ * created by this code are removed at link time.
+ *
+ * The workaround can be controlled by a driconf option that disables it,
+ * lowers its scope, or force-enables it.
+ *
+ * The option "anv_mesh_conv_prim_attrs_to_vert_attrs" is evaluated like this:
+ *  value == 0      - disable the workaround
+ *  value < 0       - enable ONLY if the workaround is required
+ *  value > 0       - enable ALWAYS, even if it's not required
+ *  abs(value) >= 1 - attribute conversion
+ *  abs(value) >= 2 - attribute conversion and vertex duplication
+ *
+ *  Default: -2 (both parts of the workaround, ONLY if required)
+ */
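+
+/*
+ * For reference, a minimal sketch of how this option could be overridden
+ * through drirc (standard driconf XML; the application name and executable
+ * below are made-up placeholders):
+ *
+ *   <driconf>
+ *     <device driver="anv">
+ *       <application name="Some App" executable="some_app">
+ *         <option name="anv_mesh_conv_prim_attrs_to_vert_attrs" value="2" />
+ *       </application>
+ *     </device>
+ *   </driconf>
+ */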
+
+static bool
+anv_mesh_convert_attrs_prim_to_vert(struct nir_shader *nir,
+                                    gl_varying_slot *wa_mapping,
+                                    uint64_t fs_inputs,
+                                    const VkGraphicsPipelineCreateInfo *pCreateInfo,
+                                    void *mem_ctx,
+                                    const bool dup_vertices,
+                                    const bool force_conversion)
+{
+   uint64_t per_primitive_outputs = nir->info.per_primitive_outputs;
+   per_primitive_outputs &= ~BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_INDICES);
+
+   if (per_primitive_outputs == 0)
+      return false;
+
+   uint64_t outputs_written = nir->info.outputs_written;
+   uint64_t other_outputs = outputs_written & ~per_primitive_outputs;
+
+   if ((other_outputs & (VARYING_BIT_CLIP_DIST0 | VARYING_BIT_CLIP_DIST1)) == 0)
+      if (!force_conversion)
+         return false;
+
+   uint64_t all_outputs = outputs_written;
+   unsigned attrs = 0;
+
+   uint64_t remapped_outputs = outputs_written & per_primitive_outputs;
+   remapped_outputs &= ~BITFIELD64_BIT(VARYING_SLOT_CULL_PRIMITIVE);
+
+   /* Skip locations not read by the fragment shader, because they will
+    * be eliminated at linking time. Note that some fs inputs may be
+    * removed only after optimizations, so it's possible that we will
+    * create too many variables.
+    */
+   remapped_outputs &= fs_inputs;
+
+   /* Figure out the mapping between per-primitive and new per-vertex outputs. */
+   nir_foreach_shader_out_variable(var, nir) {
+      int location = var->data.location;
+
+      if (!(BITFIELD64_BIT(location) & remapped_outputs))
+         continue;
+
+      /* Although primitive shading rate, layer and viewport have a predefined
+       * place in the MUE Primitive Header (so we can't really move them
+       * anywhere), we still have to copy them to per-vertex space if the
+       * fragment shader reads them.
+       */
+      assert(location == VARYING_SLOT_PRIMITIVE_SHADING_RATE ||
+             location == VARYING_SLOT_LAYER ||
+             location == VARYING_SLOT_VIEWPORT ||
+             location == VARYING_SLOT_PRIMITIVE_ID ||
+             location >= VARYING_SLOT_VAR0);
+
+      const struct glsl_type *type = var->type;
+      if (nir_is_arrayed_io(var, MESA_SHADER_MESH) || var->data.per_view) {
+         assert(glsl_type_is_array(type));
+         type = glsl_get_array_element(type);
+      }
+
+      unsigned num_slots = glsl_count_attribute_slots(type, false);
+
+      for (gl_varying_slot slot = VARYING_SLOT_VAR0; slot <= VARYING_SLOT_VAR31; slot++) {
+         uint64_t mask = BITFIELD64_MASK(num_slots) << slot;
+         if ((all_outputs & mask) == 0) {
+            wa_mapping[location] = slot;
+            all_outputs |= mask;
+            attrs++;
+            break;
+         }
+      }
+
+      if (wa_mapping[location] == 0) {
+         fprintf(stderr, "Not enough space for hardware per-primitive data corruption workaround.\n");
+         break;
+      }
+   }
+
+   if (attrs == 0)
+      if (!force_conversion)
+         return false;
+
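+   /* Index of the provoking vertex within a primitive. 0 selects the first
+    * vertex; for last-vertex mode the value 2 below assumes triangle
+    * primitives, where the last vertex has index 2.
+    */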
+   unsigned provoking_vertex = 0;
+
+   const VkPipelineRasterizationStateCreateInfo *rs_info = pCreateInfo->pRasterizationState;
+   const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *rs_pv_info =
+      vk_find_struct_const(rs_info, PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT);
+   if (rs_pv_info && rs_pv_info->provokingVertexMode == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT)
+      provoking_vertex = 2;
+
+   unsigned vertices_per_primitive =
+         num_mesh_vertices_per_primitive(nir->info.mesh.primitive_type);
+
+   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
+   nir_builder b;
+   nir_builder_init(&b, impl);
+   b.cursor = nir_after_cf_list(&impl->body);
+
+   /* wait for all subgroups to finish */
+   nir_scoped_barrier(&b, NIR_SCOPE_WORKGROUP);
+
+   nir_ssa_def *zero = nir_imm_int(&b, 0);
+
+   nir_ssa_def *local_invocation_index = nir_build_load_local_invocation_index(&b);
+
+   nir_ssa_def *cmp = nir_ieq(&b, local_invocation_index, zero);
+   nir_if *if_stmt = nir_push_if(&b, cmp);
+   {
+      nir_variable *primitive_count_var = NULL;
+      nir_variable *primitive_indices_var = NULL;
+
+      unsigned num_other_variables = 0;
+      nir_foreach_shader_out_variable(var, b.shader) {
+         if ((BITFIELD64_BIT(var->data.location) & other_outputs) == 0)
+            continue;
+         num_other_variables++;
+      }
+
+      nir_deref_instr **per_vertex_derefs =
+            ralloc_array(mem_ctx, nir_deref_instr *, num_other_variables);
+
+      unsigned num_per_vertex_variables = 0;
+
+      unsigned processed = 0;
+      nir_foreach_shader_out_variable(var, b.shader) {
+         if ((BITFIELD64_BIT(var->data.location) & other_outputs) == 0)
+            continue;
+
+         switch (var->data.location) {
+            case VARYING_SLOT_PRIMITIVE_COUNT:
+               primitive_count_var = var;
+               break;
+            case VARYING_SLOT_PRIMITIVE_INDICES:
+               primitive_indices_var = var;
+               break;
+            default: {
+               const struct glsl_type *type = var->type;
+               assert(glsl_type_is_array(type));
+               const struct glsl_type *array_element_type =
+                     glsl_get_array_element(type);
+
+               if (dup_vertices) {
+                  /*
+                   * Resize the type of the array output to make space for one
+                   * extra vertex per primitive, ensuring that the provoking
+                   * vertex is never shared between primitives.
+                   */
+                  const struct glsl_type *new_type =
+                        glsl_array_type(array_element_type,
+                                        glsl_get_length(type) +
+                                        nir->info.mesh.max_primitives_out,
+                                        0);
+
+                  var->type = new_type;
+               }
+
+               per_vertex_derefs[num_per_vertex_variables++] =
+                     nir_build_deref_var(&b, var);
+               break;
+            }
+         }
+
+         ++processed;
+      }
+      assert(processed == num_other_variables);
+
+      assert(primitive_count_var != NULL);
+      assert(primitive_indices_var != NULL);
+
+      /* Update types of derefs to match type of variables they (de)reference. */
+      if (dup_vertices) {
+         nir_foreach_function(function, b.shader) {
+            if (!function->impl)
+               continue;
+
+            nir_foreach_block(block, function->impl) {
+               nir_foreach_instr(instr, block) {
+                  if (instr->type != nir_instr_type_deref)
+                     continue;
+
+                  nir_deref_instr *deref = nir_instr_as_deref(instr);
+                  if (deref->deref_type != nir_deref_type_var)
+                     continue;
+
+                  if (deref->var->type != deref->type)
+                     deref->type = deref->var->type;
+               }
+            }
+         }
+      }
+
+      /* indexed by slot of per-prim attribute */
+      struct {
+         nir_deref_instr *per_prim_deref;
+         nir_deref_instr *per_vert_deref;
+      } mapping[VARYING_SLOT_MAX] = {{NULL, NULL}, };
+
+      /* Create new per-vertex output variables mirroring per-primitive variables
+       * and create derefs for both old and new variables.
+       */
+      nir_foreach_shader_out_variable(var, b.shader) {
+         gl_varying_slot location = var->data.location;
+
+         if ((BITFIELD64_BIT(location) & (outputs_written & per_primitive_outputs)) == 0)
+            continue;
+         if (wa_mapping[location] == 0)
+            continue;
+
+         const struct glsl_type *type = var->type;
+         assert(glsl_type_is_array(type));
+         const struct glsl_type *array_element_type = glsl_get_array_element(type);
+
+         const struct glsl_type *new_type =
+               glsl_array_type(array_element_type,
+                               nir->info.mesh.max_vertices_out +
+                               (dup_vertices ? nir->info.mesh.max_primitives_out : 0),
+                               0);
+
+         nir_variable *new_var =
+               nir_variable_create(b.shader, nir_var_shader_out, new_type, var->name);
+         assert(wa_mapping[location] >= VARYING_SLOT_VAR0);
+         assert(wa_mapping[location] <= VARYING_SLOT_VAR31);
+         new_var->data.location = wa_mapping[location];
+         new_var->data.interpolation = INTERP_MODE_FLAT;
+
+         mapping[location].per_vert_deref = nir_build_deref_var(&b, new_var);
+         mapping[location].per_prim_deref = nir_build_deref_var(&b, var);
+      }
+
+      nir_ssa_def *trueconst = nir_imm_true(&b);
+
+      /*
+       * for each Primitive (0 : primitiveCount)
+       *    if VertexUsed[PrimitiveIndices[Primitive][provoking vertex]]
+       *       create 1 new vertex at offset "Vertex"
+       *       copy per vert attributes of provoking vertex to the new one
+       *       update PrimitiveIndices[Primitive][provoking vertex]
+       *       Vertex++
+       *    else
+       *       VertexUsed[PrimitiveIndices[Primitive][provoking vertex]] := true
+       *
+       *    for each attribute : mapping
+       *       copy per_prim_attr(Primitive) to per_vert_attr[Primitive][provoking vertex]
+       */
+
+      /* primitive count */
+      nir_ssa_def *primitive_count = nir_load_var(&b, primitive_count_var);
+
+      /* primitive index */
+      nir_variable *primitive_var =
+            nir_local_variable_create(impl, glsl_uint_type(), "Primitive");
+      nir_deref_instr *primitive_deref = nir_build_deref_var(&b, primitive_var);
+      nir_store_deref(&b, primitive_deref, zero, 1);
+
+      /* vertex index */
+      nir_variable *vertex_var =
+            nir_local_variable_create(impl, glsl_uint_type(), "Vertex");
+      nir_deref_instr *vertex_deref = nir_build_deref_var(&b, vertex_var);
+      nir_store_deref(&b, vertex_deref, nir_imm_int(&b, nir->info.mesh.max_vertices_out), 1);
+
+      /* used vertices bitvector */
+      const struct glsl_type *used_vertex_type =
+            glsl_array_type(glsl_bool_type(),
+                            nir->info.mesh.max_vertices_out,
+                            0);
+      nir_variable *used_vertex_var =
+            nir_local_variable_create(impl, used_vertex_type, "VertexUsed");
+      nir_deref_instr *used_vertex_deref =
+               nir_build_deref_var(&b, used_vertex_var);
+      /* Initialize it as "not used" */
+      for (unsigned i = 0; i < nir->info.mesh.max_vertices_out; ++i) {
+         nir_deref_instr *indexed_used_vertex_deref =
+                        nir_build_deref_array(&b, used_vertex_deref, nir_imm_int(&b, i));
+         nir_store_deref(&b, indexed_used_vertex_deref, nir_imm_false(&b), 1);
+      }
+
+      nir_loop *loop = nir_push_loop(&b);
+      {
+         nir_ssa_def *primitive = nir_load_deref(&b, primitive_deref);
+         nir_ssa_def *cmp = nir_ige(&b, primitive, primitive_count);
+
+         nir_if *loop_check = nir_push_if(&b, cmp);
+         nir_jump(&b, nir_jump_break);
+         nir_pop_if(&b, loop_check);
+
+         nir_deref_instr *primitive_indices_deref =
+               nir_build_deref_var(&b, primitive_indices_var);
+         nir_deref_instr *indexed_primitive_indices_deref;
+         nir_ssa_def *src_vertex;
+         nir_ssa_def *prim_indices;
+
+         if (nir->info.mesh.nv) {
+            /* flat array, but we can deref each index directly */
+            nir_ssa_def *index_index =
+                  nir_imul(&b, primitive, nir_imm_int(&b, vertices_per_primitive));
+            index_index = nir_iadd(&b, index_index, nir_imm_int(&b, provoking_vertex));
+            indexed_primitive_indices_deref = nir_build_deref_array(&b, primitive_indices_deref, index_index);
+            src_vertex = nir_load_deref(&b, indexed_primitive_indices_deref);
+            prim_indices = NULL;
+         } else {
+            /* array of vectors: load the whole index vector for this
+             * primitive, then extract the provoking component from it */
+            indexed_primitive_indices_deref = nir_build_deref_array(&b, primitive_indices_deref, primitive);
+            prim_indices = nir_load_deref(&b, indexed_primitive_indices_deref);
+            src_vertex = nir_channel(&b, prim_indices, provoking_vertex);
+         }
+
+         nir_ssa_def *dst_vertex = nir_load_deref(&b, vertex_deref);
+
+         nir_deref_instr *indexed_used_vertex_deref =
+                        nir_build_deref_array(&b, used_vertex_deref, src_vertex);
+         nir_ssa_def *used_vertex = nir_load_deref(&b, indexed_used_vertex_deref);
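+         /* Without vertex duplication, force the "not seen yet" path below:
+          * the provoking vertex is never reallocated, and the per-primitive
+          * attributes are simply copied onto it in place.
+          */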
+         if (!dup_vertices)
+            used_vertex = nir_imm_false(&b);
+
+         nir_if *vertex_used_check = nir_push_if(&b, used_vertex);
+         {
+            for (unsigned a = 0; a < num_per_vertex_variables; ++a) {
+               nir_deref_instr *attr_arr = per_vertex_derefs[a];
+               nir_deref_instr *src = nir_build_deref_array(&b, attr_arr, src_vertex);
+               nir_deref_instr *dst = nir_build_deref_array(&b, attr_arr, dst_vertex);
+
+               nir_copy_deref(&b, dst, src);
+            }
+
+            if (nir->info.mesh.nv) {
+               nir_store_deref(&b, indexed_primitive_indices_deref, dst_vertex, 1);
+            } else {
+               /* replace one component of primitive indices vector */
+               nir_ssa_def *new_val =
+                     nir_vector_insert_imm(&b, prim_indices, dst_vertex, provoking_vertex);
+
+               /* and store complete vector */
+               nir_store_deref(&b, indexed_primitive_indices_deref, new_val,
+                               BITFIELD_MASK(vertices_per_primitive));
+            }
+
+            nir_store_deref(&b, vertex_deref, nir_iadd_imm(&b, dst_vertex, 1), 1);
+
+            for (unsigned i = 0; i < ARRAY_SIZE(mapping); ++i) {
+               if (!mapping[i].per_vert_deref)
+                  continue;
+
+               nir_deref_instr *src =
+                     nir_build_deref_array(&b, mapping[i].per_prim_deref, primitive);
+               nir_deref_instr *dst =
+                     nir_build_deref_array(&b, mapping[i].per_vert_deref, dst_vertex);
+
+               nir_copy_deref(&b, dst, src);
+            }
+         }
+         nir_push_else(&b, vertex_used_check);
+         {
+            nir_store_deref(&b, indexed_used_vertex_deref, trueconst, 1);
+
+            for (unsigned i = 0; i < ARRAY_SIZE(mapping); ++i) {
+               if (!mapping[i].per_vert_deref)
+                  continue;
+
+               nir_deref_instr *src =
+                     nir_build_deref_array(&b, mapping[i].per_prim_deref, primitive);
+               nir_deref_instr *dst =
+                     nir_build_deref_array(&b, mapping[i].per_vert_deref, src_vertex);
+
+               nir_copy_deref(&b, dst, src);
+            }
+
+         }
+         nir_pop_if(&b, vertex_used_check);
+
+         nir_store_deref(&b, primitive_deref, nir_iadd_imm(&b, primitive, 1), 1);
+      }
+      nir_pop_loop(&b, loop);
+   }
+   nir_pop_if(&b, if_stmt); /* local_invocation_index == 0 */
+
+   if (dup_vertices)
+      nir->info.mesh.max_vertices_out += nir->info.mesh.max_primitives_out;
+
+   if (should_print_nir(nir)) {
+      printf("%s\n", __func__);
+      nir_print_shader(nir, stdout);
+   }
+
+   /* Lower the copy_derefs emitted above into plain loads and stores. */
+   NIR_PASS(_, nir, nir_split_var_copies);
+   NIR_PASS(_, nir, nir_lower_var_copies);
+
+   nir_shader_gather_info(nir, impl);
+
+   return true;
+}
+
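+/* Rewrite each use of a remapped per-primitive FS input so it reads the
+ * pre-built deref of the new flat per-vertex replacement ("data" holds the
+ * per-slot deref mapping).
+ */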
+static bool
+anv_frag_update_derefs_instr(struct nir_builder *b, nir_instr *instr, void *data)
+{
+   if (instr->type != nir_instr_type_deref)
+      return false;
+
+   nir_deref_instr *deref = nir_instr_as_deref(instr);
+   if (deref->deref_type != nir_deref_type_var)
+      return false;
+
+   nir_variable *var = deref->var;
+   if (!(var->data.mode & nir_var_shader_in))
+      return false;
+
+   int location = var->data.location;
+   nir_deref_instr **new_derefs = (nir_deref_instr **)data;
+   if (new_derefs[location] == NULL)
+      return false;
+
+   assert(deref->dest.is_ssa);
+   assert(new_derefs[location]->dest.is_ssa);
+
+   nir_instr_remove(&deref->instr);
+   nir_ssa_def_rewrite_uses(&deref->dest.ssa, &new_derefs[location]->dest.ssa);
+
+   return true;
+}
+
+static bool
+anv_frag_update_derefs(nir_shader *shader, nir_deref_instr **mapping)
+{
+   return nir_shader_instructions_pass(shader, anv_frag_update_derefs_instr,
+                                       nir_metadata_none, (void *)mapping);
+}
+
+/* Create the new flat per-vertex FS inputs and redirect reads of the
+ * remapped per-primitive inputs to them.
+ */
+static void
+anv_frag_convert_attrs_prim_to_vert(struct nir_shader *nir,
+                                    gl_varying_slot *wa_mapping)
+{
+   /* indexed by slot of per-prim attribute */
+   nir_deref_instr *new_derefs[VARYING_SLOT_MAX] = {NULL, };
+
+   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
+   nir_builder b;
+   nir_builder_init(&b, impl);
+   b.cursor = nir_before_cf_list(&impl->body);
+
+   nir_foreach_shader_in_variable_safe(var, nir) {
+      gl_varying_slot location = var->data.location;
+      gl_varying_slot new_location = wa_mapping[location];
+      if (new_location == 0)
+         continue;
+
+      assert(wa_mapping[new_location] == 0);
+
+      nir_variable *new_var =
+            nir_variable_create(b.shader, nir_var_shader_in, var->type, var->name);
+      new_var->data.location = new_location;
+      new_var->data.location_frac = var->data.location_frac;
+      new_var->data.interpolation = INTERP_MODE_FLAT;
+
+      new_derefs[location] = nir_build_deref_var(&b, new_var);
+   }
+
+   NIR_PASS(_, nir, anv_frag_update_derefs, new_derefs);
+
+   nir_shader_gather_info(nir, impl);
+}
+
+void
+anv_apply_per_prim_attr_wa(struct nir_shader *ms_nir,
+                           struct nir_shader *fs_nir,
+                           struct anv_device *device,
+                           const VkGraphicsPipelineCreateInfo *info)
+{
+   const struct intel_device_info *devinfo = device->info;
+
+   int mesh_conv_prim_attrs_to_vert_attrs =
+         device->physical->instance->mesh_conv_prim_attrs_to_vert_attrs;
+   if (mesh_conv_prim_attrs_to_vert_attrs < 0 &&
+         !intel_needs_workaround(devinfo, 14015590813))
+      mesh_conv_prim_attrs_to_vert_attrs = 0;
+
+   if (mesh_conv_prim_attrs_to_vert_attrs != 0) {
+      uint64_t fs_inputs = 0;
+      nir_foreach_shader_in_variable(var, fs_nir)
+         fs_inputs |= BITFIELD64_BIT(var->data.location);
+
+      void *stage_ctx = ralloc_context(NULL);
+
+      gl_varying_slot wa_mapping[VARYING_SLOT_MAX] = { 0, };
+
+      const bool dup_vertices = abs(mesh_conv_prim_attrs_to_vert_attrs) >= 2;
+      const bool force_conversion = mesh_conv_prim_attrs_to_vert_attrs > 0;
+
+      if (anv_mesh_convert_attrs_prim_to_vert(ms_nir, wa_mapping,
+                                              fs_inputs, info, stage_ctx,
+                                              dup_vertices, force_conversion))
+         anv_frag_convert_attrs_prim_to_vert(fs_nir, wa_mapping);
+
+      ralloc_free(stage_ctx);
+   }
+}
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 65fa682..c216d7e 100644
@@ -1734,6 +1734,13 @@ anv_graphics_pipeline_compile(struct anv_graphics_pipeline *pipeline,
    if (result != VK_SUCCESS)
       goto fail;
 
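+   /* Wa_14015590813: apply the workaround before the linking walk below, so
+    * that any unused outputs it creates can still be removed at link time.
+    */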
+   if (stages[MESA_SHADER_MESH].info && stages[MESA_SHADER_FRAGMENT].info) {
+      anv_apply_per_prim_attr_wa(stages[MESA_SHADER_MESH].nir,
+                                 stages[MESA_SHADER_FRAGMENT].nir,
+                                 device,
+                                 info);
+   }
+
    /* Walk backwards to link */
    struct anv_pipeline_stage *next_stage = NULL;
    for (int i = ARRAY_SIZE(graphics_shader_order) - 1; i >= 0; i--) {
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 02b9806..7082bfc 100644
@@ -1031,6 +1031,7 @@ struct anv_instance {
     struct driOptionCache                       dri_options;
     struct driOptionCache                       available_dri_options;
 
+    int                                         mesh_conv_prim_attrs_to_vert_attrs;
     /**
      * Workarounds for game bugs.
      */
@@ -4193,6 +4194,11 @@ void anv_perf_write_pass_results(struct intel_perf_config *perf,
                                  const struct intel_perf_query_result *accumulated_results,
                                  union VkPerformanceCounterResultKHR *results);
 
+void anv_apply_per_prim_attr_wa(struct nir_shader *ms_nir,
+                                struct nir_shader *fs_nir,
+                                struct anv_device *device,
+                                const VkGraphicsPipelineCreateInfo *info);
+
 /* Use to emit a series of memcpy operations */
 struct anv_memcpy_state {
    struct anv_device *device;
diff --git a/src/intel/vulkan/meson.build b/src/intel/vulkan/meson.build
index e133979..5d63983 100644
@@ -160,6 +160,7 @@ libanv_files = files(
   'anv_kmd_backend.h',
   'anv_measure.c',
   'anv_measure.h',
+  'anv_mesh_perprim_wa.c',
   'anv_nir.h',
   'anv_nir_apply_pipeline_layout.c',
   'anv_nir_compute_push_layout.c',
diff --git a/src/util/driconf.h b/src/util/driconf.h
index c5de0c1..fecff85 100644
    DRI_CONF_OPT_B(anv_sample_mask_out_opengl_behaviour, def, \
                   "Ignore sample mask out when having single sampled target")
 
+#define DRI_CONF_ANV_MESH_CONV_PRIM_ATTRS_TO_VERT_ATTRS(def) \
+   DRI_CONF_OPT_E(anv_mesh_conv_prim_attrs_to_vert_attrs, def, -2, 2, \
+                  "Apply workaround for gfx12.5 per-prim attribute corruption HW bug", \
+                  DRI_CONF_ENUM(-2, "enable attribute conversion and vertex duplication ONLY if needed") \
+                  DRI_CONF_ENUM(-1, "enable attribute conversion ONLY if needed") \
+                  DRI_CONF_ENUM(0,  "disable workaround") \
+                  DRI_CONF_ENUM(1,  "enable attribute conversion ALWAYS") \
+                  DRI_CONF_ENUM(2,  "enable attribute conversion and vertex duplication ALWAYS") )
+
 #define DRI_CONF_ANV_FP64_WORKAROUND_ENABLED(def) \
    DRI_CONF_OPT_B(fp64_workaround_enabled, def, \
                   "Use softpf64 when the shader uses float64, but the device doesn't support that type")
diff --git a/src/util/xmlconfig.h b/src/util/xmlconfig.h
index dc219c9..7405f0c 100644
@@ -105,7 +105,7 @@ typedef struct driOptionDescription {
 
    driOptionInfo info;
    driOptionValue value;
-   driEnumDescription enums[4];
+   driEnumDescription enums[5];
 } driOptionDescription;
 
 /** Returns an XML string describing the options for the driver. */