From 1f41198772dd50c5e3ad07040d3653dbd6980cd7 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Marcin=20=C5=9Alusarz?=
Date: Wed, 6 Jul 2022 17:17:42 +0200
Subject: [PATCH] anv: workaround for per-prim attribute corruption

Wa_14015590813 for gfx 12.5

Reviewed-by: Caio Oliveira
Part-of:
---
 src/intel/vulkan/anv_device.c          |   4 +-
 src/intel/vulkan/anv_mesh_perprim_wa.c | 557 +++++++++++++++++++++++++++++++++
 src/intel/vulkan/anv_pipeline.c        |   7 +
 src/intel/vulkan/anv_private.h         |   6 +
 src/intel/vulkan/meson.build           |   1 +
 src/util/driconf.h                     |   9 +
 src/util/xmlconfig.h                   |   2 +-
 7 files changed, 584 insertions(+), 2 deletions(-)
 create mode 100644 src/intel/vulkan/anv_mesh_perprim_wa.c

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 022cd53..9e411c4 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -82,6 +82,7 @@ static const driOptionDescription anv_dri_options[] = {
       DRI_CONF_ALWAYS_FLUSH_CACHE(false)
       DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
       DRI_CONF_LIMIT_TRIG_INPUT_RANGE(false)
+      DRI_CONF_ANV_MESH_CONV_PRIM_ATTRS_TO_VERT_ATTRS(-2)
    DRI_CONF_SECTION_END
 
    DRI_CONF_SECTION_QUALITY
@@ -1100,7 +1101,8 @@ anv_init_dri_options(struct anv_instance *instance)
       driQueryOptionf(&instance->dri_options, "lower_depth_range_rate");
    instance->no_16bit =
       driQueryOptionb(&instance->dri_options, "no_16bit");
-
+   instance->mesh_conv_prim_attrs_to_vert_attrs =
+      driQueryOptioni(&instance->dri_options, "anv_mesh_conv_prim_attrs_to_vert_attrs");
    instance->fp64_workaround_enabled =
       driQueryOptionb(&instance->dri_options, "fp64_workaround_enabled");
    instance->generated_indirect_threshold =
diff --git a/src/intel/vulkan/anv_mesh_perprim_wa.c b/src/intel/vulkan/anv_mesh_perprim_wa.c
new file mode 100644
index 0000000..f7346b6
--- /dev/null
+++ b/src/intel/vulkan/anv_mesh_perprim_wa.c
@@ -0,0 +1,557 @@
+/*
+ * Copyright © 2022 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "anv_private.h"
+#include "nir_builder.h"
+
+/*
+ * Wa_14015590813 for gfx 12.5.
+ *
+ * This file implements a workaround for a HW bug that causes the fragment
+ * shader to read incorrect per-primitive data if the mesh shader, in
+ * addition to writing per-primitive data, also writes to gl_ClipDistance.
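+ *
+ * For illustration only, in GLSL-like pseudocode (names are hypothetical),
+ * a mesh shader such as:
+ *
+ *    perprimitiveEXT out uint PrimAttr[];   // per-primitive, read by the FS
+ *    ...
+ *    gl_MeshVerticesEXT[v].gl_ClipDistance[0] = d;   // triggers the bug
+ *
+ * may then deliver corrupted PrimAttr values to the fragment shader on
+ * affected hardware.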
+ *
+ * The suggested solution to that bug is to not use per-primitive data by:
+ * - creating new vertices for provoking vertices shared by multiple primitives
+ * - converting per-primitive attributes read by the fragment shader to flat
+ *   per-vertex attributes of the provoking vertex
+ * - modifying the fragment shader to read those per-vertex attributes
+ *
+ * There are at least two types of failures that are not handled very well:
+ * - if the number of varying slots overflows, then only some attributes will
+ *   be converted, leading to corruption of the unconverted attributes
+ * - if the overall MUE size is so large that it doesn't fit in the URB, then
+ *   URB allocation will fail in some way; unfortunately there's no good way
+ *   to tell how big the MUE will be at this point and back out
+ *
+ * This workaround needs to be applied before linking, so that unused outputs
+ * created by this code are removed at link time.
+ *
+ * This workaround can be controlled by a driconf option to either disable it,
+ * reduce its scope, or force-enable it.
+ *
+ * Option "anv_mesh_conv_prim_attrs_to_vert_attrs" is evaluated like this:
+ *  value == 0 - disable the workaround
+ *  value < 0  - enable ONLY if the workaround is required
+ *  value > 0  - enable ALWAYS, even if it's not required
+ *  abs(value) >= 1 - attribute conversion
+ *  abs(value) >= 2 - attribute conversion and vertex duplication
+ *
+ *  Default: -2 (both parts of the workaround, ONLY if it's required)
+ */
+
+static bool
+anv_mesh_convert_attrs_prim_to_vert(struct nir_shader *nir,
+                                    gl_varying_slot *wa_mapping,
+                                    uint64_t fs_inputs,
+                                    const VkGraphicsPipelineCreateInfo *pCreateInfo,
+                                    void *mem_ctx,
+                                    const bool dup_vertices,
+                                    const bool force_conversion)
+{
+   uint64_t per_primitive_outputs = nir->info.per_primitive_outputs;
+   per_primitive_outputs &= ~BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_INDICES);
+
+   if (per_primitive_outputs == 0)
+      return false;
+
+   uint64_t outputs_written = nir->info.outputs_written;
+   uint64_t other_outputs = outputs_written & ~per_primitive_outputs;
+
+   if ((other_outputs & (VARYING_BIT_CLIP_DIST0 | VARYING_BIT_CLIP_DIST1)) == 0)
+      if (!force_conversion)
+         return false;
+
+   uint64_t all_outputs = outputs_written;
+   unsigned attrs = 0;
+
+   uint64_t remapped_outputs = outputs_written & per_primitive_outputs;
+   remapped_outputs &= ~BITFIELD64_BIT(VARYING_SLOT_CULL_PRIMITIVE);
+
+   /* Skip locations not read by the fragment shader, because they will
+    * be eliminated at linking time. Note that some FS inputs may be
+    * removed only after optimizations, so it's possible that we will
+    * create too many variables.
+    */
+   remapped_outputs &= fs_inputs;
+
+   /* Figure out the mapping between per-primitive and new per-vertex outputs. */
+   nir_foreach_shader_out_variable(var, nir) {
+      int location = var->data.location;
+
+      if (!(BITFIELD64_BIT(location) & remapped_outputs))
+         continue;
+
+      /* Although the primitive shading rate, layer and viewport have a
+       * predefined place in the MUE Primitive Header (so we can't really
+       * move them anywhere), we have to copy them to per-vertex space if
+       * the fragment shader reads them.
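+       *
+       * E.g. (a hypothetical case, for illustration) a per-primitive
+       * gl_Layer read by the fragment shader keeps its Primitive Header
+       * slot, but additionally gets a flat per-vertex copy in whichever
+       * free VARYING_SLOT_VARn the loop below picks.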
+ */ + assert(location == VARYING_SLOT_PRIMITIVE_SHADING_RATE || + location == VARYING_SLOT_LAYER || + location == VARYING_SLOT_VIEWPORT || + location == VARYING_SLOT_PRIMITIVE_ID || + location >= VARYING_SLOT_VAR0); + + const struct glsl_type *type = var->type; + if (nir_is_arrayed_io(var, MESA_SHADER_MESH) || var->data.per_view) { + assert(glsl_type_is_array(type)); + type = glsl_get_array_element(type); + } + + unsigned num_slots = glsl_count_attribute_slots(type, false); + + for (gl_varying_slot slot = VARYING_SLOT_VAR0; slot <= VARYING_SLOT_VAR31; slot++) { + uint64_t mask = BITFIELD64_MASK(num_slots) << slot; + if ((all_outputs & mask) == 0) { + wa_mapping[location] = slot; + all_outputs |= mask; + attrs++; + break; + } + } + + if (wa_mapping[location] == 0) { + fprintf(stderr, "Not enough space for hardware per-primitive data corruption work around.\n"); + break; + } + } + + if (attrs == 0) + if (!force_conversion) + return false; + + unsigned provoking_vertex = 0; + + const VkPipelineRasterizationStateCreateInfo *rs_info = pCreateInfo->pRasterizationState; + const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *rs_pv_info = + vk_find_struct_const(rs_info, PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT); + if (rs_pv_info && rs_pv_info->provokingVertexMode == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT) + provoking_vertex = 2; + + unsigned vertices_per_primitive = + num_mesh_vertices_per_primitive(nir->info.mesh.primitive_type); + + nir_function_impl *impl = nir_shader_get_entrypoint(nir); + nir_builder b; + nir_builder_init(&b, impl); + b.cursor = nir_after_cf_list(&impl->body); + + /* wait for all subgroups to finish */ + nir_scoped_barrier(&b, NIR_SCOPE_WORKGROUP); + + nir_ssa_def *zero = nir_imm_int(&b, 0); + + nir_ssa_def *local_invocation_index = nir_build_load_local_invocation_index(&b); + + nir_ssa_def *cmp = nir_ieq(&b, local_invocation_index, zero); + nir_if *if_stmt = nir_push_if(&b, cmp); + { + nir_variable *primitive_count_var = NULL; + nir_variable *primitive_indices_var = NULL; + + unsigned num_other_variables = 0; + nir_foreach_shader_out_variable(var, b.shader) { + if ((BITFIELD64_BIT(var->data.location) & other_outputs) == 0) + continue; + num_other_variables++; + } + + nir_deref_instr **per_vertex_derefs = + ralloc_array(mem_ctx, nir_deref_instr *, num_other_variables); + + unsigned num_per_vertex_variables = 0; + + unsigned processed = 0; + nir_foreach_shader_out_variable(var, b.shader) { + if ((BITFIELD64_BIT(var->data.location) & other_outputs) == 0) + continue; + + switch (var->data.location) { + case VARYING_SLOT_PRIMITIVE_COUNT: + primitive_count_var = var; + break; + case VARYING_SLOT_PRIMITIVE_INDICES: + primitive_indices_var = var; + break; + default: { + const struct glsl_type *type = var->type; + assert(glsl_type_is_array(type)); + const struct glsl_type *array_element_type = + glsl_get_array_element(type); + + if (dup_vertices) { + /* + * Resize type of array output to make space for one extra + * vertex attribute for each primitive, so we ensure that + * the provoking vertex is not shared between primitives. 
+ */ + const struct glsl_type *new_type = + glsl_array_type(array_element_type, + glsl_get_length(type) + + nir->info.mesh.max_primitives_out, + 0); + + var->type = new_type; + } + + per_vertex_derefs[num_per_vertex_variables++] = + nir_build_deref_var(&b, var); + break; + } + } + + ++processed; + } + assert(processed == num_other_variables); + + assert(primitive_count_var != NULL); + assert(primitive_indices_var != NULL); + + /* Update types of derefs to match type of variables they (de)reference. */ + if (dup_vertices) { + nir_foreach_function(function, b.shader) { + if (!function->impl) + continue; + + nir_foreach_block(block, function->impl) { + nir_foreach_instr(instr, block) { + if (instr->type != nir_instr_type_deref) + continue; + + nir_deref_instr *deref = nir_instr_as_deref(instr); + if (deref->deref_type != nir_deref_type_var) + continue; + + if (deref->var->type != deref->type) + deref->type = deref->var->type; + } + } + } + } + + /* indexed by slot of per-prim attribute */ + struct { + nir_deref_instr *per_prim_deref; + nir_deref_instr *per_vert_deref; + } mapping[VARYING_SLOT_MAX] = {{NULL, NULL}, }; + + /* Create new per-vertex output variables mirroring per-primitive variables + * and create derefs for both old and new variables. + */ + nir_foreach_shader_out_variable(var, b.shader) { + gl_varying_slot location = var->data.location; + + if ((BITFIELD64_BIT(location) & (outputs_written & per_primitive_outputs)) == 0) + continue; + if (wa_mapping[location] == 0) + continue; + + const struct glsl_type *type = var->type; + assert(glsl_type_is_array(type)); + const struct glsl_type *array_element_type = glsl_get_array_element(type); + + const struct glsl_type *new_type = + glsl_array_type(array_element_type, + nir->info.mesh.max_vertices_out + + (dup_vertices ? 
+                            0);
+
+         nir_variable *new_var =
+            nir_variable_create(b.shader, nir_var_shader_out, new_type, var->name);
+         assert(wa_mapping[location] >= VARYING_SLOT_VAR0);
+         assert(wa_mapping[location] <= VARYING_SLOT_VAR31);
+         new_var->data.location = wa_mapping[location];
+         new_var->data.interpolation = INTERP_MODE_FLAT;
+
+         mapping[location].per_vert_deref = nir_build_deref_var(&b, new_var);
+         mapping[location].per_prim_deref = nir_build_deref_var(&b, var);
+      }
+
+      nir_ssa_def *trueconst = nir_imm_true(&b);
+
+      /*
+       * for each Primitive (0 : primitiveCount)
+       *    if VertexUsed[PrimitiveIndices[Primitive][provoking vertex]]
+       *       create 1 new vertex at offset "Vertex"
+       *       copy per-vertex attributes of the provoking vertex to the new one
+       *       update PrimitiveIndices[Primitive][provoking vertex]
+       *       Vertex++
+       *    else
+       *       VertexUsed[PrimitiveIndices[Primitive][provoking vertex]] := true
+       *
+       *    for each attribute : mapping
+       *       copy per_prim_attr(Primitive) to per_vert_attr[Primitive][provoking vertex]
+       */
+
+      /* primitive count */
+      nir_ssa_def *primitive_count = nir_load_var(&b, primitive_count_var);
+
+      /* primitive index */
+      nir_variable *primitive_var =
+         nir_local_variable_create(impl, glsl_uint_type(), "Primitive");
+      nir_deref_instr *primitive_deref = nir_build_deref_var(&b, primitive_var);
+      nir_store_deref(&b, primitive_deref, zero, 1);
+
+      /* vertex index */
+      nir_variable *vertex_var =
+         nir_local_variable_create(impl, glsl_uint_type(), "Vertex");
+      nir_deref_instr *vertex_deref = nir_build_deref_var(&b, vertex_var);
+      nir_store_deref(&b, vertex_deref, nir_imm_int(&b, nir->info.mesh.max_vertices_out), 1);
+
+      /* used-vertex flags, one bool per vertex */
+      const struct glsl_type *used_vertex_type =
+         glsl_array_type(glsl_bool_type(),
+                         nir->info.mesh.max_vertices_out,
+                         0);
+      nir_variable *used_vertex_var =
+         nir_local_variable_create(impl, used_vertex_type, "VertexUsed");
+      nir_deref_instr *used_vertex_deref =
+         nir_build_deref_var(&b, used_vertex_var);
+      /* Initialize it as "not used" */
+      for (unsigned i = 0; i < nir->info.mesh.max_vertices_out; ++i) {
+         nir_deref_instr *indexed_used_vertex_deref =
+               nir_build_deref_array(&b, used_vertex_deref, nir_imm_int(&b, i));
+         nir_store_deref(&b, indexed_used_vertex_deref, nir_imm_false(&b), 1);
+      }
+
+      nir_loop *loop = nir_push_loop(&b);
+      {
+         nir_ssa_def *primitive = nir_load_deref(&b, primitive_deref);
+         nir_ssa_def *cmp = nir_ige(&b, primitive, primitive_count);
+
+         nir_if *loop_check = nir_push_if(&b, cmp);
+         nir_jump(&b, nir_jump_break);
+         nir_pop_if(&b, loop_check);
+
+         nir_deref_instr *primitive_indices_deref =
+            nir_build_deref_var(&b, primitive_indices_var);
+         nir_deref_instr *indexed_primitive_indices_deref;
+         nir_ssa_def *src_vertex;
+         nir_ssa_def *prim_indices;
+
+         if (nir->info.mesh.nv) {
+            /* flat array, but we can deref each index directly */
+            nir_ssa_def *index_index =
+               nir_imul(&b, primitive, nir_imm_int(&b, vertices_per_primitive));
+            index_index = nir_iadd(&b, index_index, nir_imm_int(&b, provoking_vertex));
+            indexed_primitive_indices_deref = nir_build_deref_array(&b, primitive_indices_deref, index_index);
+            src_vertex = nir_load_deref(&b, indexed_primitive_indices_deref);
+            prim_indices = NULL;
+         } else {
+            /* array of vectors; we have to load the whole vector and
+             * extract the provoking-vertex component from it
+             */
+            indexed_primitive_indices_deref = nir_build_deref_array(&b, primitive_indices_deref, primitive);
+            prim_indices = nir_load_deref(&b, indexed_primitive_indices_deref);
+            src_vertex = nir_channel(&b, prim_indices, provoking_vertex);
+         }
+
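+         /* dst_vertex is the slot a duplicated provoking vertex would be
+          * written to; it is only consumed (and the "Vertex" counter
+          * advanced) if src_vertex was already used as the provoking
+          * vertex of an earlier primitive.
+          */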
+         nir_ssa_def *dst_vertex = nir_load_deref(&b, vertex_deref);
+
+         nir_deref_instr *indexed_used_vertex_deref =
+               nir_build_deref_array(&b, used_vertex_deref, src_vertex);
+         nir_ssa_def *used_vertex = nir_load_deref(&b, indexed_used_vertex_deref);
+         if (!dup_vertices)
+            used_vertex = nir_imm_false(&b);
+
+         nir_if *vertex_used_check = nir_push_if(&b, used_vertex);
+         {
+            for (unsigned a = 0; a < num_per_vertex_variables; ++a) {
+               nir_deref_instr *attr_arr = per_vertex_derefs[a];
+               nir_deref_instr *src = nir_build_deref_array(&b, attr_arr, src_vertex);
+               nir_deref_instr *dst = nir_build_deref_array(&b, attr_arr, dst_vertex);
+
+               nir_copy_deref(&b, dst, src);
+            }
+
+            if (nir->info.mesh.nv) {
+               nir_store_deref(&b, indexed_primitive_indices_deref, dst_vertex, 1);
+            } else {
+               /* replace one component of the primitive indices vector */
+               nir_ssa_def *new_val =
+                  nir_vector_insert_imm(&b, prim_indices, dst_vertex, provoking_vertex);
+
+               /* and store the complete vector */
+               nir_store_deref(&b, indexed_primitive_indices_deref, new_val,
+                               BITFIELD_MASK(vertices_per_primitive));
+            }
+
+            nir_store_deref(&b, vertex_deref, nir_iadd_imm(&b, dst_vertex, 1), 1);
+
+            for (unsigned i = 0; i < ARRAY_SIZE(mapping); ++i) {
+               if (!mapping[i].per_vert_deref)
+                  continue;
+
+               nir_deref_instr *src =
+                  nir_build_deref_array(&b, mapping[i].per_prim_deref, primitive);
+               nir_deref_instr *dst =
+                  nir_build_deref_array(&b, mapping[i].per_vert_deref, dst_vertex);
+
+               nir_copy_deref(&b, dst, src);
+            }
+         }
+         nir_push_else(&b, vertex_used_check);
+         {
+            nir_store_deref(&b, indexed_used_vertex_deref, trueconst, 1);
+
+            for (unsigned i = 0; i < ARRAY_SIZE(mapping); ++i) {
+               if (!mapping[i].per_vert_deref)
+                  continue;
+
+               nir_deref_instr *src =
+                  nir_build_deref_array(&b, mapping[i].per_prim_deref, primitive);
+               nir_deref_instr *dst =
+                  nir_build_deref_array(&b, mapping[i].per_vert_deref, src_vertex);
+
+               nir_copy_deref(&b, dst, src);
+            }
+         }
+         nir_pop_if(&b, vertex_used_check);
+
+         nir_store_deref(&b, primitive_deref, nir_iadd_imm(&b, primitive, 1), 1);
+      }
+      nir_pop_loop(&b, loop);
+   }
+   nir_pop_if(&b, if_stmt); /* local_invocation_index == 0 */
+
+   if (dup_vertices)
+      nir->info.mesh.max_vertices_out += nir->info.mesh.max_primitives_out;
+
+   if (should_print_nir(nir)) {
+      printf("%s\n", __func__);
+      nir_print_shader(nir, stdout);
+   }
+
+   /* deal with copy_derefs */
+   NIR_PASS(_, nir, nir_split_var_copies);
+   NIR_PASS(_, nir, nir_lower_var_copies);
+
+   nir_shader_gather_info(nir, impl);
+
+   return true;
+}
+
+static bool
+anv_frag_update_derefs_instr(struct nir_builder *b, nir_instr *instr, void *data)
+{
+   if (instr->type != nir_instr_type_deref)
+      return false;
+
+   nir_deref_instr *deref = nir_instr_as_deref(instr);
+   if (deref->deref_type != nir_deref_type_var)
+      return false;
+
+   nir_variable *var = deref->var;
+   if (!(var->data.mode & nir_var_shader_in))
+      return false;
+
+   int location = var->data.location;
+   nir_deref_instr **new_derefs = (nir_deref_instr **)data;
+   if (new_derefs[location] == NULL)
+      return false;
+
+   assert(deref->dest.is_ssa);
+   assert(new_derefs[location]->dest.is_ssa);
+
+   nir_instr_remove(&deref->instr);
+   nir_ssa_def_rewrite_uses(&deref->dest.ssa, &new_derefs[location]->dest.ssa);
+
+   return true;
+}
+
+static bool
+anv_frag_update_derefs(nir_shader *shader, nir_deref_instr **mapping)
+{
+   return nir_shader_instructions_pass(shader, anv_frag_update_derefs_instr,
+                                       nir_metadata_none, (void *)mapping);
+}
+
+/* Update fragment shader inputs with the new per-vertex inputs.
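+ *
+ * For every per-primitive input whose slot was remapped by
+ * anv_mesh_convert_attrs_prim_to_vert() (i.e. wa_mapping[slot] != 0),
+ * create a flat per-vertex input variable at the new slot and rewrite
+ * all derefs of the old variable to point at the new one.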
+ */
+static void
+anv_frag_convert_attrs_prim_to_vert(struct nir_shader *nir,
+                                    gl_varying_slot *wa_mapping)
+{
+   /* indexed by the slot of the per-primitive attribute */
+   nir_deref_instr *new_derefs[VARYING_SLOT_MAX] = {NULL, };
+
+   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
+   nir_builder b;
+   nir_builder_init(&b, impl);
+   b.cursor = nir_before_cf_list(&impl->body);
+
+   nir_foreach_shader_in_variable_safe(var, nir) {
+      gl_varying_slot location = var->data.location;
+      gl_varying_slot new_location = wa_mapping[location];
+      if (new_location == 0)
+         continue;
+
+      assert(wa_mapping[new_location] == 0);
+
+      nir_variable *new_var =
+         nir_variable_create(b.shader, nir_var_shader_in, var->type, var->name);
+      new_var->data.location = new_location;
+      new_var->data.location_frac = var->data.location_frac;
+      new_var->data.interpolation = INTERP_MODE_FLAT;
+
+      new_derefs[location] = nir_build_deref_var(&b, new_var);
+   }
+
+   NIR_PASS(_, nir, anv_frag_update_derefs, new_derefs);
+
+   nir_shader_gather_info(nir, impl);
+}
+
+void
+anv_apply_per_prim_attr_wa(struct nir_shader *ms_nir,
+                           struct nir_shader *fs_nir,
+                           struct anv_device *device,
+                           const VkGraphicsPipelineCreateInfo *info)
+{
+   const struct intel_device_info *devinfo = device->info;
+
+   int mesh_conv_prim_attrs_to_vert_attrs =
+      device->physical->instance->mesh_conv_prim_attrs_to_vert_attrs;
+   if (mesh_conv_prim_attrs_to_vert_attrs < 0 &&
+       !intel_needs_workaround(devinfo, 14015590813))
+      mesh_conv_prim_attrs_to_vert_attrs = 0;
+
+   if (mesh_conv_prim_attrs_to_vert_attrs != 0) {
+      uint64_t fs_inputs = 0;
+      nir_foreach_shader_in_variable(var, fs_nir)
+         fs_inputs |= BITFIELD64_BIT(var->data.location);
+
+      void *stage_ctx = ralloc_context(NULL);
+
+      gl_varying_slot wa_mapping[VARYING_SLOT_MAX] = { 0, };
+
+      const bool dup_vertices = abs(mesh_conv_prim_attrs_to_vert_attrs) >= 2;
+      const bool force_conversion = mesh_conv_prim_attrs_to_vert_attrs > 0;
+
+      if (anv_mesh_convert_attrs_prim_to_vert(ms_nir, wa_mapping,
+                                              fs_inputs, info, stage_ctx,
+                                              dup_vertices, force_conversion))
+         anv_frag_convert_attrs_prim_to_vert(fs_nir, wa_mapping);
+
+      ralloc_free(stage_ctx);
+   }
+}
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 65fa682..c216d7e 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -1734,6 +1734,13 @@ anv_graphics_pipeline_compile(struct anv_graphics_pipeline *pipeline,
    if (result != VK_SUCCESS)
       goto fail;
 
+   if (stages[MESA_SHADER_MESH].info && stages[MESA_SHADER_FRAGMENT].info) {
+      anv_apply_per_prim_attr_wa(stages[MESA_SHADER_MESH].nir,
+                                 stages[MESA_SHADER_FRAGMENT].nir,
+                                 device,
+                                 info);
+   }
+
    /* Walk backwards to link */
    struct anv_pipeline_stage *next_stage = NULL;
    for (int i = ARRAY_SIZE(graphics_shader_order) - 1; i >= 0; i--) {
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 02b9806..7082bfc 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1031,6 +1031,7 @@ struct anv_instance {
     struct driOptionCache                       dri_options;
     struct driOptionCache                       available_dri_options;
 
+    int                                         mesh_conv_prim_attrs_to_vert_attrs;
     /**
      * Workarounds for game bugs.
      */
@@ -4193,6 +4194,11 @@ void anv_perf_write_pass_results(struct intel_perf_config *perf,
                                 const struct intel_perf_query_result *accumulated_results,
                                 union VkPerformanceCounterResultKHR *results);
 
+void anv_apply_per_prim_attr_wa(struct nir_shader *ms_nir,
+                                struct nir_shader *fs_nir,
+                                struct anv_device *device,
+                                const VkGraphicsPipelineCreateInfo *info);
+
 /* Use to emit a series of memcpy operations */
 struct anv_memcpy_state {
    struct anv_device *device;
diff --git a/src/intel/vulkan/meson.build b/src/intel/vulkan/meson.build
index e133979..5d63983 100644
--- a/src/intel/vulkan/meson.build
+++ b/src/intel/vulkan/meson.build
@@ -160,6 +160,7 @@ libanv_files = files(
   'anv_kmd_backend.h',
   'anv_measure.c',
   'anv_measure.h',
+  'anv_mesh_perprim_wa.c',
   'anv_nir.h',
   'anv_nir_apply_pipeline_layout.c',
   'anv_nir_compute_push_layout.c',
diff --git a/src/util/driconf.h b/src/util/driconf.h
index c5de0c1..fecff85 100644
--- a/src/util/driconf.h
+++ b/src/util/driconf.h
@@ -637,6 +637,15 @@
    DRI_CONF_OPT_B(anv_sample_mask_out_opengl_behaviour, def, \
                   "Ignore sample mask out when having single sampled target")
 
+#define DRI_CONF_ANV_MESH_CONV_PRIM_ATTRS_TO_VERT_ATTRS(def) \
+   DRI_CONF_OPT_E(anv_mesh_conv_prim_attrs_to_vert_attrs, def, -2, 2, \
+                  "Apply workaround for gfx12.5 per-prim attribute corruption HW bug", \
+                  DRI_CONF_ENUM(-2, "enable attribute conversion and vertex duplication ONLY if needed") \
+                  DRI_CONF_ENUM(-1, "enable attribute conversion ONLY if needed") \
+                  DRI_CONF_ENUM(0, "disable workaround") \
+                  DRI_CONF_ENUM(1, "enable attribute conversion ALWAYS") \
+                  DRI_CONF_ENUM(2, "enable attribute conversion and vertex duplication ALWAYS") )
+
 #define DRI_CONF_ANV_FP64_WORKAROUND_ENABLED(def) \
    DRI_CONF_OPT_B(fp64_workaround_enabled, def, \
                   "Use softpf64 when the shader uses float64, but the device doesn't support that type")
diff --git a/src/util/xmlconfig.h b/src/util/xmlconfig.h
index dc219c9..7405f0c 100644
--- a/src/util/xmlconfig.h
+++ b/src/util/xmlconfig.h
@@ -105,7 +105,7 @@ typedef struct driOptionDescription {
    driOptionInfo info;
    driOptionValue value;
 
-   driEnumDescription enums[4];
+   driEnumDescription enums[5];
 } driOptionDescription;
 
 /** Returns an XML string describing the options for the driver. */
-- 
2.7.4
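
Usage note: the new driconf option can be overridden per application through
the usual drirc mechanism. A minimal sketch, assuming a hypothetical
application name and executable:

    <driconf>
       <device driver="anv">
          <application name="example" executable="example">
             <option name="anv_mesh_conv_prim_attrs_to_vert_attrs" value="2" />
          </application>
       </device>
    </driconf>

Here value="2" force-enables both attribute conversion and vertex
duplication, while the default of -2 applies both parts only on hardware
that needs Wa_14015590813.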