zink: add a workaround for a nir_assign_io_var_locations bug
authorMike Blumenkrantz <michael.blumenkrantz@gmail.com>
Fri, 7 Apr 2023 12:12:42 +0000 (08:12 -0400)
committerMarge Bot <emma+marge@anholt.net>
Thu, 27 Apr 2023 01:33:16 +0000 (01:33 +0000)
drivers that use nir_assign_io_var_locations() with EXT_shader_object all
have the same bug with a shader interface that looks like this:

shader output block:
* PSIZ
* VAR0
* VAR8

shader input block:
* VAR0
* VAR8

in this case, output driver locations will be assigned like:
* PSIZ=0
* VAR0=1
* VAR8=2

and input driver locations will be:
* VAR0=0
* VAR8=1

which breaks the shaders even though this is a totally legitimate thing
to do

thus, a second set of shaders have to be created without PSIZ to work around
the bug since I've already spent 18+ hours trying to fix it and have only succeeded
in breaking every driver that uses it

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22725>

src/gallium/drivers/zink/zink_compiler.c
src/gallium/drivers/zink/zink_program.c
src/gallium/drivers/zink/zink_types.h

index 5ecb317..2927070 100644 (file)
@@ -3783,15 +3783,36 @@ zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs)
    optimize_nir(nir, zs);
    zink_descriptor_shader_init(screen, zs);
    zs->sinfo.last_vertex = zs->sinfo.have_xfb;
+   nir_shader *nir_clone = NULL;
+   if (screen->info.have_EXT_shader_object)
+      nir_clone = nir_shader_clone(nir, nir);
    struct zink_shader_object obj = compile_module(screen, zs, nir, true);
-   /* always try to pre-generate a tcs in case it's needed */
-   if (zs->info.stage == MESA_SHADER_TESS_EVAL && screen->info.have_EXT_shader_object && !zs->info.internal) {
-      nir_shader *nir_tcs = NULL;
-      /* use max pcp for compat */
-      zs->non_fs.generated_tcs = zink_shader_tcs_create(screen, nir, 32, &nir_tcs);
-      nir_tcs->info.separate_shader = true;
-      zs->non_fs.generated_tcs->precompile.obj = zink_shader_compile_separate(screen, zs->non_fs.generated_tcs);
-      ralloc_free(nir_tcs);
+   if (screen->info.have_EXT_shader_object && !zs->info.internal) {
+      /* always try to pre-generate a tcs in case it's needed */
+      if (zs->info.stage == MESA_SHADER_TESS_EVAL) {
+         nir_shader *nir_tcs = NULL;
+         /* use max pcp for compat */
+         zs->non_fs.generated_tcs = zink_shader_tcs_create(screen, nir_clone, 32, &nir_tcs);
+         nir_tcs->info.separate_shader = true;
+         zs->non_fs.generated_tcs->precompile.obj = zink_shader_compile_separate(screen, zs->non_fs.generated_tcs);
+         ralloc_free(nir_tcs);
+      }
+      if (zs->info.stage == MESA_SHADER_VERTEX || zs->info.stage == MESA_SHADER_TESS_EVAL) {
+         /* create a second variant with PSIZ removed:
+          * this works around a bug in drivers using nir_assign_io_var_locations()
+          * where builtins that aren't read by following stages get assigned
+          * driver locations before varyings and break the i/o interface between shaders even
+          * though zink has correctly assigned all locations
+          */
+         nir_variable *var = nir_find_variable_with_location(nir_clone, nir_var_shader_out, VARYING_SLOT_PSIZ);
+         if (var && !var->data.explicit_location) {
+            var->data.mode = nir_var_shader_temp;
+            nir_fixup_deref_modes(nir_clone);
+            NIR_PASS_V(nir_clone, nir_remove_dead_variables, nir_var_shader_temp, NULL);
+            optimize_nir(nir_clone, NULL);
+            zs->precompile.no_psiz_obj = compile_module(screen, zs, nir_clone, true);
+         }
+      }
    }
    ralloc_free(nir);
    return obj;
@@ -5127,6 +5148,7 @@ zink_shader_free(struct zink_screen *screen, struct zink_shader *shader)
    zink_descriptor_shader_deinit(screen, shader);
    if (screen->info.have_EXT_shader_object) {
       VKSCR(DestroyShaderEXT)(screen->dev, shader->precompile.obj.obj, NULL);
+      VKSCR(DestroyShaderEXT)(screen->dev, shader->precompile.no_psiz_obj.obj, NULL);
    } else {
       if (shader->precompile.obj.mod)
          VKSCR(DestroyShaderModule)(screen->dev, shader->precompile.obj.mod, NULL);
index 4cab1a0..b48424f 100644 (file)
@@ -1180,7 +1180,11 @@ create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stag
          _mesa_set_add(prog->shaders[i]->programs, prog);
          simple_mtx_unlock(&prog->shaders[i]->lock);
          if (screen->info.have_EXT_shader_object) {
-            prog->objects[i] = prog->shaders[i]->precompile.obj.obj;
+            /* FIXME: delete this if nir_assign_io_var_locations is ever fixed */
+            if (prog->last_vertex_stage != prog->shaders[i] && (i == MESA_SHADER_VERTEX || i == MESA_SHADER_TESS_EVAL))
+               prog->objects[i] = prog->shaders[i]->precompile.no_psiz_obj.obj;
+            if (!prog->objects[i])
+               prog->objects[i] = prog->shaders[i]->precompile.obj.obj;
          }
          refs++;
       }
index a528e30..9a95914 100644 (file)
@@ -768,6 +768,7 @@ struct zink_shader {
    struct {
       struct util_queue_fence fence;
       struct zink_shader_object obj;
+      struct zink_shader_object no_psiz_obj; //avoid a crippling bug in nir_assign_io_var_locations()
       VkDescriptorSetLayout dsl;
       VkPipelineLayout layout;
       VkPipeline gpl;