zink: enable EXT_shader_object for generic precompiles
author Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
Mon, 3 Apr 2023 20:35:40 +0000 (16:35 -0400)
committer Marge Bot <emma+marge@anholt.net>
Thu, 27 Apr 2023 01:33:17 +0000 (01:33 +0000)
this should match the functionality of GPL, but it should also (theoretically)
have significantly less CPU overhead, so I've enabled this to be the new
default when available

currently I'm not changing any of the requirements for shader object enablement,
so this is probably only usable on desktops

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22725>

src/gallium/drivers/zink/zink_compiler.c
src/gallium/drivers/zink/zink_compiler.h
src/gallium/drivers/zink/zink_draw.cpp
src/gallium/drivers/zink/zink_pipeline.c
src/gallium/drivers/zink/zink_program.c
src/gallium/drivers/zink/zink_program_state.hpp
src/gallium/drivers/zink/zink_types.h

index 5f81000..7ad419f 100644 (file)
@@ -3197,7 +3197,7 @@ zink_shader_dump(const struct zink_shader *zs, void *words, size_t size, const c
 }
 
 struct zink_shader_object
-zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv, bool can_shobj)
+zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv, bool can_shobj, struct zink_program *pg)
 {
    VkShaderModuleCreateInfo smci = {0};
    VkShaderCreateInfoEXT sci = {0};
@@ -3220,10 +3220,15 @@ zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, st
    sci.codeSize = spirv->num_words * sizeof(uint32_t);
    sci.pCode = spirv->words;
    sci.pName = "main";
-   sci.setLayoutCount = zs->info.stage + 1;
    VkDescriptorSetLayout dsl[ZINK_GFX_SHADER_COUNT] = {0};
-   dsl[zs->info.stage] = zs->precompile.dsl;;
-   sci.pSetLayouts = dsl;
+   if (pg) {
+      sci.setLayoutCount = pg->num_dsl;
+      sci.pSetLayouts = pg->dsl;
+   } else {
+      sci.setLayoutCount = zs->info.stage + 1;
+      dsl[zs->info.stage] = zs->precompile.dsl;;
+      sci.pSetLayouts = dsl;
+   }
    VkPushConstantRange pcr;
    pcr.stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
    pcr.offset = 0;
@@ -3525,7 +3530,7 @@ invert_point_coord(nir_shader *nir)
 }
 
 static struct zink_shader_object
-compile_module(struct zink_screen *screen, struct zink_shader *zs, nir_shader *nir, bool can_shobj)
+compile_module(struct zink_screen *screen, struct zink_shader *zs, nir_shader *nir, bool can_shobj, struct zink_program *pg)
 {
    struct zink_shader_info *sinfo = &zs->sinfo;
    prune_io(nir);
@@ -3535,7 +3540,7 @@ compile_module(struct zink_screen *screen, struct zink_shader *zs, nir_shader *n
    struct zink_shader_object obj;
    struct spirv_shader *spirv = nir_to_spirv(nir, sinfo, screen->spirv_version);
    if (spirv)
-      obj = zink_shader_spirv_compile(screen, zs, spirv, can_shobj);
+      obj = zink_shader_spirv_compile(screen, zs, spirv, can_shobj, pg);
 
    /* TODO: determine if there's any reason to cache spirv output? */
    if (zs->info.stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated)
@@ -3547,7 +3552,7 @@ compile_module(struct zink_screen *screen, struct zink_shader *zs, nir_shader *n
 
 struct zink_shader_object
 zink_shader_compile(struct zink_screen *screen, bool can_shobj, struct zink_shader *zs,
-                    nir_shader *nir, const struct zink_shader_key *key, const void *extra_data)
+                    nir_shader *nir, const struct zink_shader_key *key, const void *extra_data, struct zink_program *pg)
 {
    struct zink_shader_info *sinfo = &zs->sinfo;
    bool need_optimize = false;
@@ -3739,7 +3744,7 @@ zink_shader_compile(struct zink_screen *screen, bool can_shobj, struct zink_shad
    } else if (need_optimize)
       optimize_nir(nir, zs);
    
-   struct zink_shader_object obj = compile_module(screen, zs, nir, false);
+   struct zink_shader_object obj = compile_module(screen, zs, nir, can_shobj, pg);
    ralloc_free(nir);
    return obj;
 }
@@ -3786,7 +3791,7 @@ zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs)
    nir_shader *nir_clone = NULL;
    if (screen->info.have_EXT_shader_object)
       nir_clone = nir_shader_clone(nir, nir);
-   struct zink_shader_object obj = compile_module(screen, zs, nir, true);
+   struct zink_shader_object obj = compile_module(screen, zs, nir, true, NULL);
    if (screen->info.have_EXT_shader_object && !zs->info.internal) {
       /* always try to pre-generate a tcs in case it's needed */
       if (zs->info.stage == MESA_SHADER_TESS_EVAL) {
@@ -3810,7 +3815,7 @@ zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs)
             nir_fixup_deref_modes(nir_clone);
             NIR_PASS_V(nir_clone, nir_remove_dead_variables, nir_var_shader_temp, NULL);
             optimize_nir(nir_clone, NULL);
-            zs->precompile.no_psiz_obj = compile_module(screen, zs, nir_clone, true);
+            zs->precompile.no_psiz_obj = compile_module(screen, zs, nir_clone, true, NULL);
             spirv_shader_delete(zs->precompile.no_psiz_obj.spirv);
             zs->precompile.no_psiz_obj.spirv = NULL;
          }
@@ -5251,12 +5256,12 @@ zink_gfx_shader_free(struct zink_screen *screen, struct zink_shader *shader)
 
 
 struct zink_shader_object
-zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices)
+zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices, bool can_shobj, struct zink_program *pg)
 {
    assert(zs->info.stage == MESA_SHADER_TESS_CTRL);
    /* shortcut all the nir passes since we just have to change this one word */
    zs->spirv->words[zs->spirv->tcs_vertices_out_word] = patch_vertices;
-   return zink_shader_spirv_compile(screen, zs, NULL, false);
+   return zink_shader_spirv_compile(screen, zs, NULL, can_shobj, pg);
 }
 
 /* creating a passthrough tcs shader that's roughly:
index 3ee13ed..22bfb8c 100644 (file)
@@ -69,7 +69,7 @@ void
 zink_compiler_assign_io(struct zink_screen *screen, nir_shader *producer, nir_shader *consumer);
 /* pass very large shader key data with extra_data */
 struct zink_shader_object
-zink_shader_compile(struct zink_screen *screen, bool can_shobj, struct zink_shader *zs, nir_shader *nir, const struct zink_shader_key *key, const void *extra_data);
+zink_shader_compile(struct zink_screen *screen, bool can_shobj, struct zink_shader *zs, nir_shader *nir, const struct zink_shader_key *key, const void *extra_data, struct zink_program *pg);
 struct zink_shader_object
 zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs);
 struct zink_shader *
@@ -85,9 +85,9 @@ void
 zink_gfx_shader_free(struct zink_screen *screen, struct zink_shader *shader);
 
 struct zink_shader_object
-zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv, bool can_shobj);
+zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv, bool can_shobj, struct zink_program *pg);
 struct zink_shader_object
-zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices);
+zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices, bool can_shobj, struct zink_program *pg);
 struct zink_shader *
 zink_shader_tcs_create(struct zink_screen *screen, nir_shader *tes, unsigned vertices_per_patch, nir_shader **nir_ret);
 
index 59692fd..7950db4 100644 (file)
@@ -272,13 +272,25 @@ update_gfx_pipeline(struct zink_context *ctx, struct zink_batch_state *bs, enum
 {
    VkPipeline prev_pipeline = ctx->gfx_pipeline_state.pipeline;
    const struct zink_screen *screen = zink_screen(ctx->base.screen);
-   bool shaders_changed = ctx->gfx_dirty;
+   bool shaders_changed = ctx->gfx_dirty || ctx->dirty_gfx_stages;
    if (screen->optimal_keys && !ctx->is_generated_gs_bound)
       zink_gfx_program_update_optimal(ctx);
    else
       zink_gfx_program_update(ctx);
    bool pipeline_changed = false;
-   if (ctx->curr_program->base.uses_shobj) {
+   VkPipeline pipeline = VK_NULL_HANDLE;
+   if (!ctx->curr_program->base.uses_shobj) {
+      if (screen->info.have_EXT_graphics_pipeline_library)
+         pipeline = zink_get_gfx_pipeline<DYNAMIC_STATE, true>(ctx, ctx->curr_program, &ctx->gfx_pipeline_state, mode);
+      else
+         pipeline = zink_get_gfx_pipeline<DYNAMIC_STATE, false>(ctx, ctx->curr_program, &ctx->gfx_pipeline_state, mode);
+   }
+   if (pipeline) {
+      pipeline_changed = prev_pipeline != pipeline;
+      if (BATCH_CHANGED || pipeline_changed || ctx->shobj_draw)
+         VKCTX(CmdBindPipeline)(bs->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
+      ctx->shobj_draw = false;
+   } else {
       if (BATCH_CHANGED || shaders_changed || !ctx->shobj_draw) {
          VkShaderStageFlagBits stages[] = {
             VK_SHADER_STAGE_VERTEX_BIT,
@@ -290,19 +302,8 @@ update_gfx_pipeline(struct zink_context *ctx, struct zink_batch_state *bs, enum
          /* always rebind all stages */
          VKCTX(CmdBindShadersEXT)(bs->cmdbuf, ZINK_GFX_SHADER_COUNT, stages, ctx->curr_program->objects);
          VKCTX(CmdSetDepthBiasEnable)(bs->cmdbuf, VK_TRUE);
-         pipeline_changed = false;
       }
       ctx->shobj_draw = true;
-   } else {
-      VkPipeline pipeline;
-      if (screen->info.have_EXT_graphics_pipeline_library)
-         pipeline = zink_get_gfx_pipeline<DYNAMIC_STATE, true>(ctx, ctx->curr_program, &ctx->gfx_pipeline_state, mode);
-      else
-         pipeline = zink_get_gfx_pipeline<DYNAMIC_STATE, false>(ctx, ctx->curr_program, &ctx->gfx_pipeline_state, mode);
-      pipeline_changed = prev_pipeline != pipeline;
-      if (BATCH_CHANGED || pipeline_changed || ctx->shobj_draw)
-         VKCTX(CmdBindPipeline)(bs->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
-      ctx->shobj_draw = false;
    }
    return pipeline_changed;
 }
index dc0b83b..7a541d4 100644 (file)
@@ -26,6 +26,7 @@
 #include "zink_pipeline.h"
 
 #include "zink_compiler.h"
+#include "nir_to_spirv/nir_to_spirv.h"
 #include "zink_context.h"
 #include "zink_program.h"
 #include "zink_render_pass.h"
@@ -375,6 +376,7 @@ zink_create_gfx_pipeline(struct zink_screen *screen,
    }
 
    VkPipelineShaderStageCreateInfo shader_stages[ZINK_GFX_SHADER_COUNT];
+   VkShaderModuleCreateInfo smci[ZINK_GFX_SHADER_COUNT] = {0};
    uint32_t num_stages = 0;
    for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
       if (!prog->shaders[i])
@@ -383,8 +385,15 @@ zink_create_gfx_pipeline(struct zink_screen *screen,
       VkPipelineShaderStageCreateInfo stage = {0};
       stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
       stage.stage = mesa_to_vk_shader_stage(i);
-      stage.module = objs[i].mod;
       stage.pName = "main";
+      if (objs[i].mod) {
+         stage.module = objs[i].mod;
+      } else {
+         smci[i].sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
+         stage.pNext = &smci[i];
+         smci[i].codeSize = objs[i].spirv->num_words * sizeof(uint32_t);
+         smci[i].pCode = objs[i].spirv->words;
+      }
       shader_stages[num_stages++] = stage;
    }
    assert(num_stages > 0);
index 7868317..011e943 100644 (file)
@@ -149,15 +149,15 @@ create_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *scr
    unsigned patch_vertices = state->shader_keys.key[MESA_SHADER_TESS_CTRL].key.tcs.patch_vertices;
    if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated && zs->spirv) {
       assert(ctx); //TODO async
-      zm->obj = zink_shader_tcs_compile(screen, zs, patch_vertices);
+      zm->obj = zink_shader_tcs_compile(screen, zs, patch_vertices, prog->base.uses_shobj, &prog->base);
    } else {
-      zm->obj = zink_shader_compile(screen, false, zs, zink_shader_blob_deserialize(screen, &prog->blobs[stage]), key, &ctx->di.zs_swizzle[stage]);
+      zm->obj = zink_shader_compile(screen, prog->base.uses_shobj, zs, zink_shader_blob_deserialize(screen, &prog->blobs[stage]), key, &ctx->di.zs_swizzle[stage], &prog->base);
    }
    if (!zm->obj.mod) {
       FREE(zm);
       return NULL;
    }
-   zm->shobj = false;
+   zm->shobj = prog->base.uses_shobj;
    zm->num_uniforms = inline_size;
    if (!is_nongenerated_tcs) {
       zm->key_size = key->size;
@@ -267,16 +267,16 @@ create_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_scr
          struct zink_tcs_key *tcs = (struct zink_tcs_key*)key;
          patch_vertices = tcs->patch_vertices;
       }
-      zm->obj = zink_shader_tcs_compile(screen, zs, patch_vertices);
+      zm->obj = zink_shader_tcs_compile(screen, zs, patch_vertices, prog->base.uses_shobj, &prog->base);
    } else {
-      zm->obj = zink_shader_compile(screen, false, zs, zink_shader_blob_deserialize(screen, &prog->blobs[stage]),
-                                    (struct zink_shader_key*)key, shadow_needs_shader_swizzle ? &ctx->di.zs_swizzle[stage] : NULL);
+      zm->obj = zink_shader_compile(screen, prog->base.uses_shobj, zs, zink_shader_blob_deserialize(screen, &prog->blobs[stage]),
+                                    (struct zink_shader_key*)key, shadow_needs_shader_swizzle ? &ctx->di.zs_swizzle[stage] : NULL, &prog->base);
    }
    if (!zm->obj.mod) {
       FREE(zm);
       return NULL;
    }
-   zm->shobj = false;
+   zm->shobj = prog->base.uses_shobj;
    /* non-generated tcs won't use the shader key */
    const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
    if (key && !is_nongenerated_tcs) {
@@ -396,6 +396,7 @@ update_gfx_shader_modules(struct zink_context *ctx,
       hash_changed = true;
       default_variants &= zm->default_variant;
       prog->objs[i] = zm->obj;
+      prog->objects[i] = zm->obj.obj;
       prog->module_hash[i] = zm->hash;
       if (has_inline) {
          if (zm->num_uniforms)
@@ -436,7 +437,8 @@ generate_gfx_program_modules(struct zink_context *ctx, struct zink_screen *scree
                                                                      inline_size, nonseamless_size,
                                                                      screen->driconf.inline_uniforms, screen->info.have_EXT_non_seamless_cube_map);
       state->modules[i] = zm->obj.mod;
-      prog->objs[i] = zm->obj ;
+      prog->objs[i] = zm->obj;
+      prog->objects[i] = zm->obj.obj;
       prog->module_hash[i] = zm->hash;
       if (zm->num_uniforms)
          prog->inline_variants |= BITFIELD_BIT(i);
@@ -464,6 +466,7 @@ generate_gfx_program_modules_optimal(struct zink_context *ctx, struct zink_scree
 
       struct zink_shader_module *zm = create_shader_module_for_stage_optimal(ctx, screen, prog->shaders[i], prog, i, state);
       prog->objs[i] = zm->obj;
+      prog->objects[i] = zm->obj.obj;
    }
 
    p_atomic_dec(&prog->base.reference.count);
@@ -659,6 +662,7 @@ update_gfx_shader_module_optimal(struct zink_context *ctx, struct zink_gfx_progr
 
    bool changed = prog->objs[pstage].mod != zm->obj.mod;
    prog->objs[pstage] = zm->obj;
+   prog->objects[pstage] = zm->obj.obj;
    return changed;
 }
 
@@ -788,13 +792,29 @@ optimized_compile_job(void *data, void *gdata, int thread_index)
    }
 }
 
+static void
+optimized_shobj_compile_job(void *data, void *gdata, int thread_index)
+{
+   struct zink_gfx_pipeline_cache_entry *pc_entry = data;
+   struct zink_screen *screen = gdata;
+
+   struct zink_shader_object objs[ZINK_GFX_SHADER_COUNT];
+   for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
+      objs[i].mod = VK_NULL_HANDLE;
+      objs[i].spirv = pc_entry->shobjs[i].spirv;
+   }
+   pc_entry->pipeline = zink_create_gfx_pipeline(screen, pc_entry->prog, objs, &pc_entry->state, NULL, zink_primitive_topology(pc_entry->state.gfx_prim_mode), true);
+   /* no unoptimized_pipeline dance */
+}
+
 void
 zink_gfx_program_compile_queue(struct zink_context *ctx, struct zink_gfx_pipeline_cache_entry *pc_entry)
 {
    struct zink_screen *screen = zink_screen(ctx->base.screen);
    if (screen->driver_workarounds.disable_optimized_compile)
       return;
-   util_queue_add_job(&screen->cache_get_thread, pc_entry, &pc_entry->fence, optimized_compile_job, NULL, 0);
+   util_queue_add_job(&screen->cache_get_thread, pc_entry, &pc_entry->fence,
+                      pc_entry->prog->base.uses_shobj ? optimized_shobj_compile_job : optimized_compile_job, NULL, 0);
 }
 
 static void
@@ -853,7 +873,7 @@ update_cs_shader_module(struct zink_context *ctx, struct zink_compute_program *c
          return;
       }
       zm->shobj = false;
-      zm->obj = zink_shader_compile(screen, false, zs, zink_shader_blob_deserialize(screen, &comp->shader->blob), key, zs_swizzle_size ? &ctx->di.zs_swizzle[MESA_SHADER_COMPUTE] : NULL);
+      zm->obj = zink_shader_compile(screen, false, zs, zink_shader_blob_deserialize(screen, &comp->shader->blob), key, zs_swizzle_size ? &ctx->di.zs_swizzle[MESA_SHADER_COMPUTE] : NULL, &comp->base);
       if (!zm->obj.spirv) {
          FREE(zm);
          return;
@@ -1303,7 +1323,7 @@ precompile_compute_job(void *data, void *gdata, int thread_index)
    comp->curr = comp->module = CALLOC_STRUCT(zink_shader_module);
    assert(comp->module);
    comp->module->shobj = false;
-   comp->module->obj = zink_shader_compile(screen, false, comp->shader, comp->nir, NULL, NULL);
+   comp->module->obj = zink_shader_compile(screen, false, comp->shader, comp->nir, NULL, NULL, &comp->base);
    /* comp->nir will be freed by zink_shader_compile */
    comp->nir = NULL;
    assert(comp->module->obj.spirv);
@@ -2033,9 +2053,11 @@ precompile_job(void *data, void *gdata, int thread_index)
    state.optimal_key = state.shader_keys_optimal.key.val;
    generate_gfx_program_modules_optimal(NULL, screen, prog, &state);
    zink_screen_get_pipeline_cache(screen, &prog->base, true);
-   simple_mtx_lock(&prog->libs->lock);
-   zink_create_pipeline_lib(screen, prog, &state);
-   simple_mtx_unlock(&prog->libs->lock);
+   if (!screen->info.have_EXT_shader_object) {
+      simple_mtx_lock(&prog->libs->lock);
+      zink_create_pipeline_lib(screen, prog, &state);
+      simple_mtx_unlock(&prog->libs->lock);
+   }
    zink_screen_update_pipeline_cache(screen, &prog->base, true);
 }
 
@@ -2105,6 +2127,8 @@ zink_link_gfx_shader(struct pipe_context *pctx, void **shaders)
                                                      shaders[MESA_SHADER_TESS_EVAL] ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, true);
       print_pipeline_stats(screen, pipeline);
    } else {
+      if (zink_screen(pctx->screen)->info.have_EXT_shader_object)
+         prog->base.uses_shobj = !BITSET_TEST(zshaders[MESA_SHADER_FRAGMENT]->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);
       util_queue_add_job(&zink_screen(pctx->screen)->cache_get_thread, prog, &prog->base.cache_fence, precompile_job, NULL, 0);
    }
 }
@@ -2207,7 +2231,8 @@ zink_program_init(struct zink_context *ctx)
 
    STATIC_ASSERT(sizeof(union zink_shader_key_optimal) == sizeof(uint32_t));
 
-   if (zink_screen(ctx->base.screen)->info.have_EXT_graphics_pipeline_library || zink_debug & ZINK_DEBUG_SHADERDB)
+   struct zink_screen *screen = zink_screen(ctx->base.screen);
+   if (screen->info.have_EXT_graphics_pipeline_library || screen->info.have_EXT_shader_object || zink_debug & ZINK_DEBUG_SHADERDB)
       ctx->base.link_shader = zink_link_gfx_shader;
 }
 
index 81e9eb4..77e5f11 100644 (file)
@@ -186,7 +186,10 @@ zink_get_gfx_pipeline(struct zink_context *ctx,
       /* init the optimized background compile fence */
       util_queue_fence_init(&pc_entry->fence);
       entry = _mesa_hash_table_insert_pre_hashed(&prog->pipelines[rp_idx][idx], state->final_hash, pc_entry, pc_entry);
-      if (HAVE_LIB && zink_can_use_pipeline_libs(ctx)) {
+      if (prog->base.uses_shobj && !prog->is_separable) {
+         memcpy(pc_entry->shobjs, prog->objs, sizeof(prog->objs));
+         zink_gfx_program_compile_queue(ctx, pc_entry);
+      } else if (HAVE_LIB && zink_can_use_pipeline_libs(ctx)) {
          /* this is the graphics pipeline library path: find/construct all partial pipelines */
          simple_mtx_lock(&prog->libs->lock);
          struct set_entry *he = _mesa_set_search(&prog->libs->libs, &ctx->gfx_pipeline_state.optimal_key);
index 598fe89..ebf2a76 100644 (file)
@@ -1028,6 +1028,7 @@ struct zink_gfx_pipeline_cache_entry {
          struct zink_gfx_output_key *okey;
          VkPipeline unoptimized_pipeline;
       } gpl;
+      struct zink_shader_object shobjs[ZINK_GFX_SHADER_COUNT];
    };
 };