zink: use EXT_shader_object to (re)implement separate shaders
author Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
Fri, 31 Mar 2023 20:40:43 +0000 (16:40 -0400)
committer Marge Bot <emma+marge@anholt.net>
Mon, 24 Apr 2023 02:49:19 +0000 (02:49 +0000)
This reimplements the existing separate-shader functionality, but
using shader objects (EXT_shader_object) instead of GPL (EXT_graphics_pipeline_library).

It must be disabled by default, as this extension is not (currently)
compatible with feedback loops; it is enabled per application through the
zink_shader_object_enable driconf option.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22612>

src/gallium/drivers/zink/driinfo_zink.h
src/gallium/drivers/zink/zink_compiler.c
src/gallium/drivers/zink/zink_descriptors.c
src/gallium/drivers/zink/zink_draw.cpp
src/gallium/drivers/zink/zink_program.c
src/gallium/drivers/zink/zink_screen.c
src/gallium/drivers/zink/zink_types.h
src/util/00-mesa-defaults.conf

index 62de73c..d4ed3d7 100644 (file)
@@ -9,6 +9,7 @@ DRI_CONF_SECTION_END
 
 DRI_CONF_SECTION_PERFORMANCE
 DRI_CONF_MESA_GLTHREAD_DRIVER(true)
+DRI_CONF_OPT_B(zink_shader_object_enable, false, "Enable support for EXT_shader_object")
 DRI_CONF_SECTION_END
 
 DRI_CONF_SECTION_QUALITY
index ddbaf8c..9e6e07b 100644 (file)
@@ -3108,6 +3108,7 @@ static struct zink_shader_object
 zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv, bool separate)
 {
    VkShaderModuleCreateInfo smci = {0};
+   VkShaderCreateInfoEXT sci = {0};
 
    if (!spirv)
       spirv = zs->spirv;
@@ -3119,6 +3120,25 @@ zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, st
       zink_shader_dump(spirv->words, spirv->num_words * sizeof(uint32_t), buf);
    }
 
+   sci.sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT;
+   sci.stage = mesa_to_vk_shader_stage(zs->info.stage);
+   if (sci.stage != VK_SHADER_STAGE_FRAGMENT_BIT)
+      sci.nextStage = VK_SHADER_STAGE_FRAGMENT_BIT;
+   sci.codeType = VK_SHADER_CODE_TYPE_SPIRV_EXT;
+   sci.codeSize = spirv->num_words * sizeof(uint32_t);
+   sci.pCode = spirv->words;
+   sci.pName = "main";
+   sci.setLayoutCount = 2;
+   VkDescriptorSetLayout dsl[2] = {0};
+   dsl[zs->info.stage == MESA_SHADER_FRAGMENT] = zs->precompile.dsl;
+   sci.pSetLayouts = dsl;
+   VkPushConstantRange pcr;
+   pcr.stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
+   pcr.offset = 0;
+   pcr.size = sizeof(struct zink_gfx_push_constant);
+   sci.pushConstantRangeCount = 1;
+   sci.pPushConstantRanges = &pcr;
+
    smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
    smci.codeSize = spirv->num_words * sizeof(uint32_t);
    smci.pCode = spirv->words;
@@ -3197,8 +3217,12 @@ zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, st
    }
 #endif
 
+   VkResult ret;
    struct zink_shader_object obj;
-   VkResult ret = VKSCR(CreateShaderModule)(screen->dev, &smci, NULL, &obj.mod);
+   if (!separate || !screen->info.have_EXT_shader_object)
+      ret = VKSCR(CreateShaderModule)(screen->dev, &smci, NULL, &obj.mod);
+   else
+      ret = VKSCR(CreateShadersEXT)(screen->dev, 1, &sci, NULL, &obj.obj);
    bool success = zink_screen_handle_vkresult(screen, ret);
    assert(success);
    return obj;
@@ -4982,10 +5006,14 @@ zink_shader_free(struct zink_screen *screen, struct zink_shader *shader)
    util_queue_fence_wait(&shader->precompile.fence);
    util_queue_fence_destroy(&shader->precompile.fence);
    zink_descriptor_shader_deinit(screen, shader);
-   if (shader->precompile.obj.mod)
-      VKSCR(DestroyShaderModule)(screen->dev, shader->precompile.obj.mod, NULL);
-   if (shader->precompile.gpl)
-      VKSCR(DestroyPipeline)(screen->dev, shader->precompile.gpl, NULL);
+   if (screen->info.have_EXT_shader_object) {
+      VKSCR(DestroyShaderEXT)(screen->dev, shader->precompile.obj.obj, NULL);
+   } else {
+      if (shader->precompile.obj.mod)
+         VKSCR(DestroyShaderModule)(screen->dev, shader->precompile.obj.mod, NULL);
+      if (shader->precompile.gpl)
+         VKSCR(DestroyPipeline)(screen->dev, shader->precompile.gpl, NULL);
+   }
    blob_finish(&shader->blob);
    ralloc_free(shader->spirv);
    free(shader->precompile.bindings);
index 005891a..a4814dc 100644 (file)
@@ -741,6 +741,8 @@ zink_descriptor_shader_init(struct zink_screen *screen, struct zink_shader *shad
          shader->precompile.db_offset[i] = val;
       }
    }
+   if (screen->info.have_EXT_shader_object)
+      return;
    VkDescriptorSetLayout dsl[ZINK_DESCRIPTOR_ALL_TYPES] = {0};
    unsigned num_dsl = num_bindings ? 2 : 0;
    if (shader->bindless)
index 3ecf1eb..5dff531 100644 (file)
@@ -272,18 +272,37 @@ update_gfx_pipeline(struct zink_context *ctx, struct zink_batch_state *bs, enum
 {
    VkPipeline prev_pipeline = ctx->gfx_pipeline_state.pipeline;
    const struct zink_screen *screen = zink_screen(ctx->base.screen);
+   bool shaders_changed = ctx->gfx_dirty;
    if (screen->optimal_keys && !ctx->is_generated_gs_bound)
       zink_gfx_program_update_optimal(ctx);
    else
       zink_gfx_program_update(ctx);
-   VkPipeline pipeline;
-   if (screen->info.have_EXT_graphics_pipeline_library)
-      pipeline = zink_get_gfx_pipeline<DYNAMIC_STATE, true>(ctx, ctx->curr_program, &ctx->gfx_pipeline_state, mode);
-   else
-      pipeline = zink_get_gfx_pipeline<DYNAMIC_STATE, false>(ctx, ctx->curr_program, &ctx->gfx_pipeline_state, mode);
-   bool pipeline_changed = prev_pipeline != pipeline;
-   if (BATCH_CHANGED || pipeline_changed)
-      VKCTX(CmdBindPipeline)(bs->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
+   bool pipeline_changed = false;
+   if (ctx->curr_program->is_separable && screen->info.have_EXT_shader_object) {
+      if (BATCH_CHANGED || shaders_changed || !ctx->shobj_draw) {
+         VkShaderStageFlagBits stages[] = {
+            VK_SHADER_STAGE_VERTEX_BIT,
+            VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
+            VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
+            VK_SHADER_STAGE_GEOMETRY_BIT,
+            VK_SHADER_STAGE_FRAGMENT_BIT,
+         };
+         /* always rebind all stages */
+         VKCTX(CmdBindShadersEXT)(bs->cmdbuf, ZINK_GFX_SHADER_COUNT, stages, ctx->curr_program->objects);
+         VKCTX(CmdSetDepthBiasEnable)(bs->cmdbuf, VK_TRUE);
+      }
+      ctx->shobj_draw = true;
+   } else {
+      VkPipeline pipeline;
+      if (screen->info.have_EXT_graphics_pipeline_library)
+         pipeline = zink_get_gfx_pipeline<DYNAMIC_STATE, true>(ctx, ctx->curr_program, &ctx->gfx_pipeline_state, mode);
+      else
+         pipeline = zink_get_gfx_pipeline<DYNAMIC_STATE, false>(ctx, ctx->curr_program, &ctx->gfx_pipeline_state, mode);
+      pipeline_changed = prev_pipeline != pipeline;
+      if (BATCH_CHANGED || pipeline_changed || ctx->shobj_draw)
+         VKCTX(CmdBindPipeline)(bs->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
+      ctx->shobj_draw = false;
+   }
    return pipeline_changed;
 }
 
index a10b363..5abca2f 100644 (file)
@@ -1149,9 +1149,12 @@ create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stag
    prog->stages_remaining = prog->stages_present = shader_stages;
    prog->shaders[MESA_SHADER_FRAGMENT] = stages[MESA_SHADER_FRAGMENT];
    prog->last_vertex_stage = stages[MESA_SHADER_VERTEX];
-   prog->libs = create_lib_cache(prog, false);
-   /* this libs cache is owned by the program */
-   p_atomic_set(&prog->libs->refcount, 1);
+
+   if (!screen->info.have_EXT_shader_object) {
+      prog->libs = create_lib_cache(prog, false);
+      /* this libs cache is owned by the program */
+      p_atomic_set(&prog->libs->refcount, 1);
+   }
 
    unsigned refs = 0;
    for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
@@ -1159,6 +1162,9 @@ create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stag
          simple_mtx_lock(&prog->shaders[i]->lock);
          _mesa_set_add(prog->shaders[i]->programs, prog);
          simple_mtx_unlock(&prog->shaders[i]->lock);
+         if (screen->info.have_EXT_shader_object) {
+            prog->objects[i] = stages[i]->precompile.obj.obj;
+         }
          refs++;
       }
    }
@@ -1195,18 +1201,20 @@ create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stag
    }
    prog->base.layout = zink_pipeline_layout_create(screen, prog->base.dsl, prog->base.num_dsl, false, VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT);
 
-   VkPipeline libs[] = {stages[MESA_SHADER_VERTEX]->precompile.gpl, stages[MESA_SHADER_FRAGMENT]->precompile.gpl};
    prog->last_variant_hash = ctx->gfx_pipeline_state.optimal_key;
 
-   struct zink_gfx_library_key *gkey = CALLOC_STRUCT(zink_gfx_library_key);
-   if (!gkey) {
-      mesa_loge("ZINK: failed to allocate gkey!");
-      goto fail;
+   if (!screen->info.have_EXT_shader_object) {
+      VkPipeline libs[] = {stages[MESA_SHADER_VERTEX]->precompile.gpl, stages[MESA_SHADER_FRAGMENT]->precompile.gpl};
+      struct zink_gfx_library_key *gkey = CALLOC_STRUCT(zink_gfx_library_key);
+      if (!gkey) {
+         mesa_loge("ZINK: failed to allocate gkey!");
+         goto fail;
+      }
+      gkey->optimal_key = prog->last_variant_hash;
+      assert(gkey->optimal_key);
+      gkey->pipeline = zink_create_gfx_pipeline_combined(screen, prog, VK_NULL_HANDLE, libs, 2, VK_NULL_HANDLE, false);
+      _mesa_set_add(&prog->libs->libs, gkey);
    }
-   gkey->optimal_key = prog->last_variant_hash;
-   assert(gkey->optimal_key);
-   gkey->pipeline = zink_create_gfx_pipeline_combined(screen, prog, VK_NULL_HANDLE, libs, 2, VK_NULL_HANDLE, false);
-   _mesa_set_add(&prog->libs->libs, gkey);
 
    util_queue_add_job(&screen->cache_get_thread, prog, &prog->base.cache_fence, create_linked_separable_job, NULL, 0);
 
@@ -1479,7 +1487,7 @@ zink_destroy_gfx_program(struct zink_screen *screen,
          blob_finish(&prog->blobs[i]);
       }
    }
-   if (prog->is_separable)
+   if (prog->is_separable && prog->libs)
       zink_gfx_lib_cache_unref(screen, prog->libs);
 
    ralloc_free(prog);
@@ -2056,9 +2064,11 @@ precompile_separate_shader_job(void *data, void *gdata, int thread_index)
    struct zink_shader *zs = data;
 
    zs->precompile.obj = zink_shader_compile_separate(screen, zs);
-   VkShaderModule mods[ZINK_GFX_SHADER_COUNT] = {0};
-   mods[zs->info.stage] = zs->precompile.obj.mod;
-   zs->precompile.gpl = zink_create_gfx_pipeline_separate(screen, mods, zs->precompile.layout);
+   if (!screen->info.have_EXT_shader_object) {
+      VkShaderModule mods[ZINK_GFX_SHADER_COUNT] = {0};
+      mods[zs->info.stage] = zs->precompile.obj.mod;
+      zs->precompile.gpl = zink_create_gfx_pipeline_separate(screen, mods, zs->precompile.layout);
+   }
 }
 
 static void
index 299e69f..dec19dc 100644 (file)
@@ -2386,6 +2386,9 @@ zink_get_sample_pixel_grid(struct pipe_screen *pscreen, unsigned sample_count,
 static void
 init_driver_workarounds(struct zink_screen *screen)
 {
+   /* EXT_shader_object can't yet be used for feedback loop, so this must be per-app enabled */
+   if (!screen->driconf.zink_shader_object_enable)
+      screen->info.have_EXT_shader_object = false;
    /* enable implicit sync for all non-mesa drivers */
    screen->driver_workarounds.implicit_sync = true;
    switch (screen->info.driver_props.driverID) {
@@ -2423,6 +2426,8 @@ init_driver_workarounds(struct zink_screen *screen)
             screen->info.dynamic_state3_feats.extendedDynamicState3LogicOpEnable &&
             screen->info.dynamic_state2_feats.extendedDynamicState2LogicOp)
       screen->have_full_ds3 = true;
+   if (screen->info.have_EXT_shader_object)
+      screen->have_full_ds3 = true;
    if (screen->info.have_EXT_graphics_pipeline_library)
       screen->info.have_EXT_graphics_pipeline_library = screen->info.have_EXT_extended_dynamic_state &&
                                                         screen->info.have_EXT_extended_dynamic_state2 &&
@@ -2729,6 +2734,7 @@ zink_internal_create_screen(const struct pipe_screen_config *config)
       screen->driconf.glsl_correct_derivatives_after_discard = driQueryOptionb(config->options, "glsl_correct_derivatives_after_discard");
       //screen->driconf.inline_uniforms = driQueryOptionb(config->options, "radeonsi_inline_uniforms");
       screen->driconf.emulate_point_smooth = driQueryOptionb(config->options, "zink_emulate_point_smooth");
+      screen->driconf.zink_shader_object_enable = driQueryOptionb(config->options, "zink_shader_object_enable");
       screen->instance_info.disable_xcb_surface = driQueryOptionb(config->options, "disable_xcb_surface");
    }
 
index 07a86e9..e182efd 100644 (file)
@@ -733,7 +733,10 @@ enum zink_rast_prim {
 };
 
 struct zink_shader_object {
-   VkShaderModule mod;
+   union {
+      VkShaderEXT obj;
+      VkShaderModule mod;
+   };
 };
 
 struct zink_shader {
@@ -1044,7 +1047,10 @@ struct zink_gfx_program {
    struct zink_shader *last_vertex_stage;
 
    /* full */
-   VkShaderModule modules[ZINK_GFX_SHADER_COUNT]; // compute stage doesn't belong here
+   union {
+      VkShaderModule modules[ZINK_GFX_SHADER_COUNT]; // compute stage doesn't belong here
+      VkShaderEXT objects[ZINK_GFX_SHADER_COUNT];
+   };
    uint32_t module_hash[ZINK_GFX_SHADER_COUNT];
    struct blob blobs[ZINK_GFX_SHADER_COUNT];
    struct util_dynarray shader_cache[ZINK_GFX_SHADER_COUNT][2][2]; //normal, nonseamless cubes, inline uniforms
@@ -1436,6 +1442,7 @@ struct zink_screen {
       bool glsl_correct_derivatives_after_discard;
       bool inline_uniforms;
       bool emulate_point_smooth;
+      bool zink_shader_object_enable;
    } driconf;
 
    VkFormatProperties format_props[PIPE_FORMAT_COUNT];
@@ -1889,6 +1896,7 @@ struct zink_context {
 
    bool gfx_dirty;
 
+   bool shobj_draw : 1; //using shader objects for draw
    bool is_device_lost;
    bool primitive_restart;
    bool blitting : 1;
index 698a63d..6801fbd 100644 (file)
@@ -984,6 +984,9 @@ TODO: document the other workarounds.
         <application name="Quake II (wine)" executable="quake2.exe">
             <option name="zink_emulate_point_smooth" value="true"/>
         </application>
+        <application name="Tomb Raider 2013" executable="TombRaider">
+            <option name="zink_shader_object_enable" value="true" />
+        </application>
     </device>
     <device driver="iris">
         <application name="Middle Earth: Shadow of Mordor" executable="ShadowOfMordor">