tu: Move FS-specific pipeline information to the shader
author Connor Abbott <cwabbott0@gmail.com>
Tue, 5 Sep 2023 14:15:42 +0000 (16:15 +0200)
committer Marge Bot <emma+marge@anholt.net>
Mon, 25 Sep 2023 19:03:56 +0000 (19:03 +0000)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25276>

src/freedreno/vulkan/tu_cmd_buffer.cc
src/freedreno/vulkan/tu_device.h
src/freedreno/vulkan/tu_lrz.cc
src/freedreno/vulkan/tu_pipeline.cc
src/freedreno/vulkan/tu_pipeline.h
src/freedreno/vulkan/tu_shader.cc
src/freedreno/vulkan/tu_shader.h

index e716837..bb8006d 100644 (file)
@@ -2974,7 +2974,10 @@ tu_bind_gs(struct tu_cmd_buffer *cmd, struct tu_shader *gs)
 static void
 tu_bind_fs(struct tu_cmd_buffer *cmd, struct tu_shader *fs)
 {
-   cmd->state.shaders[MESA_SHADER_FRAGMENT] = fs;
+   if (cmd->state.shaders[MESA_SHADER_FRAGMENT] != fs) {
+      cmd->state.shaders[MESA_SHADER_FRAGMENT] = fs;
+      cmd->state.dirty |= TU_CMD_DIRTY_LRZ;
+   }
 }
 
 VKAPI_ATTR void VKAPI_CALL
@@ -4457,8 +4460,11 @@ tu6_build_depth_plane_z_mode(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
    bool depth_test_enable = cmd->vk.dynamic_graphics_state.ds.depth.test_enable;
    bool depth_write = tu6_writes_depth(cmd, depth_test_enable);
    bool stencil_write = tu6_writes_stencil(cmd);
+   const struct tu_shader *fs = cmd->state.shaders[MESA_SHADER_FRAGMENT];
+   const struct tu_render_pass *pass = cmd->state.pass;
+   const struct tu_subpass *subpass = cmd->state.subpass;
 
-   if ((cmd->state.pipeline->base.lrz.fs.has_kill ||
+   if ((fs->variant->has_kill ||
         cmd->state.pipeline->feedback_loop_ds) &&
        (depth_write || stencil_write)) {
       zmode = (cmd->state.lrz.valid && cmd->state.lrz.enabled)
@@ -4466,15 +4472,19 @@ tu6_build_depth_plane_z_mode(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
                  : A6XX_LATE_Z;
    }
 
-   bool force_late_z = cmd->state.pipeline->base.lrz.force_late_z ||
+   bool force_late_z = 
+      (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED &&
+       pass->attachments[subpass->depth_stencil_attachment.attachment].format
+       == VK_FORMAT_S8_UINT) ||
+      fs->fs.lrz.force_late_z ||
       /* alpha-to-coverage can behave like a discard. */
       cmd->vk.dynamic_graphics_state.ms.alpha_to_coverage_enable;
-   if ((force_late_z && !cmd->state.pipeline->base.lrz.fs.force_early_z) ||
+   if ((force_late_z && !fs->variant->fs.early_fragment_tests) ||
        !depth_test_enable)
       zmode = A6XX_LATE_Z;
 
    /* User defined early tests take precedence above all else */
-   if (cmd->state.pipeline->base.lrz.fs.early_fragment_tests)
+   if (fs->variant->fs.early_fragment_tests)
       zmode = A6XX_EARLY_Z;
 
    tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_DEPTH_PLANE_CNTL, 1);
@@ -4544,11 +4554,11 @@ tu6_emit_fs_params(struct tu_cmd_buffer *cmd)
       return;
    }
 
-   struct tu_graphics_pipeline *pipeline = cmd->state.pipeline;
+   struct tu_shader *fs = cmd->state.shaders[MESA_SHADER_FRAGMENT];
 
    unsigned num_units = fs_params_size(cmd);
 
-   if (pipeline->has_fdm)
+   if (fs->fs.has_fdm)
       tu_cs_set_writeable(&cmd->sub_cs, true);
 
    struct tu_cs cs;
@@ -4569,7 +4579,7 @@ tu6_emit_fs_params(struct tu_cmd_buffer *cmd)
    tu_cs_emit(&cs, 0);
 
    STATIC_ASSERT(IR3_DP_FS_FRAG_INVOCATION_COUNT == IR3_DP_FS_DYNAMIC);
-   tu_cs_emit(&cs, pipeline->base.fs.per_samp ?
+   tu_cs_emit(&cs, fs->fs.per_samp ?
               cmd->vk.dynamic_graphics_state.ms.rasterization_samples : 1);
    tu_cs_emit(&cs, 0);
    tu_cs_emit(&cs, 0);
@@ -4578,7 +4588,7 @@ tu6_emit_fs_params(struct tu_cmd_buffer *cmd)
    STATIC_ASSERT(IR3_DP_FS_FRAG_SIZE == IR3_DP_FS_DYNAMIC + 4);
    STATIC_ASSERT(IR3_DP_FS_FRAG_OFFSET == IR3_DP_FS_DYNAMIC + 6);
    if (num_units > 1) {
-      if (pipeline->has_fdm) {
+      if (fs->fs.has_fdm) {
          struct apply_fs_params_state state = {
             .num_consts = num_units - 1,
          };
@@ -4596,7 +4606,7 @@ tu6_emit_fs_params(struct tu_cmd_buffer *cmd)
 
    cmd->state.fs_params = tu_cs_end_draw_state(&cmd->sub_cs, &cs);
 
-   if (pipeline->has_fdm)
+   if (fs->fs.has_fdm)
       tu_cs_set_writeable(&cmd->sub_cs, false);
 }
 
index 286789a..4878823 100644 (file)
@@ -287,7 +287,7 @@ struct tu_device
    struct ir3_shader *global_shaders[GLOBAL_SH_COUNT];
    uint64_t global_shader_va[GLOBAL_SH_COUNT];
 
-   struct tu_shader *empty_tcs, *empty_tes, *empty_gs, *empty_fs;
+   struct tu_shader *empty_tcs, *empty_tes, *empty_gs, *empty_fs, *empty_fs_fdm;
 
    uint32_t vsc_draw_strm_pitch;
    uint32_t vsc_prim_strm_pitch;
index 2744e6b..3c8baf1 100644 (file)
@@ -559,6 +559,7 @@ tu6_calculate_lrz_state(struct tu_cmd_buffer *cmd,
                         const uint32_t a)
 {
    struct tu_pipeline *pipeline = &cmd->state.pipeline->base;
+   const struct tu_shader *fs = cmd->state.shaders[MESA_SHADER_FRAGMENT];
    bool z_test_enable = cmd->vk.dynamic_graphics_state.ds.depth.test_enable;
    bool z_write_enable = cmd->vk.dynamic_graphics_state.ds.depth.write_enable;
    bool z_bounds_enable = cmd->vk.dynamic_graphics_state.ds.depth.bounds_test.enable;
@@ -587,7 +588,8 @@ tu6_calculate_lrz_state(struct tu_cmd_buffer *cmd,
    gras_lrz_cntl.enable = true;
    gras_lrz_cntl.lrz_write =
       z_write_enable &&
-      !(pipeline->lrz.lrz_status & TU_LRZ_FORCE_DISABLE_WRITE);
+      !(pipeline->lrz.lrz_status & TU_LRZ_FORCE_DISABLE_WRITE) &&
+      !(fs->fs.lrz.status & TU_LRZ_FORCE_DISABLE_WRITE);
    gras_lrz_cntl.z_test_enable = z_write_enable;
    gras_lrz_cntl.z_bounds_enable = z_bounds_enable;
    gras_lrz_cntl.fc_enable = cmd->state.lrz.fast_clear;
@@ -608,7 +610,7 @@ tu6_calculate_lrz_state(struct tu_cmd_buffer *cmd,
     * fragment tests.  We have to skip LRZ testing and updating, but as long as
     * the depth direction stayed the same we can continue with LRZ testing later.
     */
-   if (pipeline->lrz.lrz_status & TU_LRZ_FORCE_DISABLE_LRZ) {
+   if (fs->fs.lrz.status & TU_LRZ_FORCE_DISABLE_LRZ) {
       if (cmd->state.lrz.prev_direction != TU_LRZ_UNKNOWN || !cmd->state.lrz.gpu_dir_tracking) {
          perf_debug(cmd->device, "Skipping LRZ due to FS");
          temporary_disable_lrz = true;
index d1be0cf..86c5d7c 100644 (file)
@@ -934,29 +934,6 @@ tu6_emit_vpc(struct tu_cs *cs,
 TU_GENX(tu6_emit_vpc);
 
 static void
-tu_emit_fs_pipeline(const struct ir3_shader_variant *fs,
-                    struct tu_pipeline *pipeline)
-{
-   if (fs->has_kill) {
-      pipeline->lrz.lrz_status |= TU_LRZ_FORCE_DISABLE_WRITE;
-   }
-   if (fs->no_earlyz || fs->writes_pos) {
-      pipeline->lrz.lrz_status = TU_LRZ_FORCE_DISABLE_LRZ;
-   }
-   pipeline->lrz.fs.has_kill = fs->has_kill;
-   pipeline->lrz.fs.early_fragment_tests = fs->fs.early_fragment_tests;
-
-   if (!fs->fs.early_fragment_tests &&
-       (fs->no_earlyz || fs->writes_pos || fs->writes_stencilref || fs->writes_smask)) {
-      pipeline->lrz.force_late_z = true;
-   }
-
-   pipeline->lrz.fs.force_early_z = fs->fs.early_fragment_tests;
-
-   pipeline->fs.per_samp = fs->per_samp || fs->key.sample_shading;
-}
-
-static void
 tu6_emit_vs_params(struct tu_cs *cs,
                    const struct ir3_const_state *const_state,
                    unsigned constlen,
@@ -2024,7 +2001,9 @@ done:
    if (builder->state &
        VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) {
       if (!shaders[MESA_SHADER_FRAGMENT]) {
-         shaders[MESA_SHADER_FRAGMENT] = builder->device->empty_fs;
+         shaders[MESA_SHADER_FRAGMENT] =
+            builder->fragment_density_map ?
+            builder->device->empty_fs_fdm : builder->device->empty_fs;
          vk_pipeline_cache_object_ref(&shaders[MESA_SHADER_FRAGMENT]->base);
       }
    }
@@ -2177,10 +2156,7 @@ tu_pipeline_builder_parse_libraries(struct tu_pipeline_builder *builder,
       if (library->state &
           VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) {
          pipeline->ds = library->base.ds;
-         pipeline->fs = library->base.fs;
-         pipeline->lrz.fs = library->base.lrz.fs;
          pipeline->lrz.lrz_status |= library->base.lrz.lrz_status;
-         pipeline->lrz.force_late_z |= library->base.lrz.force_late_z;
          pipeline->shared_consts = library->base.shared_consts;
       }
 
@@ -2188,7 +2164,6 @@ tu_pipeline_builder_parse_libraries(struct tu_pipeline_builder *builder,
           VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT) {
          pipeline->output = library->base.output;
          pipeline->lrz.lrz_status |= library->base.lrz.lrz_status;
-         pipeline->lrz.force_late_z |= library->base.lrz.force_late_z;
          pipeline->prim_order = library->base.prim_order;
       }
 
@@ -2422,8 +2397,6 @@ tu_pipeline_builder_parse_shader_stages(struct tu_pipeline_builder *builder,
       !last_shader->writes_viewport &&
       builder->fragment_density_map &&
       builder->device->physical_device->info->a6xx.has_per_view_viewport;
-
-   tu_emit_fs_pipeline(fs, pipeline);
 }
 
 static const enum mesa_vk_dynamic_graphics_state tu_vertex_input_state[] = {
@@ -3608,7 +3581,7 @@ tu_emit_draw_state(struct tu_cmd_buffer *cmd)
 #define DRAW_STATE_FDM(name, id, ...)                                         \
    if ((EMIT_STATE(name) || (cmd->state.dirty & TU_CMD_DIRTY_FDM)) &&         \
        !(cmd->state.pipeline->base.set_state_mask & (1u << id))) {            \
-      if (cmd->state.pipeline->has_fdm) {                                     \
+      if (cmd->state.shaders[MESA_SHADER_FRAGMENT]->fs.has_fdm) {             \
          tu_cs_set_writeable(&cmd->sub_cs, true);                             \
          tu6_emit_##name##_fdm(&cs, cmd, __VA_ARGS__);                        \
          cmd->state.dynamic_state[id] =                                       \
@@ -3727,14 +3700,6 @@ tu_pipeline_builder_parse_depth_stencil(
          (VK_PIPELINE_DEPTH_STENCIL_STATE_CREATE_RASTERIZATION_ORDER_ATTACHMENT_DEPTH_ACCESS_BIT_ARM |
           VK_PIPELINE_DEPTH_STENCIL_STATE_CREATE_RASTERIZATION_ORDER_ATTACHMENT_STENCIL_ACCESS_BIT_ARM);
    }
-
-   /* FDM isn't compatible with LRZ, because the LRZ image uses the original
-    * resolution and we would need to use the low resolution.
-    *
-    * TODO: Use a patchpoint to only disable LRZ for scaled bins.
-    */
-   if (builder->fragment_density_map)
-      pipeline->lrz.lrz_status = TU_LRZ_FORCE_DISABLE_LRZ;
 }
 
 static void
@@ -3768,9 +3733,6 @@ tu_pipeline_builder_parse_multisample_and_color_blend(
        VK_IMAGE_ASPECT_COLOR_BIT) ? builder->create_info->pColorBlendState :
       &dummy_blend_info;
 
-   pipeline->lrz.force_late_z |=
-      builder->graphics_state.rp->depth_attachment_format == VK_FORMAT_S8_UINT;
-
    if (builder->graphics_state.rp->attachment_aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
       pipeline->output.raster_order_attachment_access =
          blend_info->flags &
@@ -4017,7 +3979,6 @@ tu_pipeline_builder_build(struct tu_pipeline_builder *builder,
          (gfx_pipeline->feedback_loop_color ||
           gfx_pipeline->feedback_loop_ds) &&
          !builder->graphics_state.rp->feedback_loop_input_only;
-      gfx_pipeline->has_fdm = builder->fragment_density_map;
    }
 
    return VK_SUCCESS;
index 59b1c10..f5b5770 100644 (file)
@@ -40,13 +40,6 @@ struct tu_lrz_pipeline
 {
    uint32_t lrz_status;
 
-   struct {
-      bool has_kill;
-      bool force_early_z;
-      bool early_fragment_tests;
-   } fs;
-
-   bool force_late_z;
    bool blend_valid;
 };
 
@@ -150,10 +143,6 @@ struct tu_pipeline
 
    struct tu_shader *shaders[MESA_SHADER_STAGES];
 
-   struct {
-      bool per_samp;
-   } fs;
-
    struct
    {
       struct tu_draw_state config_state;
@@ -222,7 +211,6 @@ struct tu_graphics_pipeline {
 
    bool feedback_loop_color, feedback_loop_ds;
    bool feedback_loop_may_involve_textures;
-   bool has_fdm;
 };
 
 struct tu_compute_pipeline {
index b3f9b31..dc62737 100644 (file)
@@ -18,6 +18,7 @@
 #include "tu_device.h"
 #include "tu_descriptor_set.h"
 #include "tu_pipeline.h"
+#include "tu_lrz.h"
 
 nir_shader *
 tu_spirv_to_nir(struct tu_device *dev,
@@ -2095,6 +2096,14 @@ tu_shader_serialize(struct vk_pipeline_cache_object *object,
       blob_write_uint8(blob, 0);
    }
 
+   switch (shader->variant->type) {
+   case MESA_SHADER_FRAGMENT:
+      blob_write_bytes(blob, &shader->fs, sizeof(shader->fs));
+      break;
+   default:
+      break;
+   }
+
    return true;
 }
 
@@ -2122,6 +2131,14 @@ tu_shader_deserialize(struct vk_pipeline_cache *cache,
    if (has_safe_const)
       shader->safe_const_variant = ir3_retrieve_variant(blob, dev->compiler, NULL);
 
+   switch (shader->variant->type) {
+   case MESA_SHADER_FRAGMENT:
+      blob_copy_bytes(blob, &shader->fs, sizeof(shader->fs));
+      break;
+   default:
+      break;
+   }
+
    VkResult result = tu_upload_shader(dev, shader);
    if (result != VK_SUCCESS) {
       vk_free(&dev->vk.alloc, shader);
@@ -2279,6 +2296,32 @@ tu_shader_create(struct tu_device *dev,
 
    shader->view_mask = key->multiview_mask;
 
+   switch (shader->variant->type) {
+   case MESA_SHADER_FRAGMENT: {
+      const struct ir3_shader_variant *fs = shader->variant;
+      shader->fs.per_samp = fs->per_samp || ir3_key->sample_shading;
+      shader->fs.has_fdm = key->fragment_density_map;
+      if (fs->has_kill)
+         shader->fs.lrz.status |= TU_LRZ_FORCE_DISABLE_WRITE;
+      if (fs->no_earlyz || fs->writes_pos)
+         shader->fs.lrz.status = TU_LRZ_FORCE_DISABLE_LRZ;
+      /* FDM isn't compatible with LRZ, because the LRZ image uses the original
+       * resolution and we would need to use the low resolution.
+       *
+       * TODO: Use a patchpoint to only disable LRZ for scaled bins.
+       */
+      if (key->fragment_density_map)
+         shader->fs.lrz.status = TU_LRZ_FORCE_DISABLE_LRZ;
+      if (!fs->fs.early_fragment_tests &&
+          (fs->no_earlyz || fs->writes_pos || fs->writes_stencilref || fs->writes_smask)) {
+         shader->fs.lrz.force_late_z = true;
+      }
+      break;
+   }
+   default:
+      break;
+   }
+
    VkResult result = tu_upload_shader(dev, shader);
    if (result != VK_SUCCESS) {
       vk_free(&dev->vk.alloc, shader);
@@ -2323,7 +2366,8 @@ tu_empty_shader_create(struct tu_device *dev,
 }
 
 static VkResult
-tu_empty_fs_create(struct tu_device *dev, struct tu_shader **shader)
+tu_empty_fs_create(struct tu_device *dev, struct tu_shader **shader,
+                   bool fragment_density_map)
 {
    struct ir3_shader_key key = {};
    const struct ir3_shader_options options = {};
@@ -2339,6 +2383,10 @@ tu_empty_fs_create(struct tu_device *dev, struct tu_shader **shader)
    if (!*shader)
       return VK_ERROR_OUT_OF_HOST_MEMORY;
 
+   (*shader)->fs.has_fdm = fragment_density_map;
+   if (fragment_density_map)
+      (*shader)->fs.lrz.status = TU_LRZ_FORCE_DISABLE_LRZ;
+
    struct ir3_shader *ir3_shader =
       ir3_shader_from_nir(dev->compiler, fs_b.shader, &options, &so_info);
    (*shader)->variant = ir3_shader_create_variant(ir3_shader, &key, false);
@@ -2363,7 +2411,11 @@ tu_init_empty_shaders(struct tu_device *dev)
    if (result != VK_SUCCESS)
       goto out;
 
-   result = tu_empty_fs_create(dev, &dev->empty_fs);
+   result = tu_empty_fs_create(dev, &dev->empty_fs, false);
+   if (result != VK_SUCCESS)
+      goto out;
+
+   result = tu_empty_fs_create(dev, &dev->empty_fs_fdm, true);
    if (result != VK_SUCCESS)
       goto out;
 
@@ -2378,6 +2430,8 @@ out:
       vk_pipeline_cache_object_unref(&dev->vk, &dev->empty_gs->base);
    if (dev->empty_fs)
       vk_pipeline_cache_object_unref(&dev->vk, &dev->empty_fs->base);
+   if (dev->empty_fs_fdm)
+      vk_pipeline_cache_object_unref(&dev->vk, &dev->empty_fs_fdm->base);
    return result;
 }
 
@@ -2388,6 +2442,7 @@ tu_destroy_empty_shaders(struct tu_device *dev)
    vk_pipeline_cache_object_unref(&dev->vk, &dev->empty_tes->base);
    vk_pipeline_cache_object_unref(&dev->vk, &dev->empty_gs->base);
    vk_pipeline_cache_object_unref(&dev->vk, &dev->empty_fs->base);
+   vk_pipeline_cache_object_unref(&dev->vk, &dev->empty_fs_fdm->base);
 }
 
 void
index a89ebe0..15e5e9d 100644 (file)
@@ -62,6 +62,18 @@ struct tu_shader
    struct tu_const_state const_state;
    uint32_t view_mask;
    uint8_t active_desc_sets;
+
+   union {
+      struct {
+         bool per_samp;
+         bool has_fdm;
+
+         struct {
+            uint32_t status;
+            bool force_late_z;
+         } lrz;
+      } fs;
+   };
 };
 
 struct tu_shader_key {