radv: add support for a PS epilogs cache in the device
author: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Tue, 6 Dec 2022 14:23:30 +0000 (15:23 +0100)
committer: Marge Bot <emma+marge@anholt.net>
Tue, 10 Jan 2023 09:55:29 +0000 (09:55 +0000)
Similar to VS prologs.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20201>

src/amd/vulkan/radv_cmd_buffer.c
src/amd/vulkan/radv_device.c
src/amd/vulkan/radv_private.h

index e1b2af9..6c0a6c8 100644 (file)
@@ -4080,6 +4080,21 @@ radv_emit_color_blend_enable(struct radv_cmd_buffer *cmd_buffer)
    }
 }
 
+/* Hash callback for the PS epilog table.
+ *
+ * key_ points to a struct radv_ps_epilog_key; the hash is computed over the
+ * raw bytes of that structure.
+ * NOTE(review): this covers every byte of the struct, so keys presumably
+ * need to be fully zero-initialized before being filled in -- confirm
+ * against the key's definition and its callers.
+ */
+uint32_t
+radv_hash_ps_epilog(const void *key_)
+{
+   return _mesa_hash_data(key_, sizeof(struct radv_ps_epilog_key));
+}
+
+/* Equality callback for the PS epilog table.
+ *
+ * a_ and b_ both point to a struct radv_ps_epilog_key. Returns true when the
+ * two keys are byte-for-byte identical, false otherwise.
+ */
+bool
+radv_cmp_ps_epilog(const void *a_, const void *b_)
+{
+   return !memcmp(a_, b_, sizeof(struct radv_ps_epilog_key));
+}
+
 static void
 radv_emit_msaa_state(struct radv_cmd_buffer *cmd_buffer)
 {
index b255438..1488e79 100644 (file)
@@ -3318,6 +3318,31 @@ radv_device_finish_vs_prologs(struct radv_device *device)
    }
 }
 
+/* Set up the device-level PS epilog cache.
+ *
+ * Initializes device->ps_epilogs_lock and creates the device->ps_epilogs hash
+ * table, using radv_hash_ps_epilog/radv_cmp_ps_epilog as its callbacks.
+ *
+ * Returns VK_SUCCESS, or VK_ERROR_OUT_OF_HOST_MEMORY when the hash table
+ * cannot be created. On failure the lock is destroyed again, so nothing is
+ * left for the caller to clean up.
+ */
+static VkResult
+radv_device_init_ps_epilogs(struct radv_device *device)
+{
+   u_rwlock_init(&device->ps_epilogs_lock);
+
+   device->ps_epilogs = _mesa_hash_table_create(NULL, &radv_hash_ps_epilog, &radv_cmp_ps_epilog);
+   if (!device->ps_epilogs) {
+      /* radv_device_finish_ps_epilogs() does nothing when the table is NULL,
+       * so the lock has to be released here or it would be leaked.
+       */
+      u_rwlock_destroy(&device->ps_epilogs_lock);
+      return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
+
+   return VK_SUCCESS;
+}
+
+/* Tear down the device-level PS epilog cache.
+ *
+ * Safe to call when the cache was never created: a NULL device->ps_epilogs
+ * makes this a no-op. Otherwise every entry's key is freed, its shader part
+ * is unreferenced, the table is destroyed and the lock guarding it is
+ * released.
+ */
+static void
+radv_device_finish_ps_epilogs(struct radv_device *device)
+{
+   if (!device->ps_epilogs)
+      return;
+
+   hash_table_foreach(device->ps_epilogs, entry)
+   {
+      /* The table is destroyed without a delete callback below, so keys and
+       * shader parts have to be released by hand.
+       */
+      free((void *)entry->key);
+      radv_shader_part_unref(device, entry->data);
+   }
+   _mesa_hash_table_destroy(device->ps_epilogs, NULL);
+   device->ps_epilogs = NULL;
+
+   /* The table is only created after the lock has been initialized, so a
+    * non-NULL table means the lock is valid and must be destroyed too.
+    */
+   u_rwlock_destroy(&device->ps_epilogs_lock);
+}
+
 VkResult
 radv_device_init_vrs_state(struct radv_device *device)
 {
@@ -3635,6 +3660,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
    bool attachment_vrs_enabled = false;
    bool image_float32_atomics = false;
    bool vs_prologs = false;
+   bool ps_epilogs = false;
    bool global_bo_list = false;
    bool image_2d_view_of_3d = false;
    bool primitives_generated_query = false;
@@ -3991,6 +4017,12 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
          goto fail;
    }
 
+   if (ps_epilogs) {
+      result = radv_device_init_ps_epilogs(device);
+      if (result != VK_SUCCESS)
+         goto fail;
+   }
+
    if (device->physical_device->rad_info.gfx_level >= GFX7)
       cik_create_gfx_config(device);
 
@@ -4062,6 +4094,7 @@ fail:
 
    radv_device_finish_notifier(device);
    radv_device_finish_vs_prologs(device);
+   radv_device_finish_ps_epilogs(device);
    radv_device_finish_border_color(device);
 
    for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
@@ -4102,6 +4135,7 @@ radv_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
 
    radv_device_finish_notifier(device);
    radv_device_finish_vs_prologs(device);
+   radv_device_finish_ps_epilogs(device);
    radv_device_finish_border_color(device);
    radv_device_finish_vrs_image(device);
 
index 64d8cfd..9f5181c 100644 (file)
@@ -963,6 +963,10 @@ struct radv_device {
    struct radv_shader_part *simple_vs_prologs[MAX_VERTEX_ATTRIBS];
    struct radv_shader_part *instance_rate_vs_prologs[816];
 
+   /* PS epilogs */
+   struct u_rwlock ps_epilogs_lock;
+   struct hash_table *ps_epilogs;
+
    simple_mtx_t trace_mtx;
 
    /* Whether per-vertex VRS is forced. */
@@ -1701,6 +1705,9 @@ unsigned radv_instance_rate_prolog_index(unsigned num_attributes, uint32_t insta
 uint32_t radv_hash_vs_prolog(const void *key_);
 bool radv_cmp_vs_prolog(const void *a_, const void *b_);
 
+uint32_t radv_hash_ps_epilog(const void *key_);
+bool radv_cmp_ps_epilog(const void *a_, const void *b_);
+
 void radv_cmd_buffer_reset_rendering(struct radv_cmd_buffer *cmd_buffer);
 bool radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size,
                                   unsigned *out_offset, void **ptr);